diff --git a/.travis.yml b/.travis.yml
index 38111308..4883afbd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -64,7 +64,6 @@ jobs:
       name: release rc
       script:
         - echo "//registry.npmjs.org/:_authToken=\${NPM_TOKEN}" > .npmrc
-        - npm whoami
         - npm run release:rc -- --yes
 
 notifications:
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data
deleted file mode 100644
index 70641f44..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
deleted file mode 100644
index 41456196..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# js-ipfs-repo
-Implementation of the IPFS repo spec (https://github.com/ipfs/specs/tree/master/repo) in JavaScript
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data
deleted file mode 100644
index ce734230..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data
deleted file mode 100644
index c52fcda4..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data
deleted file mode 100644
index 4f882793..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data
deleted file mode 100644
index 615417b1..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data
deleted file mode 100644
index 951bfe04..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data
+++ /dev/null
@@ -1,23 +0,0 @@
-
-IPFS Alpha Security Notes
-
-We try hard to ensure our system is safe and robust, but all software
-has bugs, especially new software. This distribution is meant to be an
-alpha preview, don't use it for anything mission critical.
-
-Please note the following:
-
-- This is alpha software and has not been audited. It is our goal
-  to conduct a proper security audit once we close in on a 1.0 release.
-
-- ipfs is a networked program, and may have serious undiscovered
-  vulnerabilities. It is written in Go, and we do not execute any
-  user provided data. But please point any problems out to us in a
-  github issue, or email security@ipfs.io privately.
-
-- ipfs uses encryption for all communication, but it's NOT PROVEN SECURE
-  YET! It may be totally broken. For now, the code is included to make
-  sure we benchmark our operations with encryption in mind. In the future,
-  there will be an "unsafe" mode for high performance intranet apps.
-  If this is a blocking feature for you, please contact us.
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data
deleted file mode 100644
index d19d0c86..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data
deleted file mode 100644
index 42f65bd9..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data
deleted file mode 100644
index c9885c45..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data
deleted file mode 100644
index e743bdbf..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data
deleted file mode 100644
index 627ffcdf..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data
+++ /dev/null
@@ -1,55 +0,0 @@
-
-IPFS -- Inter-Planetary File system
-
-IPFS is a global, versioned, peer-to-peer filesystem. It combines good ideas
-from Git, BitTorrent, Kademlia, SFS, and the Web. It is like a single bit-
-torrent swarm, exchanging git objects. IPFS provides an interface as simple
-as the HTTP web, but with permanence built in. You can also mount the world
-at /ipfs.
-
-IPFS is a protocol:
-- defines a content-addressed file system
-- coordinates content delivery
-- combines Kademlia + BitTorrent + Git
-
-IPFS is a filesystem:
-- has directories and files
-- mountable filesystem (via FUSE)
-
-IPFS is a web:
-- can be used to view documents like the web
-- files accessible via HTTP at `http://ipfs.io/`
-- browsers or extensions can learn to use `ipfs://` directly
-- hash-addressed content guarantees authenticity
-
-IPFS is modular:
-- connection layer over any network protocol
-- routing layer
-- uses a routing layer DHT (kademlia/coral)
-- uses a path-based naming service
-- uses bittorrent-inspired block exchange
-
-IPFS uses crypto:
-- cryptographic-hash content addressing
-- block-level deduplication
-- file integrity + versioning
-- filesystem-level encryption + signing support
-
-IPFS is p2p:
-- worldwide peer-to-peer file transfers
-- completely decentralized architecture
-- **no** central point of failure
-
-IPFS is a cdn:
-- add a file to the filesystem locally, and it's now available to the world
-- caching-friendly (content-hash naming)
-- bittorrent-based bandwidth distribution
-
-IPFS has a name service:
-- IPNS, an SFS inspired name system
-- global namespace based on PKI
-- serves to build trust chains
-- compatible with other NSes
-- can map DNS, .onion, .bit, etc to IPNS
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data
deleted file mode 100644
index 42c502e2..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data
deleted file mode 100644
index 46fecabf..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data
deleted file mode 100644
index 1379fd9c..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data and /dev/null differ
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data deleted file mode 100644 index ee87b15f..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data +++ /dev/null @@ -1,1452 +0,0 @@ - -»ó±ól systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. 
Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. 
-Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
- - - -±ó \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data deleted file mode 100644 index 6860441a..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data +++ /dev/null @@ -1,3 +0,0 @@ -/ -" gq†¸ÿ6\u8~:çò©6~ágÃæÖZ.è¸directT2 -" 6(¤¡•%İ„»¿ş.À°Ó¾5(û¼Èş·òû÷ ab recursive·T \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data deleted file mode 100644 index f57749f0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data deleted file mode 100644 index 6a0cbe82..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data +++ /dev/null @@ -1,3 +0,0 @@ - - -QáÃúàÚ€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data deleted file mode 100644 index 74de75af..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data deleted file mode 100644 index f4c039c2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data deleted file mode 100644 index 8eb2a515..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data deleted file mode 100644 index a9c1c069..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data 
deleted file mode 100644 index 1067edb4..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data +++ /dev/null @@ -1,6 +0,0 @@
-
-ÛÓ
-Ğȱwx‰çİxMÖú{
-D£ÕßzH/&^ñÁ ÍÏRS‰“ò/•Ûv,ËÛR
-ò=š€N¿¥÷g~üóİpf1®\[ä>ß%ŒîU‚1ñ@Q©¾Ê×€2&m6Èq¸¹QØ…ï] Î|½Å·!K E‰~J Ö•ì¦o¤j™Übïn3¨eTğ·)D+;s
-컓üı:Ty!c¾3šÕğƒ\*ş–­T7…‚E?[˜¢Pv}¼ÉA+´c†xù~şe¼ÈÓ
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data deleted file mode 100644 index 4741988d..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data +++ /dev/null @@ -1,4729 +0,0 @@
-
-Š€€€e academic attempts, AFS [6] has succeeded widely and is still in
-use today. Others [7, ?] have not attained the same success. Outside of
-academia, the most successful systems have been peer-to-peer
-file-sharing applications primarily geared toward large media (audio
-and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed
-large file distribution systems supporting over 100 million
-simultaneous users. Even today, BitTorrent maintains a massive
-deployment where tens of millions of nodes churn daily [16]. These
-applications saw greater numbers of users and files distributed than
-their academic file system counterparts. However, the applications were
-not designed as infrastructure to be built upon. While there have been
-successful repurposings (for example, Linux distributions use
-BitTorrent to transmit disk images, and Blizzard, Inc. uses it to
-distribute video game content), no general file-system has emerged that
-offers global, low-latency, and decentralized distribution.
-
-Perhaps this is because a “good enough” system for most use cases
-already exists: HTTP. By far, HTTP is the most successful “distributed
-system of files” ever deployed. Coupled with the browser, HTTP has had
-enormous technical and social impact. It has become the de facto way to
-transmit files across the internet. Yet, it fails to take advantage of
-dozens of brilliant file distribution techniques invented in the last
-fifteen years. From one perspective, evolving Web infrastructure is
-near-impossible, given the number of backwards compatibility
-constraints and the number of strong parties invested in the current
-model. But from another perspective, new protocols have emerged and
-gained wide use since the emergence of HTTP. What is lacking is
-upgrading design: enhancing the current HTTP web, and introducing new
-functionality without degrading user experience.
-
-Industry has gotten away with using HTTP this long because moving small
-files around is relatively cheap, even for small organizations with
-lots of traffic. But we are entering a new era of data distribution
-with new challenges: (a) hosting and distributing petabyte datasets,
-(b) computing on large data across organizations, (c) high-volume
-high-definition on-demand or real-time media streams, (d) versioning
-and linking of massive datasets, (e) preventing accidental
-disappearance of important files, and more. Many of these can be boiled
-down to “lots of data, accessible everywhere.” Pressed by critical
-features and bandwidth concerns, we have already given up HTTP for
-different data distribution protocols.
The next step is making them part of the Web itself.
-
-Orthogonal to efficient data distribution, version control systems have
-managed to develop important data collaboration workflows. Git, the
-distributed source code version control system, developed many useful
-ways to model and implement distributed data operations. The Git
-toolchain offers versatile versioning functionality that large file
-distribution systems severely lack. New solutions inspired by Git are
-emerging, such as Camlistore [?], a personal file storage system, and
-Dat [?], a data collaboration toolchain and dataset package manager.
-Git has already influenced distributed filesystem design [9], as its
-content-addressed Merkle DAG data model enables powerful file
-distribution strategies. What remains to be explored is how this data
-structure can influence the design of high-throughput oriented file
-systems, and how it might upgrade the Web itself.
-
-This paper introduces IPFS, a novel peer-to-peer version-controlled
-filesystem seeking to reconcile these issues. IPFS synthesizes
-learnings from many past successful systems. Careful interface-focused
-integration yields a system greater than the sum of its parts. The
-central IPFS principle is modeling all data as part of the same Merkle
-DAG.
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
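The content-addressed Merkle DAG is the one structural idea this background leans on, so a small sketch may help: each node is named by the hash of its own bytes and refers to its children by their hashes, so a single root hash pins and authenticates an entire immutable tree. This is a minimal illustration under stated assumptions, not IPFS's or Git's actual object encoding; the DagNode shape, the address helper, and the serialization used here are invented for the example.

```typescript
// Minimal sketch of a content-addressed Merkle DAG node (illustrative only).
import { createHash } from "crypto";

interface DagNode {
  data: Buffer;    // opaque payload for this node
  links: string[]; // hex hashes (addresses) of child nodes
}

// Serialize deterministically, then hash: the hash is the node's address.
function address(node: DagNode): string {
  const body = Buffer.concat([
    node.data,
    Buffer.from(node.links.join("\n"), "utf8"),
  ]);
  return createHash("sha256").update(body).digest("hex");
}

// Build a tiny two-level DAG: two leaf chunks and a root linking to them.
const leafA: DagNode = { data: Buffer.from("chunk A"), links: [] };
const leafB: DagNode = { data: Buffer.from("chunk B"), links: [] };
const root: DagNode = {
  data: Buffer.from("file manifest"),
  links: [address(leafA), address(leafB)],
};

// A peer holding only the root address can fetch children from anyone and
// verify them by re-hashing, which is what makes hash-linked distribution
// trustworthy across untrusted peers.
console.log("root address:", address(root));
```

Because verification needs only the data itself, any peer can serve any block without being trusted; that property underlies the distribution strategies discussed above.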
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data deleted file mode 100644 index df20559d..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data deleted file mode 100644 index ecce1053..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data +++ /dev/null @@ -1,4 +0,0 @@ -5 -" ¸˜µ×¾FØ_ëuØ”álúšzåS?™|Ú²ë­×Pc@ js-ipfs-repoŸ - - \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data deleted file mode 100644 index 96566028..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data +++ /dev/null @@ -1,6 +0,0 @@ - -ª¢5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ3 -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï dir-another0 -" TyÃ5 ;_9Yf»q€ƒFóLhylóœĞ/Éílevel-1à -¢ \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data deleted file mode 100644 index fa45ee79..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data deleted file mode 100644 index bbe6bda7..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data deleted file mode 100644 index b99ceb21..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data +++ /dev/null @@ -1,3 +0,0 @@ -, -" ø `ªuŸ>/2®âl ilÉfÚÉYB‘'M%’§Şbar - \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data deleted file mode 100644 index be380799..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data +++ /dev/null @@ -1,4730 +0,0 @@
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success.
Outside of academia, the most successfu€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data deleted file mode 100644 index 508cff2e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data deleted file mode 100644 index 0b520379..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data deleted file mode 100644 index e705b9b0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data deleted file mode 100644 index 725a9b22..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data +++ /dev/null @@ -1,5 +0,0 @@ - -@:4 -" siİö¹»"­Â¹Wë<§ö¦óG|…¶eòµ4  3 -1.2MiB.txtÎæL -: \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data deleted file mode 100644 index bfe6600f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data deleted file mode 100644 index 2424f592..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data deleted file mode 100644 index 72674694..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data deleted 
file mode 100644 index 3da92595..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data +++ /dev/null @@ -1,24 +0,0 @@ - -¸°The MIT License (MIT) - -Copyright (c) 2015 IPFS - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -° \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data deleted file mode 100644 index 2a6dbb58..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data +++ /dev/null @@ -1,2 +0,0 @@ - - äL €€ €€ €€ €€ ä  \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data deleted file mode 100644 index 8c345f38..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data +++ /dev/null @@ -1,4732 +0,0 @@ - -Š€€€ systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. 
It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows.
Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
-There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among th
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data
deleted file mode 100644
index e3ec206f..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data
+++ /dev/null
@@ -1,5 +0,0 @@
-5
-" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ3
-" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï dir-another0
-" TyÃ5 ;_9Yf»q€ƒFóLhylóœĞ/Éílevel-1à
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data
deleted file mode 100644
index ee87b9db..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data
+++ /dev/null
@@ -1,4 +0,0 @@
-0
-" ‹‡,¤îQv3–İk>\óIzxEî”ElÏM/fµLICENSE»1
-" JZ•XoRâXÏ!Fwd87U¨Å;£ÀSöWwí README.md{
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data
deleted file mode 100644
index 5ea0edda..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data
deleted file mode 100644
index e845c839..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
deleted file mode 100644
index 4eb5d7bf..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
+++ /dev/null
@@ -1,4 +0,0 @@
-
-A;5
-" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ
-;
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data
deleted file mode 100644
index a762644a..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data
deleted file mode 100644
index 8e5a1d76..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data
deleted file mode 100644
index 5b090964..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
deleted file mode 100644
index f9810363..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
+++ /dev/null
@@ -1,4728 +0,0 @@
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
\ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data deleted file mode 100644 index 62d1c297..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data +++ /dev/null @@ -1,8 +0,0 @@ - -ŽCome hang out in our IRC chat room if you have any questions.
- -Contact the ipfs dev team: -- Bugs: https://github.com/ipfs/go-ipfs/issues -- Help: irc.freenode.org/#ipfs -- Email: dev@ipfs.io -½ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data deleted file mode 100644 index 00360cfb..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data +++ /dev/null @@ -1,3 +0,0 @@ -4 -" ®çUÂŞFrÿé­ën¯÷óëbÅÁ⇾–?íğ|<¿ test-dataŸ½ø - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data deleted file mode 100644 index 026ac913..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data deleted file mode 100644 index 7c40850f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data deleted file mode 100644 index 912b64e0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data deleted file mode 100644 index 9f1e7af6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data deleted file mode 100644 index dcd69d0b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data deleted file mode 100644 index 71be805f..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data +++ /dev/null @@ -1,9 +0,0 @@ - -¿·Some helpful resources for finding your way around ipfs: - -- quick-start: a quick show of various ipfs features. 
-- ipfs commands: a list of all commands -- ipfs --help: every command describes itself -- https://github.com/ipfs/go-ipfs -- the src repository -- #ipfs on irc.freenode.org -- the community irc channel -· \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data deleted file mode 100644 index aacafb9f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data deleted file mode 100644 index ca141be2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data deleted file mode 100644 index 69e8f9e4..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data deleted file mode 100644 index 637f391c..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data +++ /dev/null @@ -1,2 +0,0 @@ - -›¸ƒíx\ú΃€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data deleted file mode 100644 index 44403205..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data +++ /dev/null @@ -1,3 +0,0 @@ - - -x\ú΃€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data deleted file mode 100644 index cbd601a6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data deleted file mode 100644 index 7b58d6c8..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data +++ /dev/null @@ -1,3 +0,0 @@ -/ -" æ@ŠÃ÷¬šÔ†D¯Éùg«âªçÆA÷»éŠ7directT2 -" “;AÓÔPŒßôY0ßõk®ù}ÃEç=šµp«á û¹ recursiveáT \ No 
newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data deleted file mode 100644 index 46d10573..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data deleted file mode 100644 index 3f5311b7..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data deleted file mode 100644 index f0b3a599..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data deleted file mode 100644 index a3e60c9e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data deleted file mode 100644 index bb713c56..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data deleted file mode 100644 index 5accb645..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data +++ /dev/null @@ -1,3 +0,0 @@ -5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data deleted file mode 100644 index c3a2f685..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data deleted file mode 100644 index a655cf83..00000000 --- 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data +++ /dev/null @@ -1,5 +0,0 @@ - -Ğȱwx‰çİxMÖú{ -D£ÕßzH/&^ñÁ ÍÏRS‰“ò/•Ûv,ËÛR -ò=š€N¿¥÷g~üóİpf1®\[ä>ß%ŒîU‚1ñ@Q©¾Ê×€2&m6Èq¸¹QØ…ï] Î|½Å·!K E‰~J Ö•ì¦o¤j™Übïn3¨eTğ·)D+;s -컓üı:Ty!c¾3šÕğƒ\*ş–­T7…‚E?[˜¢Pv}¼ÉA+´c†xù~şe¼È \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data deleted file mode 100644 index a8f98693..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data deleted file mode 100644 index 6d043733..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data +++ /dev/null @@ -1,2 +0,0 @@ - -réËÄ'Q°²#€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data deleted file mode 100644 index 1524efce..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data deleted file mode 100644 index 389e1117..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data +++ /dev/null @@ -1,28 +0,0 @@ - -ËÃHello and Welcome to IPFS! - -██╗██████╗ ███████╗███████╗ -██║██╔â•â•â–ˆâ–ˆâ•—██╔â•â•â•â•â•â–ˆâ–ˆâ•”â•â•â•â•â• -██║██████╔â•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ•— ███████╗ -██║██╔â•â•â•â• ██╔â•â•â• â•šâ•â•â•â•â–ˆâ–ˆâ•‘ -██║██║ ██║ ███████║ -â•šâ•â•â•šâ•â• â•šâ•â• â•šâ•â•â•â•â•â•â• - -If you're seeing this, you have successfully installed -IPFS and are now interfacing with the ipfs merkledag! - - ------------------------------------------------------- -| Warning: | -| This is alpha software. Use at your own discretion! | -| Much is missing or lacking polish. There are bugs. | -| Not yet secure. Read the security notes for more. 
| - ------------------------------------------------------- - -Check out some of the other files in this directory: - - ./about - ./help - ./quick-start <-- usage examples - ./readme <-- this file - ./security-notes -à \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data deleted file mode 100644 index 5a59204a..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data +++ /dev/null @@ -1,2 +0,0 @@ - -stem. Some€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data deleted file mode 100644 index 1a86e0be..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data deleted file mode 100644 index 74f62a02..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data +++ /dev/null @@ -1,3 +0,0 @@ - - -Ä'Q°²#€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data deleted file mode 100644 index 3a99c365..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data +++ /dev/null @@ -1,3 +0,0 @@ -4 -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï js-ipfs-repo - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING deleted file mode 100644 index a153331d..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING +++ /dev/null @@ -1 +0,0 @@ -/repo/flatfs/shard/v1/next-to-last/2 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data deleted file mode 100644 index 38a7ed3a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data deleted file mode 100644 index 562529a2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data deleted file mode 100644 index dedf499f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data deleted file mode 100644 index 9e5174d0..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data +++ /dev/null @@ -1,4 +0,0 @@ -5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ/ -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ïlevel-2 - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data deleted file mode 100644 index 5a3836e9..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data deleted file mode 100644 index a4027d46..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data deleted file mode 100644 index 10aa2ae4..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data deleted file mode 100644 index c1f9899a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data deleted file mode 100644 index 4e910622..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data deleted file mode 100644 index 871a6bf0..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data +++ 
/dev/null @@ -1,4729 +0,0 @@
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
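Since the passage above leans on Git's content-addressed Merkle DAG model, here is a minimal JavaScript sketch of the idea, purely illustrative and not the actual IPFS or UnixFS block format: a node's identifier is the hash of its serialized bytes, and parents reference children by those hashes, so any change in a leaf changes every ancestor's address. The `cid` and `createNode` helpers are hypothetical names introduced only for this example.

```js
// Illustrative sketch of content addressing in a Merkle DAG (assumed shapes,
// not the real IPFS/UnixFS codec). Runs under Node.js using only built-ins.
const { createHash } = require('crypto')

// Hypothetical helper: derive a content address from serialized bytes.
function cid (bytes) {
  return createHash('sha256').update(bytes).digest('hex')
}

// Hypothetical node: opaque data plus named links to child addresses.
function createNode (data, links = {}) {
  const serialized = JSON.stringify({ data, links }) // stand-in for a real codec
  return { data, links, hash: cid(serialized) }
}

// Two leaves and a root that links to them by hash.
const leafA = createNode('hello')
const leafB = createNode('world')
const root = createNode('', { a: leafA.hash, b: leafB.hash })

console.log(root.hash) // editing either leaf would change the root's address
```

Deduplication falls out of the same property: identical subtrees hash to the same address, so they can be stored and transferred once.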

There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?]
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. 
-There have been many attempts at constructing a global -distributed file sy€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data deleted file mode 100644 index a6e00f34..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data deleted file mode 100644 index b6539897..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data deleted file mode 100644 index 6b72d373..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data +++ /dev/null @@ -1,2 +0,0 @@ - -uºÀ¼ ®­r[€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data deleted file mode 100644 index 9cda061b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data deleted file mode 100644 index 7f2f4e92..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data deleted file mode 100644 index 2dd80560..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data +++ /dev/null @@ -1,114 +0,0 @@ - - – # 0.1 - Quick Start - -This is a set of short examples with minimal explanation. It is meant as -a "quick start". 
Soon, we'll write a longer tour :-) - - -Add a file to ipfs: - - echo "hello world" >hello - ipfs add hello - - -View it: - - ipfs cat - - -Try a directory: - - mkdir foo - mkdir foo/bar - echo "baz" > foo/baz - echo "baz" > foo/bar/baz - ipfs add -r foo - - -View things: - - ipfs ls - ipfs ls /bar - ipfs cat /baz - ipfs cat /bar/baz - ipfs cat /bar - ipfs ls /baz - - -References: - - ipfs refs - ipfs refs -r - ipfs refs --help - - -Get: - - ipfs get foo2 - diff foo foo2 - - -Objects: - - ipfs object get - ipfs object get /foo2 - ipfs object --help - - -Pin + GC: - - ipfs pin -r - ipfs gc - ipfs ls - ipfs unpin -r - ipfs gc - - -Daemon: - - ipfs daemon (in another terminal) - ipfs id - - -Network: - - (must be online) - ipfs swarm peers - ipfs id - ipfs cat - - -Mount: - - (warning: fuse is finicky!) - ipfs mount - cd /ipfs/< - - -Tool: - - ipfs version - ipfs update - ipfs commands - ipfs config --help - open http://localhost:5001/webui - - -Browse: - - webui: - - http://localhost:5001/webui - - video: - - http://localhost:8080/ipfs/QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu/play#/ipfs/QmTKZgRNwDNZwHtJSjCp6r5FYefzpULfy37JvMt9DwvXse - - images: - - http://localhost:8080/ipfs/QmZpc3HvfjEXvLWGQPWbHk3AjD5j8NEN4gmFN8Jmrd5g83/cs - - markdown renderer app: - - http://localhost:8080/ipfs/QmX7M9CiYXjVeFnkfVGf3y5ixTZ2ACeSGyL1vBJY1HvQPp/mdown -– \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data deleted file mode 100644 index 64ce0aeb..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data deleted file mode 100644 index 81663143..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data deleted file mode 100644 index b75d8023..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data deleted file mode 100644 index 9553a942..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data deleted file mode 100644 index e80dbd9a..00000000 Binary files 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data deleted file mode 100644 index d899663b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data deleted file mode 100644 index ba0caf40..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data deleted file mode 100644 index 1d48c015..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data deleted file mode 100644 index b1df8c51..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data deleted file mode 100644 index b0ac590e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data deleted file mode 100644 index 3b40300d..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data deleted file mode 100644 index 819ec6cf..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data deleted file mode 100644 
index c57d7186..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README deleted file mode 100644 index 23cb0909..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README +++ /dev/null @@ -1,30 +0,0 @@ -This is a repository of IPLD objects. Each IPLD object is in a single file, -named <base32-cid>.data, where <base32-cid> is the -"base32" encoding of the CID (as specified in -https://github.com/multiformats/multibase) without the 'B' prefix. -All the object files are placed in a tree of directories, based on a -function of the CID. This is a form of sharding similar to -the objects directory in git repositories. Previously, we used -prefixes; we now use the next-to-last two characters. - - func NextToLast(base32cid string) string { - nextToLastLen := 2 - offset := len(base32cid) - nextToLastLen - 1 - return base32cid[offset : offset+nextToLastLen] - } - -For example, an object with a base58 CIDv1 of - - zb2rhYSxw4ZjuzgCnWSt19Q94ERaeFhu9uSqRgjSdx9bsgM6f - -has a base32 CIDv1 of - - BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA - -and will be placed at - - SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data - -with 'SC' being the next-to-last two characters; the 'B' at the -beginning of the CIDv1 string is the multibase prefix and is not -stored in the filename. diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb deleted file mode 100644 index fc04d660..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb deleted file mode 100644 index 63d9d260..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT deleted file mode 100644 index 5b540107..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT +++ /dev/null @@ -1 +0,0 @@ -MANIFEST-000011 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG deleted file mode 100644 index fb2ef830..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG +++ /dev/null @@ -1,5 +0,0 @@ -=============== Aug 19, 2016 (CEST) =============== -15:48:10.633634 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -15:48:10.634191 db@open opening -15:48:10.639318 db@janitor F·4 G·0 -15:48:10.639379 db@open done T·5.16729ms diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old deleted file mode 100644 index f5ffd612..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old +++ /dev/null @@ -1,7 +0,0 @@ -=============== Apr 22, 2016 (WEST) ===============
-03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -03:16:42.272857 db@open opening -03:16:42.275673 db@janitor F·4 G·0 -03:16:42.275700 db@open done T·2.831108ms -03:16:42.596938 db@close closing -03:16:42.597082 db@close done T·139.194µs diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 deleted file mode 100644 index 7af87ca8..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/version b/packages/ipfs-unixfs-importer/test/test-repo/version deleted file mode 100644 index 1e8b3149..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/version +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/packages/ipfs-unixfs/README.md b/packages/ipfs-unixfs/README.md index b196391e..c5242f97 100644 --- a/packages/ipfs-unixfs/README.md +++ b/packages/ipfs-unixfs/README.md @@ -204,4 +204,3 @@ This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/c ## License [MIT](LICENSE) -
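The quick-start document deleted above drives everything through the `ipfs` CLI (`ipfs add hello`, `ipfs cat <hash>`, and so on). A rough programmatic counterpart, using the `ipfs-unixfs-importer` package whose test fixtures this diff removes, might look like the sketch below. It is a minimal sketch, not the fixture-generation code: it assumes the `importer(source, blockstore)` entry point and the `MemoryBlockstore` from `blockstore-core` as documented in the current package README, which may differ from the API of the version this diff targets, and the file name and contents are illustrative only.

```ts
import { importer } from 'ipfs-unixfs-importer'
import { MemoryBlockstore } from 'blockstore-core/memory'

// In-memory blockstore standing in for the on-disk test repo removed by this diff
const blockstore = new MemoryBlockstore()

// Roughly equivalent to: echo "hello world" > hello && ipfs add hello
const source = [{
  path: 'hello',
  content: new TextEncoder().encode('hello world\n')
}]

// The importer is an async generator; each entry carries the CID you would
// pass to `ipfs cat <hash>` on the CLI (requires an ESM/top-level-await context)
for await (const entry of importer(source, blockstore)) {
  console.log(entry.path, entry.cid.toString())
}
```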
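The deleted `blocks/_README` above describes the layout of the removed block files: the filename is the base32 CIDv1 string without its 'B' multibase prefix, and each file lives in a shard directory named after the next-to-last two characters of that string. As a minimal, self-contained illustration of that rule in TypeScript (the function names `nextToLastShard` and `blockPath` are ours, not part of any package in this repo):

```ts
// Shard directory for a base32 CIDv1 string such as "BAFKREI...":
// the two characters immediately before the final character.
function nextToLastShard (base32Cid: string): string {
  // Drop the 'B' multibase prefix to get the filename form; this does not
  // change the tail of the string, so the shard is the same either way.
  const key = base32Cid.startsWith('B') ? base32Cid.slice(1) : base32Cid
  const nextToLastLen = 2
  const offset = key.length - nextToLastLen - 1
  return key.slice(offset, offset + nextToLastLen)
}

// Full relative path of a block file inside the blocks/ directory.
function blockPath (base32Cid: string): string {
  const key = base32Cid.startsWith('B') ? base32Cid.slice(1) : base32Cid
  return `${nextToLastShard(base32Cid)}/${key}.data`
}

// Reproduces the worked example from the deleted README:
// prints "SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data"
console.log(blockPath('BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA'))
```

The output matches the `SC/` example in the README, and the same rule accounts for the `2B/`, `VR/`, `ZP/`, and similar shard directories seen throughout the deleted test-repo paths in this diff.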