diff --git a/.travis.yml b/.travis.yml
index 38111308..4883afbd 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -64,7 +64,6 @@ jobs:
       name: release rc
       script:
         - echo "//registry.npmjs.org/:_authToken=\${NPM_TOKEN}" > .npmrc
-        - npm whoami
         - npm run release:rc -- --yes
 
 notifications:
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data
deleted file mode 100644
index 70641f44..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2B/CIQCLHFOKW5OR6TMLOEULA42ZCNIUH5AGNU7R5OCASITUKITSBP22BA.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
deleted file mode 100644
index 41456196..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2F/CIQEUWUVLBXVFYSYCHHSCRTXCYHGIOBXKWUMKFR3UPAFHQ5WK5362FQ.data
+++ /dev/null
@@ -1,4 +0,0 @@
-
-# js-ipfs-repo
-Implementation of the IPFS repo spec (https://github.com/ipfs/specs/tree/master/repo) in JavaScript
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data
deleted file mode 100644
index ce734230..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/2P/CIQLCBXFJEBKPWASINYRB5OBXDOZ3PBXLDJQNRVNYD7HUBGVZD6T2PA.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data
deleted file mode 100644
index c52fcda4..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/36/CIQPCRPIF437ZUEIOXQTYFMJJUQQQYH7GD5KH5KAGZO5AH32WSYA36Y.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data
deleted file mode 100644
index 4f882793..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/4Q/CIQNGE6QOMDGK6PZN47RUX6ME526TDJRTIQD6I4KHCKAQFAK3UQR4QI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data
deleted file mode 100644
index 615417b1..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5B/CIQISQH67DCYN567CMOT7WV5DEB4G2V23S5VLOHTKJCG5DLHY3D65BY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data
deleted file mode 100644
index 951bfe04..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/5V/CIQFFRR4O52TS2Z7QLDDTF32OIR4FWLKT5YLL7MLDVIT7DC3NHOK5VA.data
+++ /dev/null
@@ -1,23 +0,0 @@
-
-IPFS Alpha Security Notes
-
-We try hard to ensure our system is safe and robust, but all software
-has bugs, especially new software. This distribution is meant to be an
-alpha preview, don't use it for anything mission critical.
-
-Please note the following:
-
-- This is alpha software and has not been audited. It is our goal
-  to conduct a proper security audit once we close in on a 1.0 release.
-
-- ipfs is a networked program, and may have serious undiscovered
-  vulnerabilities. It is written in Go, and we do not execute any
-  user provided data. But please point any problems out to us in a
-  github issue, or email security@ipfs.io privately.
-
-- ipfs uses encryption for all communication, but it's NOT PROVEN SECURE
-  YET! It may be totally broken. For now, the code is included to make
-  sure we benchmark our operations with encryption in mind. In the future,
-  there will be an "unsafe" mode for high performance intranet apps.
-  If this is a blocking feature for you, please contact us.
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data
deleted file mode 100644
index d19d0c86..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6S/CIQNA2D5X3XOZKCQR3S572FA2I3OAXB7BL7JBZBVJCWPAKBQEUT56SI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data
deleted file mode 100644
index 42f65bd9..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/6Z/CIQP64AMQTERTSCVPF6RWBY5Z6PAZJUPHBNDXE7DCRPCRG6FKVSG6ZQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data
deleted file mode 100644
index c9885c45..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/75/CIQMB7DLJFKD267QJ2B5FJNHZPTSVA7IB6OHXSQ2XSVEEKMKK6RT75I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data
deleted file mode 100644
index e743bdbf..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/77/CIQJQST6CUA4IK56QW7VN4KFDQHKPSA6IA4NGFBUYHVR7FGTHSWB77I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data
deleted file mode 100644
index 627ffcdf..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7J/CIQKKLBWAIBQZOIS5X7E32LQAL6236OUKZTMHPQSFIXPWXNZHQOV7JQ.data
+++ /dev/null
@@ -1,55 +0,0 @@
-
-IPFS -- Inter-Planetary File system
-
-IPFS is a global, versioned, peer-to-peer filesystem. It combines good ideas
-from Git, BitTorrent, Kademlia, SFS, and the Web. It is like a single bit-
-torrent swarm, exchanging git objects. IPFS provides an interface as simple
-as the HTTP web, but with permanence built in. You can also mount the world
-at /ipfs.
-
-IPFS is a protocol:
-- defines a content-addressed file system
-- coordinates content delivery
-- combines Kademlia + BitTorrent + Git
-
-IPFS is a filesystem:
-- has directories and files
-- mountable filesystem (via FUSE)
-
-IPFS is a web:
-- can be used to view documents like the web
-- files accessible via HTTP at `http://ipfs.io/`
-- browsers or extensions can learn to use `ipfs://` directly
-- hash-addressed content guarantees authenticity
-
-IPFS is modular:
-- connection layer over any network protocol
-- routing layer
-- uses a routing layer DHT (kademlia/coral)
-- uses a path-based naming service
-- uses bittorrent-inspired block exchange
-
-IPFS uses crypto:
-- cryptographic-hash content addressing
-- block-level deduplication
-- file integrity + versioning
-- filesystem-level encryption + signing support
-
-IPFS is p2p:
-- worldwide peer-to-peer file transfers
-- completely decentralized architecture
-- **no** central point of failure
-
-IPFS is a cdn:
-- add a file to the filesystem locally, and it's now available to the world
-- caching-friendly (content-hash naming)
-- bittorrent-based bandwidth distribution
-
-IPFS has a name service:
-- IPNS, an SFS inspired name system
-- global namespace based on PKI
-- serves to build trust chains
-- compatible with other NSes
-- can map DNS, .onion, .bit, etc to IPNS
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data
deleted file mode 100644
index 42c502e2..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/7R/CIQGNXYJ6NHQTTNWY7E7MLOVZD5BRFMJL3H27REITBBUWURIKQTP7RQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data
deleted file mode 100644
index 46fecabf..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/A3/CIQD76D3PRB4H6QE6C7DJX224YXYFEKLENPHGHEJPQZ723Z4L4IUA3I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data
deleted file mode 100644
index 1379fd9c..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQAC3NNL5OBTUOK2A4CXXDS5Y6DLLCSC26OJVKNQ5IVDXLHGM5HADA.data and /dev/null differ
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data deleted file mode 100644 index ee87b15f..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AD/CIQDKCWPMK632NCNXGUY4TT465TRBMZJ5XRELAX655W3OS5K7ZHVADI.data +++ /dev/null @@ -1,1452 +0,0 @@ - -»ó±ól systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. 
Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. 
-Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
- - - -±ó \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data deleted file mode 100644 index 6860441a..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AE/CIQONICFQZH7QVU6IPSIM3AK7AD554D3BWZPAGEAQYQOWMFZQDUUAEI.data +++ /dev/null @@ -1,3 +0,0 @@ -/ -" gq†¸ÿ6\u8~:çò©6~ágÃæÖZ.è¸directT2 -" 6(¤¡•%İ„»¿ş.À°Ó¾5(û¼Èş·òû÷ ab recursive·T \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data deleted file mode 100644 index f57749f0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQHG2O56243WIVNYK4VP2Z4U73KN42HPSC3MZPSDW2QQNAJD6ICAMY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data deleted file mode 100644 index 6a0cbe82..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AM/CIQLILAMXCIV5B2ONEE63TLHVKS52D52O77I52JGFOH7U3ACVOKNAMY.data +++ /dev/null @@ -1,3 +0,0 @@ - - -QáÃúàÚ€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data deleted file mode 100644 index 74de75af..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AP/CIQHAKDLTL5GMIFGN5YVY4BA22FPHUIODJEXS4LCTQDWA275XAJDAPI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data deleted file mode 100644 index f4c039c2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AU/CIQHESGWL6ZUVT4RKZS6V2ZJ4IDV7RR6M6FQFAOFWXOJYNVRTHTMAUI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data deleted file mode 100644 index 8eb2a515..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/AY/CIQGCTOPJA57F6YZZAVSFL44RVD2GWTNJ7OPQXRT3NG7NGD633QHAYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data deleted file mode 100644 index a9c1c069..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/BY/CIQGTEDX5RFSVBJ7FQAHSZGMC5HOO4XG5GZAZRZ5EA43NKKQEJXWBYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data 
deleted file mode 100644 index 1067edb4..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C2/CIQK5PD7SAKEC7TPLYXS3EZQRAVYTFPR5ZY6HTI7DDQAG6AZQOXIC2I.data +++ /dev/null @@ -1,6 +0,0 @@
-
-ÛÓ
-Ğȱwx‰çİxMÖú{
-D£ÕßzH/&^ñÁ ÍÏRS‰“ò/•Ûv,ËÛR
-ò=š€N¿¥÷g~üóİpf1®\[ä>ß%ŒîU‚1ñ@Q©¾Ê×€2&m6Èq¸¹QØ…ï] Î|½Å·!K E‰~J Ö•ì¦o¤j™Übïn3¨eTğ·)D+;s
-컓üı:Ty!c¾3šÕğƒ\*ş–­T7…‚E?[˜¢Pv}¼ÉA+´c†xù~şe¼ÈÓ
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data deleted file mode 100644 index 4741988d..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQHS67KEOPN356BDJDRIMDCS5NQBMLCVMUGUAM2BWZNUWV7WZ7FC3Q.data +++ /dev/null @@ -1,4729 +0,0 @@
-
-Š€€€e academic attempts, AFS [6] has succeeded widely and is still in
-use today. Others [7, ?] have not attained the same success. Outside of
-academia, the most successful systems have been peer-to-peer
-file-sharing applications primarily geared toward large media (audio
-and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed
-large file distribution systems supporting over 100 million
-simultaneous users. Even today, BitTorrent maintains a massive
-deployment where tens of millions of nodes churn daily [16]. These
-applications saw greater numbers of users and files distributed than
-their academic file system counterparts. However, the applications were
-not designed as infrastructure to be built upon. While there have been
-successful repurposings (for example, Linux distributions use
-BitTorrent to transmit disk images, and Blizzard, Inc. uses it to
-distribute video game content), no general file-system has emerged that
-offers global, low-latency, and decentralized distribution.
-
-Perhaps this is because a “good enough” system for most use cases
-already exists: HTTP. By far, HTTP is the most successful “distributed
-system of files” ever deployed. Coupled with the browser, HTTP has had
-enormous technical and social impact. It has become the de facto way to
-transmit files across the internet. Yet, it fails to take advantage of
-dozens of brilliant file distribution techniques invented in the last
-fifteen years. From one perspective, evolving Web infrastructure is
-near-impossible, given the number of backwards compatibility
-constraints and the number of strong parties invested in the current
-model. But from another perspective, new protocols have emerged and
-gained wide use since the emergence of HTTP. What is lacking is
-upgrading design: enhancing the current HTTP web, and introducing new
-functionality without degrading user experience.
-
-Industry has gotten away with using HTTP this long because moving small
-files around is relatively cheap, even for small organizations with
-lots of traffic. But we are entering a new era of data distribution
-with new challenges: (a) hosting and distributing petabyte datasets,
-(b) computing on large data across organizations, (c) high-volume
-high-definition on-demand or real-time media streams, (d) versioning
-and linking of massive datasets, (e) preventing accidental
-disappearance of important files, and more. Many of these can be boiled
-down to “lots of data, accessible everywhere.” Pressed by critical
-features and bandwidth concerns, we have already given up HTTP for
-different data distribution protocols.
The next step is making them part of the Web itself.
-
-Orthogonal to efficient data distribution, version control systems have
-managed to develop important data collaboration workflows. Git, the
-distributed source code version control system, developed many useful
-ways to model and implement distributed data operations. The Git
-toolchain offers versatile versioning functionality that large file
-distribution systems severely lack. New solutions inspired by Git are
-emerging, such as Camlistore [?], a personal file storage system, and
-Dat [?], a data collaboration toolchain and dataset package manager.
-Git has already influenced distributed filesystem design [9], as its
-content-addressed Merkle DAG data model enables powerful file
-distribution strategies. What remains to be explored is how this data
-structure can influence the design of high-throughput oriented file
-systems, and how it might upgrade the Web itself.
-
-This paper introduces IPFS, a novel peer-to-peer version-controlled
-filesystem seeking to reconcile these issues. IPFS synthesizes
-learnings from many past successful systems. Careful interface-focused
-integration yields a system greater than the sum of its parts. The
-central IPFS principle is modeling all data as part of the same Merkle
-DAG.
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume, high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
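The content-addressed Merkle DAG is the one structural idea this background leans on, so a small sketch may help: each node is named by the hash of its own bytes and refers to its children by their hashes, so a single root hash pins and authenticates an entire immutable tree. This is a minimal illustration under stated assumptions, not IPFS's or Git's actual object encoding; the DagNode shape, the address helper, and the serialization used here are invented for the example.

```typescript
// Minimal sketch of a content-addressed Merkle DAG node (illustrative only).
import { createHash } from "crypto";

interface DagNode {
  data: Buffer;    // opaque payload for this node
  links: string[]; // hex hashes (addresses) of child nodes
}

// Serialize deterministically, then hash: the hash is the node's address.
function address(node: DagNode): string {
  const body = Buffer.concat([
    node.data,
    Buffer.from(node.links.join("\n"), "utf8"),
  ]);
  return createHash("sha256").update(body).digest("hex");
}

// Build a tiny two-level DAG: two leaf chunks and a root linking to them.
const leafA: DagNode = { data: Buffer.from("chunk A"), links: [] };
const leafB: DagNode = { data: Buffer.from("chunk B"), links: [] };
const root: DagNode = {
  data: Buffer.from("file manifest"),
  links: [address(leafA), address(leafB)],
};

// A peer holding only the root address can fetch children from anyone and
// verify them by re-hashing, which is what makes hash-linked distribution
// trustworthy across untrusted peers.
console.log("root address:", address(root));
```

Because verification needs only the data itself, any peer can serve any block without being trusted; that property underlies the distribution strategies discussed above.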
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data deleted file mode 100644 index df20559d..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C3/CIQLFQWZO45IGFSCQEDJKOZCAYNHTNVD6NVXVPWWBHATU2YG4E6HC3A.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data deleted file mode 100644 index ecce1053..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/C4/CIQDDZ5EDQK5AP7LRTLZHQZUR2R3GECRFV3WPKNL7PL2SKFIL2LXC4Y.data +++ /dev/null @@ -1,4 +0,0 @@ -5 -" ¸˜µ×¾FØ_ëuØ”álúšzåS?™|Ú²ë­×Pc@ js-ipfs-repoŸ - - \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data deleted file mode 100644 index 96566028..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CB/CIQGXO5AO2FIISVRDFHFQ5W7W3HEHGNS5KD2JNYYZGCQ3NQPV22QCBI.data +++ /dev/null @@ -1,6 +0,0 @@ - -ª¢5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ3 -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï dir-another0 -" TyÃ5 ;_9Yf»q€ƒFóLhylóœĞ/Éílevel-1à -¢ \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data deleted file mode 100644 index fa45ee79..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CJ/CIQLMFYJNAZ2H33DVIPIS3A7S2WIMHBY62BY5OFCIR2NVHZUHT7PCJY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data deleted file mode 100644 index bbe6bda7..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/CY/CIQDMKFEUGKSLXMEXO774EZOYCYNHPRVFD53ZSAU7237F67XDSQGCYQ.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data deleted file mode 100644 index b99ceb21..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DJ/CIQCKYVVKKTZ5H7RDBHOBR72KDZZ7Y4BAVSMNITBWIGNAQQFG4UUDJQ.data +++ /dev/null @@ -1,3 +0,0 @@ -, -" ø `ªuŸ>/2®âl ilÉfÚÉYB‘'M%’§Şbar - \ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data deleted file mode 100644 index be380799..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQH7E5UPL3DQGEXQXDT2QKNMU2GRLW43TH7QFHITLDVLJKQFZYEDZI.data +++ /dev/null @@ -1,4730 +0,0 @@
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success.
Outside of academia, the most successfu€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data deleted file mode 100644 index 508cff2e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/DZ/CIQL7TG2PB52XIZLLHDYIUFMHUQLMMZWBNBZSLDXFCPZ5VDNQQ2WDZQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data deleted file mode 100644 index 0b520379..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/E7/CIQB3KY36M7HMZI5BLRPMPN7LOPHD2LZWNGBZR5BTOBHNWGBDFTAE7A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data deleted file mode 100644 index e705b9b0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EN/CIQCRUFL6YJQJMIKI6BX7HZT3BZQJV45EDKQXCYAUET3RJH5DDU3ENY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data deleted file mode 100644 index 725a9b22..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/EV/CIQJ5DVL3AD53EYMWBQG7MMACDUYWJDJ57AS6VIHPMOWJZEFZDUBEVI.data +++ /dev/null @@ -1,5 +0,0 @@ - -@:4 -" siİö¹»"­Â¹Wë<§ö¦óG|…¶eòµ4  3 -1.2MiB.txtÎæL -: \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data deleted file mode 100644 index bfe6600f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/F4/CIQEQSMVHW6MIRMY6OMBNBTMNWLDKXZT5LYQATLBVMMIB7PLH3O6F4A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data deleted file mode 100644 index 2424f592..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FD/CIQJ2TCGF6GE6PCUOMKLCKVFYKXRQ3TSVG6EZM2UY5ZIBJ22L43SFDQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data deleted file mode 100644 index 72674694..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FH/CIQA2BWUV64FUQIWMLOIQLCSZHDZ45BCX3DCYBTPQIKXAWEAW3J2FHA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data deleted 
file mode 100644 index 3da92595..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FN/CIQIXBZMUTXFC5QIGMLJNXLLHZOPGSL2PBC65D4UIVWM6TI5F5TAFNI.data +++ /dev/null @@ -1,24 +0,0 @@ - -¸°The MIT License (MIT) - -Copyright (c) 2015 IPFS - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. - -° \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data deleted file mode 100644 index 2a6dbb58..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FO/CIQEJ66ULKLILRVZ27ZTFGBIC3UBVLG47MFXU5BNPTUNOD6T2YUXFOI.data +++ /dev/null @@ -1,2 +0,0 @@ - - äL €€ €€ €€ €€ ä  \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data deleted file mode 100644 index 8c345f38..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/FX/CIQHQPKVAQUPZWCBVFLZUCHA2EDBTJBDRNVM3RZ4RT3JGIJA4H3OFXY.data +++ /dev/null @@ -1,4732 +0,0 @@ - -Š€€€ systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. 
It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years.

¹ For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows.
Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
-There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among th
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data
deleted file mode 100644
index e3ec206f..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GA/CIQHJVBBNCQ4MK2HS3TXVFMR3DY7VNDIL5THJ2V6ZR3XHEJTMO2FGAI.data
+++ /dev/null
@@ -1,5 +0,0 @@
-5
-" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ3
-" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï dir-another0
-" TyÃ5 ;_9Yf»q€ƒFóLhylóœĞ/Éílevel-1à
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data
deleted file mode 100644
index ee87b9db..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/GQ/CIQH7OEYWXL34RWYL7VXLWEU4FWPVGT24VJT7DUZPTNLF25N25IGGQA.data
+++ /dev/null
@@ -1,4 +0,0 @@
-0
-" ‹‡,¤îQv3–İk>\óIzxEî”ElÏM/fµLICENSE»1
-" JZ•XoRâXÏ!Fwd87U¨Å;£ÀSöWwí README.md{
-
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data
deleted file mode 100644
index 5ea0edda..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQDDVW2EZIJF4NQH7WJNESD7XHQSXA5EGJVNTPVHD7444C2KLKXHDI.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data
deleted file mode 100644
index e845c839..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HD/CIQHILW6C2T5CZPUWAAOL5AJJEZ3MHJ5BNX7WPFW5RNT53JSYDZKHDY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
deleted file mode 100644
index 4eb5d7bf..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQOSORZMMBDPROY2NYNBLJRYMPGU23M5WLZD7BMT7RIF7RFJEOHHVY.data
+++ /dev/null
@@ -1,4 +0,0 @@
-
-A;5
-" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ
-;
\ No newline at end of file
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data
deleted file mode 100644
index a762644a..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/HV/CIQPPIDDACRG4OUFQJQ7HWNALKTF6V5TVUZG34DEHJWZ2CFADQEQHVY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data
deleted file mode 100644
index 8e5a1d76..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/I2/CIQMRL3OZOQ6FWVCILNBKQYHSKHGOHWXORGFUFDU6Z3SFI6MWC7CI2I.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data
deleted file mode 100644
index 5b090964..00000000
Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IB/CIQMYGTLMBSYWR7X6Z676GQTL3W5NOSHG2QSNMWMASRY47R6DISDIBY.data and /dev/null differ
diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
deleted file mode 100644
index f9810363..00000000
--- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IH/CIQA44S3C5B67N6QBLHMMHVPPOUE7L6CUBCDZVQGGAOYAGF3PHL3IHQ.data
+++ /dev/null
@@ -1,4728 +0,0 @@
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
\ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data deleted file mode 100644 index 62d1c297..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/IL/CIQJFGRQHQ45VCQLM7AJNF2GF5UHUAGGHC6LLAH6VYDEKLQMD4QLILY.data +++ /dev/null @@ -1,8 +0,0 @@ - -ŽCome hang out in our IRC chat room if you have any questions.
- -Contact the ipfs dev team: -- Bugs: https://github.com/ipfs/go-ipfs/issues -- Help: irc.freenode.org/#ipfs -- Email: dev@ipfs.io -½ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data deleted file mode 100644 index 00360cfb..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/J3/CIQCUWNWXF4BLH7JZBCXMVGYBS5FJPGJ6W6SP2WIZ4K7PJVNE4IXJ3I.data +++ /dev/null @@ -1,3 +0,0 @@ -4 -" ®çUÂŞFrÿé­ën¯÷óëbÅÁ⇾–?íğ|<¿ test-dataŸ½ø - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data deleted file mode 100644 index 026ac913..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KB/CIQFDNVHVCWCRUXO3W7UGOPXRQBCBQYCHLVLZGSOLCJH6MH53TULKBI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data deleted file mode 100644 index 7c40850f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KL/CIQI55DVBRTRLGOEFG3NSUVUVDC5FV3BQJ6BDG6TQLX5ZMDXH3CDKLY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data deleted file mode 100644 index 912b64e0..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KN/CIQC2GMPEBY4FY6LK3KAVQ6KPD2RFEURQB672H4QQPHGWO7HJZAIKNA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data deleted file mode 100644 index 9f1e7af6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/KV/CIQEB4J2WBPZIHHIIG4LFC3VPCUL7IRICU6DOD4BWS6GFOQNMZSAKVI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data deleted file mode 100644 index dcd69d0b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LA/CIQOK7LUYNC5GJDWTV6SEZ4ZJJ64QAMOZ7DFV5GWMJMTD3PN73HPLAY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data deleted file mode 100644 index 71be805f..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LG/CIQJBQD2O6K4CGJVCCTJNUP57QHR4SKHZ74OIITBBGLOMCO3ZOLWLGA.data +++ /dev/null @@ -1,9 +0,0 @@ - -¿·Some helpful resources for finding your way around ipfs: - -- quick-start: a quick show of various ipfs features. 
-- ipfs commands: a list of all commands -- ipfs --help: every command describes itself -- https://github.com/ipfs/go-ipfs -- the src repository -- #ipfs on irc.freenode.org -- the community irc channel -· \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data deleted file mode 100644 index aacafb9f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LM/CIQIFHHL6JGE5AT5CX66FZUHQKFZ3F5J4HDTDEDSFIFRZJOOHYHYLMI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data deleted file mode 100644 index ca141be2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LO/CIQJLKDKHMLW3CBIFI6FNN3RDDQJQ37UPBHLBSDE4IODA3OGUYZNLOI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data deleted file mode 100644 index 69e8f9e4..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/LU/CIQKRGVCTXRXFOEYT7SM4CCDVTLJKVZBCKJ2K25QUHDSDC54EBIKLUI.data +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data deleted file mode 100644 index 637f391c..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/MJ/CIQJIH4MW534AFHKJ4RJWSADQQKP5JF75FJD5HAG2FNU24Y5GDLNMJA.data +++ /dev/null @@ -1,2 +0,0 @@ - -›¸ƒíx\ú΃€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data deleted file mode 100644 index 44403205..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NW/CIQHKHTR6BILKGTUCWOFDAU3EEHTE3TTXRHQU4JOD5RWMJNIKFKCNWA.data +++ /dev/null @@ -1,3 +0,0 @@ - - -x\ú΃€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data deleted file mode 100644 index cbd601a6..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/NY/CIQAS5P5V6R6ZWYCMEMIG77GPKPNN3IR55NKZVQ2KFWN35IZWHFVNYI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data deleted file mode 100644 index 7b58d6c8..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O6/CIQOYW2THIZBRGI7IN33ROGCKOFZLXJJ2MPKYZBTV4H3N7GYHXMAO6A.data +++ /dev/null @@ -1,3 +0,0 @@ -/ -" æ@ŠÃ÷¬šÔ†D¯Éùg«âªçÆA÷»éŠ7directT2 -" “;AÓÔPŒßôY0ßõk®ù}ÃEç=šµp«á û¹ recursiveáT \ No 
newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data deleted file mode 100644 index 46d10573..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/O7/CIQP7DQNED7CUCZXDXMNPJC2VQXADCIZ6KIZ3JF6FZDZYYKJMDCFO7Y.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data deleted file mode 100644 index 3f5311b7..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OC/CIQC2GOQFKDUPKVP4XNQVV7M5ZJ4MP57IEM3W2WKV6GLUPPIZTZXOCQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data deleted file mode 100644 index f0b3a599..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OD/CIQOJATAB4IXQU4GS3N6IKABVBWLIVP5HQQOBQLSENPEZBXAU5WGODY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data deleted file mode 100644 index a3e60c9e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/OY/CIQDEMXSPYNNJT5CP2V6OMAMSK3VAN5F525AUQF4O5SHFSENFK6MOYA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data deleted file mode 100644 index bb713c56..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/P7/CIQDOBRSMVCPS3KLV6TKWC6SUHX5RG5GAWZO4GENXABVJY3S6QTDP7I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data deleted file mode 100644 index 5accb645..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/PX/CIQPRIAYMCVHLHZ6F4ZK5YTMBUFWS3GJM3NMSWKCSETRGTI5EWJKPXQ.data +++ /dev/null @@ -1,3 +0,0 @@ -5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data deleted file mode 100644 index c3a2f685..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QE/CIQBIGRKU5D2NNT76M7A62F6AVPNDXPC7EBVACIKPANWX3MEZ5FOQEA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data deleted file mode 100644 index a655cf83..00000000 --- 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQCIGHH7ZD6OLFCIHXDI65QV7HXRBC2F4XBVG4KUCTQIA2EMAJ7QFY.data +++ /dev/null @@ -1,5 +0,0 @@ - -Ğȱwx‰çİxMÖú{ -D£ÕßzH/&^ñÁ ÍÏRS‰“ò/•Ûv,ËÛR -ò=š€N¿¥÷g~üóİpf1®\[ä>ß%ŒîU‚1ñ@Q©¾Ê×€2&m6Èq¸¹QØ…ï] Î|½Å·!K E‰~J Ö•ì¦o¤j™Übïn3¨eTğ·)D+;s -컓üı:Ty!c¾3šÕğƒ\*ş–­T7…‚E?[˜¢Pv}¼ÉA+´c†xù~şe¼È \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data deleted file mode 100644 index a8f98693..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QF/CIQGPALRQ24P6NS4OWHTQ7R247ZI7KJWP3QWPQYS43LFULQC5ANLQFI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data deleted file mode 100644 index 6d043733..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QU/CIQHUGO6PZFU3HS5W5Y25STP74OMT5SRRJZXL4LHDRQ3SIM7NCODQUI.data +++ /dev/null @@ -1,2 +0,0 @@ - -réËÄ'Q°²#€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data deleted file mode 100644 index 1524efce..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQHXG7QQS5B7FZ3UOMUYOWJZ5KWXCKGGO7MBLHYPX6ZZ5XZK66AQVI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/QV/CIQOHMGEIKMPYHAUTL57JSEZN64SIJ5OIHSGJG4TJSSJLGI3PBJLQVI.data deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data deleted file mode 100644 index 389e1117..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/R3/CIQBED3K6YA5I3QQWLJOCHWXDRK5EXZQILBCKAPEDUJENZ5B5HJ5R3A.data +++ /dev/null @@ -1,28 +0,0 @@ - -ËÃHello and Welcome to IPFS! - -██╗██████╗ ███████╗███████╗ -██║██╔â•â•â–ˆâ–ˆâ•—██╔â•â•â•â•â•â–ˆâ–ˆâ•”â•â•â•â•â• -██║██████╔â•â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ•— ███████╗ -██║██╔â•â•â•â• ██╔â•â•â• â•šâ•â•â•â•â–ˆâ–ˆâ•‘ -██║██║ ██║ ███████║ -â•šâ•â•â•šâ•â• â•šâ•â• â•šâ•â•â•â•â•â•â• - -If you're seeing this, you have successfully installed -IPFS and are now interfacing with the ipfs merkledag! - - ------------------------------------------------------- -| Warning: | -| This is alpha software. Use at your own discretion! | -| Much is missing or lacking polish. There are bugs. | -| Not yet secure. Read the security notes for more. 
| - ------------------------------------------------------- - -Check out some of the other files in this directory: - - ./about - ./help - ./quick-start <-- usage examples - ./readme <-- this file - ./security-notes -à \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data deleted file mode 100644 index 5a59204a..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RJ/CIQMLF3XEWG6VSOUXDSJ5BG26LKETTMBVMXIW7JKRHRPLIBNCFCYRJI.data +++ /dev/null @@ -1,2 +0,0 @@ - -stem. Some€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data deleted file mode 100644 index 1a86e0be..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RW/CIQC3G665ZIYGOQYC3MQULKGIBXEOFAZHK46I5UQ3O7EEJQP4FW6RWI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data deleted file mode 100644 index 74f62a02..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/RX/CIQE66Z547DTRXMML2XLVBUPU4PW4HGY3HNOT22D27SWEWL7BM4KRXA.data +++ /dev/null @@ -1,3 +0,0 @@ - - -Ä'Q°²#€€ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data deleted file mode 100644 index 3a99c365..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/S5/CIQHBGZNZRPWVEFNMTLP4OS5EAVHFMCX2HD7FZUC2B3WUU3D4LGKS5A.data +++ /dev/null @@ -1,3 +0,0 @@ -4 -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ï js-ipfs-repo - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING deleted file mode 100644 index a153331d..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SHARDING +++ /dev/null @@ -1 +0,0 @@ -/repo/flatfs/shard/v1/next-to-last/2 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data deleted file mode 100644 index 38a7ed3a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SQ/CIQABZII22CKQPRFRNJDBZLZDVWDLXB4FB63ZSHKE25TXTZ5PRFNSQQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data deleted file mode 100644 index 562529a2..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SS/CIQJYN3KXS5PFDN3C3UKITZEAQ5E2ZZSIKJ5GLFKYZH5G5GPOPZRSSQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data 
b/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data deleted file mode 100644 index dedf499f..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/SY/CIQGL4ZZ3NU3CYC6HCGJL2PB3SCFDLNIMJ2VBGFII2ETFDJKKRWCSYY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data deleted file mode 100644 index 9e5174d0..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQFIELZYM2Q2O27HFMWNO3RQCBUN42MNB4WYEHTCKOBKEOQC4X4T3I.data +++ /dev/null @@ -1,4 +0,0 @@ -5 -" $çşGç,¢Aî4{°¯Ïx„Z/.›Š §D`ø 200Bytes.txtÓ/ -" Y”„9_)aô€Ë¹2¾RÅm™Å–keà9ğ˜»ïlevel-2 - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data deleted file mode 100644 index 5a3836e9..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/T3/CIQHHFPK232RMHO7DXAEDSDEBS2BUY2XK2X3LJTP4SPIM5NYBINUT3Y.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data deleted file mode 100644 index a4027d46..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TK/CIQCATTJHTFQJK6QMVRDXHZBQLVTXHRZR2G3R5OU3G7HREQTYK3KTKQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data deleted file mode 100644 index 10aa2ae4..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQFEAGMNNXXTYKYQSANT6IBNTFN7WR5RPD5F6GN6MBKUUO25DNOTWQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data deleted file mode 100644 index c1f9899a..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/TW/CIQMVX3GSIETJNHF3OOH7TMLHB74V6MEEZY5V54Z7JHJV2MUZ7R2TWI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data deleted file mode 100644 index 4e910622..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/U2/CIQBQTRBKKRZNSXRN5A2DTCL5QYIC75SVOKTZJSFYV7IHLCDDT6IU2Q.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data deleted file mode 100644 index 871a6bf0..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UI/CIQIKOTOTJQ2NFB4A3MMLHKZBTNOIK3QAZ3XSCASZZUPD2ZDRHTOUIY.data +++ 
/dev/null @@ -1,4729 +0,0 @@
There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?] have not attained the same success. Outside of academia, the most successful systems have been peer-to-peer file-sharing applications primarily geared toward large media (audio and video). Most notably, Napster, KaZaA, and BitTorrent [2] deployed large file distribution systems supporting over 100 million simultaneous users. Even today, BitTorrent maintains a massive deployment where tens of millions of nodes churn daily [16]. These applications saw greater numbers of users and files distributed than their academic file system counterparts. However, the applications were not designed as infrastructure to be built upon. While there have been successful repurposings¹, no general file-system has emerged that offers global, low-latency, and decentralized distribution.

¹For example, Linux distributions use BitTorrent to transmit disk images, and Blizzard, Inc. uses it to distribute video game content.

Perhaps this is because a “good enough” system for most use cases already exists: HTTP. By far, HTTP is the most successful “distributed system of files” ever deployed. Coupled with the browser, HTTP has had enormous technical and social impact. It has become the de facto way to transmit files across the internet. Yet, it fails to take advantage of dozens of brilliant file distribution techniques invented in the last fifteen years. From one perspective, evolving Web infrastructure is near-impossible, given the number of backwards-compatibility constraints and the number of strong parties invested in the current model. But from another perspective, new protocols have emerged and gained wide use since the emergence of HTTP. What is lacking is upgrading design: enhancing the current HTTP web, and introducing new functionality without degrading user experience.

Industry has gotten away with using HTTP this long because moving small files around is relatively cheap, even for small organizations with lots of traffic. But we are entering a new era of data distribution with new challenges: (a) hosting and distributing petabyte datasets, (b) computing on large data across organizations, (c) high-volume high-definition on-demand or real-time media streams, (d) versioning and linking of massive datasets, (e) preventing accidental disappearance of important files, and more. Many of these can be boiled down to “lots of data, accessible everywhere.” Pressed by critical features and bandwidth concerns, we have already given up HTTP for different data distribution protocols. The next step is making them part of the Web itself.

Orthogonal to efficient data distribution, version control systems have managed to develop important data collaboration workflows. Git, the distributed source code version control system, developed many useful ways to model and implement distributed data operations. The Git toolchain offers versatile versioning functionality that large file distribution systems severely lack. New solutions inspired by Git are emerging, such as Camlistore [?], a personal file storage system, and Dat [?], a data collaboration toolchain and dataset package manager. Git has already influenced distributed filesystem design [9], as its content-addressed Merkle DAG data model enables powerful file distribution strategies. What remains to be explored is how this data structure can influence the design of high-throughput-oriented file systems, and how it might upgrade the Web itself.

This paper introduces IPFS, a novel peer-to-peer version-controlled filesystem seeking to reconcile these issues. IPFS synthesizes learnings from many past successful systems. Careful interface-focused integration yields a system greater than the sum of its parts. The central IPFS principle is modeling all data as part of the same Merkle DAG.
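Since the passage above leans on Git's content-addressed Merkle DAG model, here is a minimal JavaScript sketch of the idea, purely illustrative and not the actual IPFS or UnixFS block format: a node's identifier is the hash of its serialized bytes, and parents reference children by those hashes, so any change in a leaf changes every ancestor's address. The `cid` and `createNode` helpers are hypothetical names introduced only for this example.

```js
// Illustrative sketch of content addressing in a Merkle DAG (assumed shapes,
// not the real IPFS/UnixFS codec). Runs under Node.js using only built-ins.
const { createHash } = require('crypto')

// Hypothetical helper: derive a content address from serialized bytes.
function cid (bytes) {
  return createHash('sha256').update(bytes).digest('hex')
}

// Hypothetical node: opaque data plus named links to child addresses.
function createNode (data, links = {}) {
  const serialized = JSON.stringify({ data, links }) // stand-in for a real codec
  return { data, links, hash: cid(serialized) }
}

// Two leaves and a root that links to them by hash.
const leafA = createNode('hello')
const leafB = createNode('world')
const root = createNode('', { a: leafA.hash, b: leafB.hash })

console.log(root.hash) // editing either leaf would change the root's address
```

Deduplication falls out of the same property: identical subtrees hash to the same address, so they can be stored and transferred once.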

There have been many attempts at constructing a global distributed file system. Some systems have seen significant success, and others failed completely. Among the academic attempts, AFS [6] has succeeded widely and is still in use today. Others [7, ?]
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. 
However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] 
have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. 
-This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. 
The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. 
But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. 
From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. 
While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. 
Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. 
IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. 
New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. 
Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. 
What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. - -There have been many attempts at constructing a global -distributed file system. Some systems have seen signifi- -cant success, and others failed completely. Among the academic -attempts, AFS [6] has succeeded widely and is still -in use today. Others [7, ?] have not attained the same -success. Outside of academia, the most successful systems -have been peer-to-peer file-sharing applications primarily -geared toward large media (audio and video). Most notably, -Napster, KaZaA, and BitTorrent [2] deployed large -file distribution systems supporting over 100 million simultaneous -users. Even today, BitTorrent maintains a massive -deployment where tens of millions of nodes churn daily [16]. -These applications saw greater numbers of users and files distributed -than their academic file system counterparts. However, -the applications were not designed as infrastructure to -be built upon. While there have been successful repurposings1 -, no general file-system has emerged that offers global, -low-latency, and decentralized distribution. -Perhaps this is because a “good enough†system for most -use cases already exists: HTTP. By far, HTTP is the most -successful “distributed system of files†ever deployed. Coupled -with the browser, HTTP has had enormous technical -and social impact. It has become the de facto way to transmit -files across the internet. 
Yet, it fails to take advantage -of dozens of brilliant file distribution techniques invented in -the last fifteen years. From one prespective, evolving Web -infrastructure is near-impossible, given the number of backwards -compatibility constraints and the number of strong -1For example, Linux distributions use BitTorrent to transmit -disk images, and Blizzard, Inc. uses it to distribute -video game content. -parties invested in the current model. But from another perspective, -new protocols have emerged and gained wide use -since the emergence of HTTP. What is lacking is upgrading -design: enhancing the current HTTP web, and introducing -new functionality without degrading user experience. -Industry has gotten away with using HTTP this long because -moving small files around is relatively cheap, even for -small organizations with lots of traffic. But we are entering -a new era of data distribution with new challenges: (a) -hosting and distributing petabyte datasets, (b) computing -on large data across organizations, (c) high-volume highdefinition -on-demand or real-time media streams, (d) versioning -and linking of massive datasets, (e) preventing accidental -disappearance of important files, and more. Many -of these can be boiled down to “lots of data, accessible everywhere.†-Pressed by critical features and bandwidth concerns, -we have already given up HTTP for different data -distribution protocols. The next step is making them part -of the Web itself. -Orthogonal to efficient data distribution, version control -systems have managed to develop important data collaboration -workflows. Git, the distributed source code version -control system, developed many useful ways to model and -implement distributed data operations. The Git toolchain -offers versatile versioning functionality that large file distribution -systems severely lack. New solutions inspired by Git -are emerging, such as Camlistore [?], a personal file storage -system, and Dat [?] a data collaboration toolchain -and dataset package manager. Git has already influenced -distributed filesystem design [9], as its content addressed -Merkle DAG data model enables powerful file distribution -strategies. What remains to be explored is how this data -structure can influence the design of high-throughput oriented -file systems, and how it might upgrade the Web itself. -This paper introduces IPFS, a novel peer-to-peer versioncontrolled -filesystem seeking to reconcile these issues. IPFS -synthesizes learnings from many past successful systems. -Careful interface-focused integration yields a system greater -than the sum of its parts. The central IPFS principle is -modeling all data as part of the same Merkle DAG. 
-There have been many attempts at constructing a global -distributed file sy€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data deleted file mode 100644 index a6e00f34..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQJXEREB4VRPZXUPP4EUUMABGQ4FLCIV7RFAMWV6VTYHGGTHMQOUNA.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data deleted file mode 100644 index b6539897..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/UN/CIQOMBKARLB7PAITVSNH7VEGIQJRPL6J7FT2XYVKAXT4MQPXXPUYUNY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data deleted file mode 100644 index 6b72d373..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VD/CIQM57VXK2GGRETV46PZJAGXIPIE5O6JRUKAV7BBCJWARIYFH3ZEVDY.data +++ /dev/null @@ -1,2 +0,0 @@ - -uºÀ¼ ®­r[€€ \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data deleted file mode 100644 index 9cda061b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VJ/CIQCHY6PCZKAFELCCPFO7GDQ6JVXLCA47BWB47DSAT5DLNKZC4BBVJY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data deleted file mode 100644 index 7f2f4e92..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VK/CIQECT7TVGIFJGMWNC2ZGSXRYQXFPA4ZVFEFYTIZ5CZLMW5F3VZDVKY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data deleted file mode 100644 index 2dd80560..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VO/CIQGFTQ7FSI2COUXWWLOQ45VUM2GUZCGAXLWCTOKKPGTUWPXHBNIVOY.data +++ /dev/null @@ -1,114 +0,0 @@ - - – # 0.1 - Quick Start - -This is a set of short examples with minimal explanation. It is meant as -a "quick start". 
Soon, we'll write a longer tour :-) - - -Add a file to ipfs: - - echo "hello world" >hello - ipfs add hello - - -View it: - - ipfs cat - - -Try a directory: - - mkdir foo - mkdir foo/bar - echo "baz" > foo/baz - echo "baz" > foo/bar/baz - ipfs add -r foo - - -View things: - - ipfs ls - ipfs ls /bar - ipfs cat /baz - ipfs cat /bar/baz - ipfs cat /bar - ipfs ls /baz - - -References: - - ipfs refs - ipfs refs -r - ipfs refs --help - - -Get: - - ipfs get foo2 - diff foo foo2 - - -Objects: - - ipfs object get - ipfs object get /foo2 - ipfs object --help - - -Pin + GC: - - ipfs pin -r - ipfs gc - ipfs ls - ipfs unpin -r - ipfs gc - - -Daemon: - - ipfs daemon (in another terminal) - ipfs id - - -Network: - - (must be online) - ipfs swarm peers - ipfs id - ipfs cat - - -Mount: - - (warning: fuse is finicky!) - ipfs mount - cd /ipfs/< - - -Tool: - - ipfs version - ipfs update - ipfs commands - ipfs config --help - open http://localhost:5001/webui - - -Browse: - - webui: - - http://localhost:5001/webui - - video: - - http://localhost:8080/ipfs/QmVc6zuAneKJzicnJpfrqCH9gSy6bz54JhcypfJYhGUFQu/play#/ipfs/QmTKZgRNwDNZwHtJSjCp6r5FYefzpULfy37JvMt9DwvXse - - images: - - http://localhost:8080/ipfs/QmZpc3HvfjEXvLWGQPWbHk3AjD5j8NEN4gmFN8Jmrd5g83/cs - - markdown renderer app: - - http://localhost:8080/ipfs/QmX7M9CiYXjVeFnkfVGf3y5ixTZ2ACeSGyL1vBJY1HvQPp/mdown -– \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data deleted file mode 100644 index 64ce0aeb..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/VR/CIQDWXASKC6E6M5YUHWLFE3D5ORIQALUCDXFGAGNWUDBLMHCNE7NVRQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data deleted file mode 100644 index 81663143..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/W4/CIQCZN3PHPV7FIQRZ2RMICUTK4IM4CCOKKJ45QAY2MK34ECEPNVWW4I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data deleted file mode 100644 index b75d8023..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/WH/CIQOEE6HDAGCAUN6YNJ2WMWJUZ6PTIZPGPCLKYGPGTIRX5IMJNXZWHQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data deleted file mode 100644 index 9553a942..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/X3/CIQFTFEEHEDF6KLBT32BFAGLXEZL4UWFNWM4LFTLMXQBCERZ6CMLX3Y.data +++ /dev/null @@ -1,2 +0,0 @@ - - \ No newline at end of file diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data deleted file mode 100644 index e80dbd9a..00000000 Binary files 
a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XL/CIQAARXKSMRHHPDJX4RMZVL2FA3652JYS3PGJZYATFZEVYZTYZ7OXLQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data deleted file mode 100644 index d899663b..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/XO/CIQJGO2B2N75IUEM372FSMG76VV256I4PXBULZZ5ASNLK4FL4EG7XOI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data deleted file mode 100644 index ba0caf40..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Y5/CIQDNPSZDJIJ4OGB34IJIMF4NVVGGYKXGXX4DKZ3GF5O4XQ5TCEKY5I.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data deleted file mode 100644 index 1d48c015..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YE/CIQAHHAIILXU6ZJ3QZRQJFXG22DLMMTR3ZMBZ3P3DXUEXXVG6UCOYEQ.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data deleted file mode 100644 index b1df8c51..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/YI/CIQN7OB5A4BJDFSWOWU5P2PHGINZFYFOW4SGA22CWXPWRHTLKR6MYII.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data deleted file mode 100644 index b0ac590e..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/Z3/CIQHZ35U3FQWXNWA5EK5UUB7WWOAO6DB6OS3PKO65SS674P4ZTJ4Z3A.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data deleted file mode 100644 index 3b40300d..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZE/CIQAWCSZTDXZMMUBJ4ZT5AP4QK7NG2ICSLINABMIGOV3FL2GV5NBZEI.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data deleted file mode 100644 index 819ec6cf..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZJ/CIQAC3JCY6ILJZIESXPWN37WGYJHMRBO75J3KRL47DPFYLDHEYZFZJY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data b/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data deleted file mode 100644 
index c57d7186..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/blocks/ZP/CIQK5Z2VYLPEM4X75GWQP23OV737H23CYXARTYUHX2LD73PQPQMTZPY.data and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README b/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README deleted file mode 100644 index 23cb0909..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/blocks/_README +++ /dev/null @@ -1,30 +0,0 @@ -This is a repository of IPLD objects. Each IPLD object is in a single file, -named <base32-cid>.data, where <base32-cid> is the -"base32" encoding of the CID (as specified in -https://github.com/multiformats/multibase) without the 'B' prefix. -All the object files are placed in a tree of directories, based on a -function of the CID. This is a form of sharding similar to -the objects directory in git repositories. Previously, we used -prefixes; we now use the next-to-last two characters. - - func NextToLast(base32cid string) string { - nextToLastLen := 2 - offset := len(base32cid) - nextToLastLen - 1 - return base32cid[offset : offset+nextToLastLen] - } - -For example, an object with a base58 CIDv1 of - - zb2rhYSxw4ZjuzgCnWSt19Q94ERaeFhu9uSqRgjSdx9bsgM6f - -has a base32 CIDv1 of - - BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA - -and will be placed at - - SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data - -with 'SC' being the next-to-last two characters; the 'B' at the -beginning of the CIDv1 string is the multibase prefix and is not -stored in the filename. diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb deleted file mode 100644 index fc04d660..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000002.ldb and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb b/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb deleted file mode 100644 index 63d9d260..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/000005.ldb and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT b/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT deleted file mode 100644 index 5b540107..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/CURRENT +++ /dev/null @@ -1 +0,0 @@ -MANIFEST-000011 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOCK deleted file mode 100644 index e69de29b..00000000 diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG deleted file mode 100644 index fb2ef830..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG +++ /dev/null @@ -1,5 +0,0 @@ -=============== Aug 19, 2016 (CEST) =============== -15:48:10.633634 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -15:48:10.634191 db@open opening -15:48:10.639318 db@janitor F·4 G·0 -15:48:10.639379 db@open done T·5.16729ms diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old b/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old deleted file mode 100644 index f5ffd612..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/datastore/LOG.old +++ /dev/null @@ -1,7 +0,0 @@ -=============== Apr 22, 2016 (WEST) ===============
-03:16:42.272495 log@legend F·NumFile S·FileSize N·Entry C·BadEntry B·BadBlock Ke·KeyError D·DroppedEntry L·Level Q·SeqNum T·TimeElapsed -03:16:42.272857 db@open opening -03:16:42.275673 db@janitor F·4 G·0 -03:16:42.275700 db@open done T·2.831108ms -03:16:42.596938 db@close closing -03:16:42.597082 db@close done T·139.194µs diff --git a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 b/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 deleted file mode 100644 index 7af87ca8..00000000 Binary files a/packages/ipfs-unixfs-importer/test/test-repo/datastore/MANIFEST-000011 and /dev/null differ diff --git a/packages/ipfs-unixfs-importer/test/test-repo/version b/packages/ipfs-unixfs-importer/test/test-repo/version deleted file mode 100644 index 1e8b3149..00000000 --- a/packages/ipfs-unixfs-importer/test/test-repo/version +++ /dev/null @@ -1 +0,0 @@ -6 diff --git a/packages/ipfs-unixfs/README.md b/packages/ipfs-unixfs/README.md index b196391e..c5242f97 100644 --- a/packages/ipfs-unixfs/README.md +++ b/packages/ipfs-unixfs/README.md @@ -204,4 +204,3 @@ This repository falls under the IPFS [Code of Conduct](https://github.com/ipfs/c ## License [MIT](LICENSE) -
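The quick-start document deleted above drives everything through the `ipfs` CLI (`ipfs add hello`, `ipfs cat <hash>`, and so on). A rough programmatic counterpart, using the `ipfs-unixfs-importer` package whose test fixtures this diff removes, might look like the sketch below. It is a minimal sketch, not the fixture-generation code: it assumes the `importer(source, blockstore)` entry point and the `MemoryBlockstore` from `blockstore-core` as documented in the current package README, which may differ from the API of the version this diff targets, and the file name and contents are illustrative only.

```ts
import { importer } from 'ipfs-unixfs-importer'
import { MemoryBlockstore } from 'blockstore-core/memory'

// In-memory blockstore standing in for the on-disk test repo removed by this diff
const blockstore = new MemoryBlockstore()

// Roughly equivalent to: echo "hello world" > hello && ipfs add hello
const source = [{
  path: 'hello',
  content: new TextEncoder().encode('hello world\n')
}]

// The importer is an async generator; each entry carries the CID you would
// pass to `ipfs cat <hash>` on the CLI (requires an ESM/top-level-await context)
for await (const entry of importer(source, blockstore)) {
  console.log(entry.path, entry.cid.toString())
}
```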
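The deleted `blocks/_README` above describes the layout of the removed block files: the filename is the base32 CIDv1 string without its 'B' multibase prefix, and each file lives in a shard directory named after the next-to-last two characters of that string. As a minimal, self-contained illustration of that rule in TypeScript (the function names `nextToLastShard` and `blockPath` are ours, not part of any package in this repo):

```ts
// Shard directory for a base32 CIDv1 string such as "BAFKREI...":
// the two characters immediately before the final character.
function nextToLastShard (base32Cid: string): string {
  // Drop the 'B' multibase prefix to get the filename form; this does not
  // change the tail of the string, so the shard is the same either way.
  const key = base32Cid.startsWith('B') ? base32Cid.slice(1) : base32Cid
  const nextToLastLen = 2
  const offset = key.length - nextToLastLen - 1
  return key.slice(offset, offset + nextToLastLen)
}

// Full relative path of a block file inside the blocks/ directory.
function blockPath (base32Cid: string): string {
  const key = base32Cid.startsWith('B') ? base32Cid.slice(1) : base32Cid
  return `${nextToLastShard(base32Cid)}/${key}.data`
}

// Reproduces the worked example from the deleted README:
// prints "SC/AFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA.data"
console.log(blockPath('BAFKREIA22FLID5AJ2KU7URG47MDLROZIH6YF2KALU2PWEFPVI37YLKRSCA'))
```

The output matches the `SC/` example in the README, and the same rule accounts for the `2B/`, `VR/`, `ZP/`, and similar shard directories seen throughout the deleted test-repo paths in this diff.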