From 83a5feb688036b93c4d31b79076a89a2fc54afba Mon Sep 17 00:00:00 2001
From: Dan Ordille
Date: Thu, 26 Jul 2018 11:41:04 -0400
Subject: [PATCH 1/3] feat(importer): add rabin fingerprinting chunk algorithm

This is required to have feature parity with go-ipfs which supports rabin chunking algorithm.
Rabin chunker supports the following `chunkerOptions`
minChunkSize: {integer}
avgChunkSize: {integer}
maxChunkSize: {integer}
polynomial: {string}
window: {integer}

Rabin chunker uses the same defaults defined by go-ipfs-chunker.

Supports ipfs/js-ipfs#1283

License: MIT
Signed-off-by: Dan Ordille
---
Review note: when min/max are derived from avgChunkSize they are floored to
whole byte counts (go-ipfs-chunker derives them with integer division), so the
blob hash on the rabin.js `index` line no longer matches the content.

 package.json           |  7 +++--
 src/builder/builder.js |  3 +-
 src/chunker/index.js   |  8 ++++++
 src/chunker/rabin.js   | 28 +++++++++++++++++++
 src/importer/index.js  |  5 +---
 test/chunker-rabin.js  | 62 ++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 106 insertions(+), 7 deletions(-)
 create mode 100644 src/chunker/index.js
 create mode 100644 src/chunker/rabin.js
 create mode 100644 test/chunker-rabin.js

diff --git a/package.json b/package.json
index a2070756..7f9fc175 100644
--- a/package.json
+++ b/package.json
@@ -5,7 +5,8 @@
   "leadMaintainer": "Alex Potsides ",
   "main": "src/index.js",
   "browser": {
-    "fs": false
+    "fs": false,
+    "rabin": false
   },
   "scripts": {
     "test": "aegir test",
@@ -72,7 +73,9 @@
     "pull-through": "^1.0.18",
     "pull-traverse": "^1.0.3",
     "pull-write": "^1.1.4",
-    "sparse-array": "^1.3.1"
+    "rabin": "^1.6.0",
+    "sparse-array": "^1.3.1",
+    "stream-to-pull-stream": "^1.7.2"
   },
   "contributors": [
     "Alan Shaw ",
diff --git a/src/builder/builder.js b/src/builder/builder.js
index b0c117ed..a94763dd 100644
--- a/src/builder/builder.js
+++ b/src/builder/builder.js
@@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode
 
 const defaultOptions = {
   chunkerOptions: {
-    maxChunkSize: 262144
+    maxChunkSize: 262144,
+    avgChunkSize: 262144
   },
   rawLeaves: false,
   hashAlg: 'sha2-256',
diff --git a/src/chunker/index.js b/src/chunker/index.js
new file mode 100644
index 00000000..e983ae83
--- /dev/null
+++ b/src/chunker/index.js
@@ -0,0 +1,8 @@
+'use strict'
+
+const chunkers = {
+  fixed: require('../chunker/fixed-size'),
+  rabin: require('../chunker/rabin')
+}
+
+module.exports = chunkers
diff --git a/src/chunker/rabin.js b/src/chunker/rabin.js
new file mode 100644
index 00000000..8fe506e4
--- /dev/null
+++ b/src/chunker/rabin.js
@@ -0,0 +1,28 @@
+'use strict'
+
+const createRabin = require('rabin')
+const toPull = require('stream-to-pull-stream')
+
+module.exports = (options) => {
+  let min, max, avg
+  if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) {
+    avg = options.avgChunkSize
+    min = options.minChunkSize
+    max = options.maxChunkSize
+  } else {
+    avg = options.avgChunkSize
+    min = Math.floor(avg / 3)
+    max = Math.floor(avg + (avg / 2))
+  }
+
+  const sizepow = Math.floor(Math.log2(avg))
+  const rabin = createRabin({
+    min: min,
+    max: max,
+    bits: sizepow,
+    window: options.window || 16,
+    polynomial: options.polynomial || '0x3DF305DFB2A805'
+  })
+
+  return toPull.duplex(rabin)
+}
diff --git a/src/importer/index.js b/src/importer/index.js
index beb8b994..5be9670f 100644
--- a/src/importer/index.js
+++ b/src/importer/index.js
@@ -8,10 +8,7 @@ const assert = require('assert')
 const setImmediate = require('async/setImmediate')
 const DAGBuilder = require('../builder')
 const createTreeBuilder = require('./tree-builder')
-
-const chunkers = {
-  fixed: require('../chunker/fixed-size')
-}
+const chunkers = require('../chunker')
 
 const defaultOptions = {
   chunker: 'fixed',
diff --git a/test/chunker-rabin.js b/test/chunker-rabin.js
new file mode 100644
index 00000000..a0d7d7cb
--- /dev/null
+++ b/test/chunker-rabin.js
@@ -0,0 +1,62 @@
+/* eslint-env mocha */
+'use strict'
+
+const chunker = require('./../src/chunker/rabin')
+const chai = require('chai')
+chai.use(require('dirty-chai'))
+const expect = chai.expect
+const pull = require('pull-stream')
+const loadFixture = require('aegir/fixtures')
+
+const rawFile = loadFixture('test/fixtures/1MiB.txt')
+
+describe('chunker: rabin', function () {
+  this.timeout(30000)
+
+  it('chunks non flat buffers', (done) => {
+    const b1 = Buffer.alloc(2 * 256)
+    const b2 = Buffer.alloc(1 * 256)
+    const b3 = Buffer.alloc(5 * 256)
+
+    b1.fill('a')
+    b2.fill('b')
+    b3.fill('c')
+
+    pull(
+      pull.values([b1, b2, b3]),
+      chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}),
+      pull.collect((err, chunks) => {
+        expect(err).to.not.exist()
+        chunks.forEach((chunk) => {
+          expect(chunk).to.have.length.gte(48)
+          expect(chunk).to.have.length.lte(192)
+        })
+        done()
+      })
+    )
+  })
+
+  it('256 KiB avg chunks of non scalar filesize', (done) => {
+    const KiB256 = 262144
+    let file = Buffer.concat([rawFile, Buffer.from('hello')])
+    const opts = {
+      minChunkSize: KiB256 / 3,
+      avgChunkSize: KiB256,
+      maxChunkSize: KiB256 + (KiB256 / 2)
+    }
+    pull(
+      pull.values([file]),
+      chunker(opts),
+      pull.collect((err, chunks) => {
+        expect(err).to.not.exist()
+
+        chunks.forEach((chunk) => {
+          expect(chunk).to.have.length.gte(opts.minChunkSize)
+          expect(chunk).to.have.length.lte(opts.maxChunkSize)
+        })
+
+        done()
+      })
+    )
+  })
+})

From 3ffbcd7c8d22ceca6f497d1c42dbae8a978d73e5 Mon Sep 17 00:00:00 2001
From: Dan Ordille
Date: Mon, 6 Aug 2018 12:34:58 -0400
Subject: [PATCH 2/3] test: enable rabin chunker test in node env

License: MIT
Signed-off-by: Dan Ordille
---
 test/node.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/node.js b/test/node.js
index 064428e9..ae50d454 100644
--- a/test/node.js
+++ b/test/node.js
@@ -44,6 +44,7 @@ describe('IPFS UnixFS Engine', () => {
 
   // Chunkers
   require('./chunker-fixed-size')
+  require('./chunker-rabin')
 
   // Graph Builders
   require('./builder')(repo)

From 6f1f56867df9d421bb32b4f3e9e7cde42b5c9bf1 Mon Sep 17 00:00:00 2001
From: Dan Ordille
Date: Tue, 7 Aug 2018 14:27:05 -0400
Subject: [PATCH 3/3] test: add test for rabin chunker when only avgChunkSize is specified

License: MIT
Signed-off-by: Dan Ordille
---
Review note: the expected bounds now mirror the defaults derived from
avgChunkSize (min = floor(avg / 3), max = avg + avg / 2); the previous upper
bound of 256 * (256 / 2) could never fail. The `index` postimage hash is
therefore stale.

 test/chunker-rabin.js | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/test/chunker-rabin.js b/test/chunker-rabin.js
index a0d7d7cb..a661072c 100644
--- a/test/chunker-rabin.js
+++ b/test/chunker-rabin.js
@@ -36,6 +36,23 @@ describe('chunker: rabin', function () {
     )
   })
 
+  it('uses default min and max chunk size when only avgChunkSize is specified', (done) => {
+    const b1 = Buffer.alloc(10 * 256)
+    b1.fill('a')
+    pull(
+      pull.values([b1]),
+      chunker({avgChunkSize: 256}),
+      pull.collect((err, chunks) => {
+        expect(err).to.not.exist()
+        chunks.forEach((chunk) => {
+          expect(chunk).to.have.length.gte(Math.floor(256 / 3))
+          expect(chunk).to.have.length.lte(256 + (256 / 2))
+        })
+        done()
+      })
+    )
+  })
+
   it('256 KiB avg chunks of non scalar filesize', (done) => {
     const KiB256 = 262144
     let file = Buffer.concat([rawFile, Buffer.from('hello')])