Skip to content

Commit

Permalink
feat(importer): Add rabin chunker to available importer chunker algor…
Browse files Browse the repository at this point in the history
…ithms

This is required to have feature parity with go-ipfs which supports rabin chunking algorithm. Rabin
chunker supports the following `chunkerOptions`: `minChunkSize`, `avgChunkSize`, `window`, and
`polynomial`. Rabin chunker uses the same defaults specified by the go-ipfs-chunker.

Supports ipfs/js-ipfs#1283

License: MIT
Signed-off-by: Dan Ordille <[email protected]>
  • Loading branch information
dordille committed Jul 26, 2018
1 parent dfc9f20 commit 510b9a6
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 7 deletions.
7 changes: 5 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@
"leadMaintainer": "Alex Potsides <[email protected]>",
"main": "src/index.js",
"browser": {
"fs": false
"fs": false,
"rabin": false
},
"scripts": {
"test": "aegir test",
Expand Down Expand Up @@ -72,7 +73,9 @@
"pull-through": "^1.0.18",
"pull-traverse": "^1.0.3",
"pull-write": "^1.1.4",
"sparse-array": "^1.3.1"
"rabin": "^1.6.0",
"sparse-array": "^1.3.1",
"stream-to-pull-stream": "^1.7.2"
},
"contributors": [
"Alan Shaw <[email protected]>",
Expand Down
3 changes: 2 additions & 1 deletion src/builder/builder.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ const DAGNode = dagPB.DAGNode

const defaultOptions = {
chunkerOptions: {
maxChunkSize: 262144
maxChunkSize: 262144,
avgChunkSize: 262144
},
rawLeaves: false,
hashAlg: 'sha2-256',
Expand Down
8 changes: 8 additions & 0 deletions src/chunker/index.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
'use strict'

const chunkers = {
fixed: require('../chunker/fixed-size'),
rabin: require('../chunker/rabin')
}

module.exports = chunkers
28 changes: 28 additions & 0 deletions src/chunker/rabin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
'use strict'

const createRabin = require('rabin')
const toPull = require('stream-to-pull-stream')

module.exports = (options) => {
let min, max, avg
if (options.minChunkSize && options.maxChunkSize && options.avgChunkSize) {
avg = options.avgChunkSize
min = options.minChunkSize
max = options.maxChunkSize
} else {
avg = options.avgChunkSize
min = avg / 3
max = avg + (avg / 2)
}

const sizepow = Math.floor(Math.log2(avg))
const rabin = createRabin({
min: min,
max: max,
bits: sizepow,
window: options.window || 16,
polynomial: options.polynomial || '0x3DF305DFB2A805'
})

return toPull.duplex(rabin)
}
5 changes: 1 addition & 4 deletions src/importer/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,7 @@ const assert = require('assert')
const setImmediate = require('async/setImmediate')
const DAGBuilder = require('../builder')
const createTreeBuilder = require('./tree-builder')

const chunkers = {
fixed: require('../chunker/fixed-size')
}
const chunkers = require('../chunker')

const defaultOptions = {
chunker: 'fixed',
Expand Down
62 changes: 62 additions & 0 deletions test/chunker-rabin.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
/* eslint-env mocha */
'use strict'

const chunker = require('./../src/chunker/rabin')
const chai = require('chai')
chai.use(require('dirty-chai'))
const expect = chai.expect
const pull = require('pull-stream')
const loadFixture = require('aegir/fixtures')

const rawFile = loadFixture('test/fixtures/1MiB.txt')

describe('chunker: rabin', function () {
  this.timeout(30000)

  it('chunks non flat buffers', (done) => {
    // Feed three buffers of different sizes/contents as separate stream
    // values to exercise chunking across buffer boundaries.
    const buffers = [
      Buffer.alloc(2 * 256).fill('a'),
      Buffer.alloc(1 * 256).fill('b'),
      Buffer.alloc(5 * 256).fill('c')
    ]

    pull(
      pull.values(buffers),
      chunker({minChunkSize: 48, avgChunkSize: 96, maxChunkSize: 192}),
      pull.collect((err, chunks) => {
        expect(err).to.not.exist()
        for (const chunk of chunks) {
          expect(chunk).to.have.length.gte(48)
          expect(chunk).to.have.length.lte(192)
        }
        done()
      })
    )
  })

  it('256 KiB avg chunks of non scalar filesize', (done) => {
    const KiB256 = 262144
    const opts = {
      minChunkSize: KiB256 / 3,
      avgChunkSize: KiB256,
      maxChunkSize: KiB256 + (KiB256 / 2)
    }
    // Append a few bytes so the input size is not a multiple of the chunk size.
    const file = Buffer.concat([rawFile, Buffer.from('hello')])

    pull(
      pull.values([file]),
      chunker(opts),
      pull.collect((err, chunks) => {
        expect(err).to.not.exist()

        for (const chunk of chunks) {
          expect(chunk).to.have.length.gte(opts.minChunkSize)
          expect(chunk).to.have.length.lte(opts.maxChunkSize)
        }

        done()
      })
    )
  })
})

0 comments on commit 510b9a6

Please sign in to comment.