diff --git a/.gitignore b/.gitignore
index ec572e1..c220d89 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,5 @@ node_modules/
 .nyc_output/
 coverage/
 example.car
+build/
+dist/
diff --git a/README.md b/README.md
index 8f63a72..42f362e 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# datastore-car (js-datastore-car) [![Build Status](https://github.com/rvagg/js-datastore-car/workflows/CI/badge.svg)](https://github.com/rvagg/js-datastore-car/actions?workflow=CI)
+# datastore-car (js-datastore-car) [![Build Status](https://github.com/ipld/js-datastore-car/workflows/CI/badge.svg)](https://github.com/ipld/js-datastore-car/actions?workflow=CI)
 
 [![NPM](https://nodei.co/npm/datastore-car.svg)](https://nodei.co/npm/datastore-car/)
 
@@ -9,13 +9,19 @@ The interface wraps a [Datastore](https://github.com/ipfs/interface-datastore),
 ## Example
 
 ```js
-const fs = require('fs')
-const CarDatastore = require('datastore-car')
-const Block = require('@ipld/block')
+import fs from 'fs'
+import multiformats from 'multiformats/basics'
+import car from 'datastore-car'
+import dagCbor from '@ipld/dag-cbor'
+
+// dag-cbor is required for the CAR root block
+multiformats.add(dagCbor)
+const CarDatastore = car(multiformats)
 
 async function example () {
-  const block = Block.encoder(Buffer.from('random meaningless bytes'), 'raw')
-  const cid = await block.cid()
+  const binary = new TextEncoder().encode('random meaningless bytes')
+  const mh = await multiformats.multihash.hash(binary, 'sha2-256')
+  const cid = multiformats.CID.create(1, multiformats.get('raw').code, mh)
 
   const outStream = fs.createWriteStream('example.car')
   const writeDs = await CarDatastore.writeStream(outStream)
@@ -23,7 +29,7 @@ async function example () {
   // set the header with a single root
   await writeDs.setRoots(cid)
   // store a new block, creates a new file entry in the CAR archive
-  await writeDs.put(cid, await block.encode())
+  await writeDs.put(cid, binary)
   await writeDs.close()
 
   const inStream = fs.createReadStream('example.car')
@@ -34,12 +40,12 @@ async function example () {
   // read the list of roots from the header
   const roots = await readDs.getRoots()
 
-  // retrieve a block, as a UInt8Array, reading from the ZIP archive
+  // retrieve a block, as a Uint8Array, reading from the CAR archive
   const got = await readDs.get(roots[0])
-  // also possible: for await (const {key, data} = readDs.query()) { ... }
+  // also possible: for await (const { key, value } of readDs.query()) { ... }
 
   console.log('Retrieved [%s] from example.car with CID [%s]',
-    Buffer.from(got).toString(),
+    new TextDecoder().decode(got),
     roots[0].toString())
 
   await readDs.close()
@@ -59,13 +65,14 @@ Retrieved [random meaningless bytes] from example.car with CID [bafkreihwkf6mtnj
 
 In this example, the `writeStream()` create-mode is used to generate the CAR file; this allows for an iterative write process where first the roots are set (`setRoots()`) and then all of the blocks are written (`put()`). After it is created, we use the `readStreamComplete()` create-mode to read the contents. Other create-modes are useful where the environment, data and needs demand:
 
-* **[`CarDatastore.readBuffer(buffer)`](#CarDatastore__readBuffer)**: read a CAR archive from a `Buffer` or `Uint8Array`. Does not support mutation operations, only reads. This mode is not efficient for large data sets but does support `get()` and `has()` operations since it caches the entire archive in memory.
This mode is the only mode _available_ in a browser environment
+* **[`CarDatastore.readBuffer(buffer)`](#CarDatastore__readBuffer)**: read a CAR archive from a `Uint8Array`. Does not support mutation operations, only reads. This mode is not efficient for large data sets but does support `get()` and `has()` operations since it caches the entire archive in memory. This mode is the only mode _available_ in a browser environment.
 * **[`CarDatastore.readFileComplete(file)`](#CarDatastore__readFileComplete)**: read a CAR archive directly from a file. Does not support mutation operations, only reads. This mode is not efficient for large data sets but does support `get()` and `has()` operations since it caches the entire archive in memory. This mode is _not available_ in a browser environment.
 * **[`CarDatastore.readStreamComplete(stream)`](#CarDatastore__readStreamComplete)**: read a CAR archive directly from a stream. Does not support mutation operations, only reads. This mode is not efficient for large data sets but does support `get()` and `has()` operations since it caches the entire archive in memory. This mode is _not available_ in a browser environment.
 * **[`CarDatastore.readStreaming(stream)`](#CarDatastore__readStreaming)**: read a CAR archive directly from a stream. Does not support mutation operations, and only supports iterative reads via `query()` (i.e. no `get()` and `has()`). This mode is very efficient for large data sets. This mode is _not available_ in a browser environment.
+* **[`CarDatastore.readFileIndexed(file)`](#CarDatastore__readFileIndexed)**: read a CAR archive from a local file, index its contents and use that index to support random-access reads (`has()`, `get()` and `query()`) without holding the entire contents in memory as `readFileComplete()` does. Uses more memory than `readStreaming()` and less than `readFileComplete()`. It will be slower to initialize than `readStreaming()` but is suitable where random-access reads are required from a large file; a usage sketch is shown below.
 * **[`CarDatastore.writeStream(stream)`](#CarDatastore__writeStream)**: write a CAR archive to a stream (e.g. `fs.createWriteStream(file)`). Does not support read operations, only writes, and the writes are append-only (i.e. no `delete()`). However, this mode is very efficient for dumping large data sets, with no caching and streaming writes. This mode is _not available_ in a browser environment.
 
-Other create-modes may be supported in the future, such as writing to a Buffer (although this is already possible if you couple `writeStream()` with a [`BufferListStream`](https://ghub.io/bl)) or a read/write mode such as datastore-zipcar makes available.
+Other create-modes may be supported in the future, such as writing to a Uint8Array (although this is already possible if you couple `writeStream()` with a [`BufferListStream`](https://ghub.io/bl)) or a read/write mode such as datastore-zipcar makes available.
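+
+A minimal sketch of the new `readFileIndexed()` create-mode (the file name is
+illustrative and error handling is omitted; `CarDatastore` is set up as in the
+example above):
+
+```js
+const readDs = await CarDatastore.readFileIndexed('example.car')
+const roots = await readDs.getRoots()
+// has() and get() are random-access reads backed by the in-memory index
+if (await readDs.has(roots[0])) {
+  const binary = await readDs.get(roots[0])
+  console.log('first root block is %d bytes', binary.length)
+}
+await readDs.close()
+```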
## API
 
@@ -75,6 +82,7 @@ Other create-modes may be supported in the future, such as writing to a Buffer (
   * [`async CarDatastore.readFileComplete(file)`](#CarDatastore__readFileComplete)
   * [`async CarDatastore.readStreamComplete(stream)`](#CarDatastore__readStreamComplete)
   * [`async CarDatastore.readStreaming(stream)`](#CarDatastore__readStreaming)
+  * [`async CarDatastore.readFileIndexed(file)`](#CarDatastore__readFileIndexed)
   * [`async CarDatastore.writeStream(stream)`](#CarDatastore__writeStream)
   * [`async CarDatastore.completeGraph(root, get, car[, concurrency])`](#CarDatastore__completeGraph)
 * [`class CarDatastore`](#CarDatastore)
@@ -92,7 +100,7 @@ Other create-modes may be supported in the future, such as writing to a Buffer (
 
 ### `async CarDatastore.readBuffer(buffer)`
 
-Read a CarDatastore from a Buffer containing the contents of an existing
+Read a CarDatastore from a Uint8Array containing the contents of an existing
 CAR archive. Mutation operations (`put()`, `delete()` and `setRoots()`) are
 not available.
 
@@ -106,7 +114,7 @@ environment.
 
 **Parameters:**
 
-* **`buffer`** _(`Buffer|Uint8Array`)_: the byte contents of a CAR archive
+* **`buffer`** _(`Uint8Array`)_: the byte contents of a CAR archive
 
 **Return value** _(`CarDatastore`)_: a read-only CarDatastore.
 
@@ -115,7 +123,7 @@ environment.
 
 Read a CAR archive from a file and return a CarDatastore. The CarDatastore
 returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
-and `query()`. Caching makes `get()` and `has()` possible as the entire
+and `query()`. Caching makes `get()` and `has()` possible because the entire
 file is read and decoded before the CarDatastore is returned. Mutation
 operations (`put()`, `delete()` and `setRoots()`) are not available as there
 is no ability to modify the archive.
@@ -145,7 +153,7 @@ This create-mode is not available in the browser environment.
 
 Read a CAR archive as a CarDataStore from a ReadableStream. The CarDatastore
 returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
-and `query()`. Caching makes `get()` and `has()` possible as the entire
+and `query()`. Caching makes `get()` and `has()` possible because the entire
 stream is read and decoded before the CarDatastore is returned. Mutation
 operations (`put()`, `delete()` and `setRoots()`) are not available as there
 is no ability to modify the archive.
@@ -191,6 +199,36 @@ This create-mode is not available in the browser environment.
 
 **Return value** _(`CarDatastore`)_: a read-only CarDatastore.
 
+
+### `async CarDatastore.readFileIndexed(file)`
+
+Read a CAR archive as a CarDatastore from a local file. The CarDatastore
+returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
+and `query()`. Indexing makes `get()` and `has()` possible because the entire
+file is read and _indexed_ before the CarDatastore is returned. Mutation
+operations (`put()`, `delete()` and `setRoots()`) are not available as there
+is no ability to modify the archive.
+
+The indexing operation uses [`indexer`](#CarDatastore__indexer) to catalogue the contents of the
+CAR and store a mapping of CID to byte locations for each entry. This method
+of parsing is not as memory intensive as [`readStreamComplete`](#CarDatastore__readStreamComplete) as only
+the index is stored in memory. When blocks are read, the index tells the
+reader where to fetch the block from within the CAR file.
+
+This mode is suitable for large files where random-access operations are
+required.
Where only a full sequential read is required, use
+[`readStreaming`](#CarDatastore__readStreaming), which consumes the file in a single pass with no
+memory used for indexing.
+
+This create-mode is not available in the browser environment.
+
+**Parameters:**
+
+* **`file`** _(`string`)_: a path to a local file containing CAR archive data.
+
+**Return value** _(`CarDatastore`)_: a read-only CarDatastore.
+
 
 ### `async CarDatastore.writeStream(stream)`
 
@@ -224,8 +262,8 @@ a CAR file.
 
 **Parameters:**
 
-* **`root`** _(`Block`)_: the root of the graph to start at, this block will be
-  included in the CAR and its CID will be set as the single root.
+* **`root`** _(`CID`)_: the CID of the root of the graph to start at; this block
+  will be included in the CAR and the CID will be set as the single root.
 * **`get`** _(`AsyncFunction`)_: an `async` function that takes a CID and returns
   a `Block`. Can be used to attach to an arbitrary data store.
 * **`car`** _(`CarDatastore`)_: a writable `CarDatastore` that has not yet been
@@ -238,8 +276,8 @@ a CAR file.
 
 ### `class CarDatastore`
 
 CarDatastore is a class to manage reading from, and writing to, CAR archives
-using [CID](https://github.com/multiformats/js-cid)s as keys and file names
-in the CAR and binary block data as the file contents.
+using [CID](https://github.com/multiformats/js-multiformats)s as keys and
+file names in the CAR and binary block data as the file contents.
 
 ### `async CarDatastore#get(key)`
 
@@ -256,7 +294,7 @@ may throw an error if unsupported.
 * **`key`** _(`string|Key|CID`)_: a `CID` or `CID`-convertable object to identify
   the block.
 
-**Return value** _(`Buffer`)_: the IPLD block data referenced by the CID.
+**Return value** _(`Uint8Array`)_: the IPLD block data referenced by the CID.
 
 ### `async CarDatastore#has(key)`
 
@@ -290,7 +328,7 @@ and an Error will be thrown when it is called.
 
 * **`key`** _(`string|Key|CID`)_: a `CID` or `CID`-convertable object to identify
   the `value`.
-* **`value`** _(`Buffer|Uint8Array`)_: an IPLD block matching the given `key`
+* **`value`** _(`Uint8Array`)_: an IPLD block matching the given `key`
   `CID`.
 
@@ -374,15 +412,19 @@ used to read individual blocks directly from the car (using
 `CarDatastore.readRaw()`).
 
 ```js
-const { indexer } = require('datastore-car')
+// full multiformats omitted, you'll need codecs, bases and hashes that
+// appear in your CAR files if you want full information
+const multiformats = ...
+const { indexer } = require('datastore-car')(multiformats)
 
 async function run () {
+  const cidStr = (cid) => `${multiformats.get(cid.code).name}:${cid.toString()}`
   const index = await indexer('big.car')
-  index.roots = index.roots.map((cid) => cid.toString())
+  index.roots = index.roots.map(cidStr)
   console.log('roots:', index.roots)
   for await (const blockIndex of index.iterator) {
-    blockIndex.cid = blockIndex.cid.toString()
-    console.log('block:', blockIndex)
+    blockIndex.cid = cidStr(blockIndex.cid)
+    console.log(JSON.stringify(blockIndex))
   }
 }
 
@@ -396,24 +438,17 @@ Might output something like:
 
 ```
 roots: [
-  'bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
-  'bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'
+  'dag-cbor:bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
+  'dag-cbor:bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'
 ]
-block: {
-  cid: 'bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
-  length: 55,
-  offset: 137
-}
-block: {
-  cid: 'QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d',
-  length: 97,
-  offset: 228
-}
-block: {
-  cid: 'bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke',
-  length: 4,
-  offset: 362
-}
+{"cid":"dag-cbor:bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm","length":92,"blockLength":55,"offset":100,"blockOffset":137}
+{"cid":"dag-pb:QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d","length":133,"blockLength":97,"offset":192,"blockOffset":228}
+{"cid":"raw:bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke","length":41,"blockLength":4,"offset":325,"blockOffset":362}
+{"cid":"dag-pb:QmWXZxVQ9yZfhQxLD35eDR8LiMRsYtHxYqTFCBbJoiJVys","length":130,"blockLength":94,"offset":366,"blockOffset":402}
+{"cid":"raw:bafkreiebzrnroamgos2adnbpgw5apo3z4iishhbdx77gldnbk57d4zdio4","length":41,"blockLength":4,"offset":496,"blockOffset":533}
+{"cid":"dag-pb:QmdwjhxpxzcMsR3qUuj7vUL8pbA7MgR3GAxWi2GLHjsKCT","length":82,"blockLength":47,"offset":537,"blockOffset":572}
+{"cid":"raw:bafkreidbxzk2ryxwwtqxem4l3xyyjvw35yu4tcct4cqeqxwo47zhxgxqwq","length":41,"blockLength":4,"offset":619,"blockOffset":656}
+{"cid":"dag-cbor:bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm","length":55,"blockLength":18,"offset":660,"blockOffset":697}
 ...
 ```
 
@@ -432,16 +467,16 @@ read into memory.
 
**Return value** _(`Object.<roots:CID[], iterator:AsyncIterator>`)_: an object containing a
  `roots` array of CIDs and an `iterator` AsyncIterator that will yield
-  Objects of the form `{ cid:CID, offset:number, length:number }` indicating
-  the CID of the block located at start=`offset` with a length of `number` in
-  the CAR archive provided.
+  Objects of the form `{ cid:CID, offset:number, length:number, blockOffset:number, blockLength:number }`
+  indicating the CID of the block located at `blockOffset` with a length of
+  `blockLength` in the CAR archive provided.
 
 ### `async CarDatastore.readRaw(fd, blockIndex)`
 
 Read a block directly from a CAR file given a block index provided by
-`CarDatastore.indexer()` (i.e. an object of the form:
-`{ cid:CID, offset:number, length:number }`).
+`CarDatastore.indexer()` (i.e. an object with the minimal form:
+`{ cid:CID, blockOffset:number, blockLength:number }`).
 
 **Parameters:**
 
* **`fd`** _(`number|FileHandle`)_: a file descriptor from the Node.js `fs` module, either an integer (number) from
  `fs.open()` or a `FileHandle` on `fs.promises.open()`.
* **`blockIndex`** _(`Object`)_: an index object of the style provided by
  `CarDatastore.indexer()` (`{ cid, blockOffset, blockLength }`).
 
-**Return value** _(`Block`)_: an IPLD [Block](https://ghub.io/@ipld/block) object.
+**Return value** _(`object`)_: an IPLD block of the form `{ cid, binary }`.
 
 ## License and Copyright
 
diff --git a/car-browser.js b/car-browser.js
index 3c183e5..af91c99 100644
--- a/car-browser.js
+++ b/car-browser.js
@@ -1,11 +1,11 @@
-const { NoWriter } = require('./lib/reader-writer-iface')
-const { createBufferReader } = require('./lib/reader-browser')
-const CarDatastore = require('./datastore')
+import { NoWriter } from './lib/reader-writer-iface.js'
+import { createBufferReader } from './lib/reader-browser.js'
+import CarDatastore from './datastore.js'
 
 /**
  * @name CarDatastore.readBuffer
  * @description
- * Read a CarDatastore from a Buffer containing the contents of an existing
+ * Read a CarDatastore from a Uint8Array containing the contents of an existing
 * CAR archive. Mutation operations (`put()`, `delete()` and `setRoots()`) are
 * not available.
 *
@@ -20,13 +20,25 @@ const CarDatastore = require('./datastore')
  * @memberof CarDatastore
  * @static
  * @async
- * @param {Buffer|Uint8Array} buffer the byte contents of a CAR archive
+ * @param {Uint8Array} buffer the byte contents of a CAR archive
  * @returns {CarDatastore} a read-only CarDatastore.
  */
-async function readBuffer (buffer) {
-  const reader = await createBufferReader(buffer)
+async function readBuffer (multiformats, buffer) {
+  const reader = await createBufferReader(multiformats, buffer)
   const writer = new NoWriter()
-  return new CarDatastore(reader, writer)
+  return new CarDatastore(multiformats, reader, writer)
 }
 
-module.exports.readBuffer = readBuffer
+export default (multiformats) => {
+  function wrap (fn) {
+    return function (...args) {
+      return fn(multiformats, ...args)
+    }
+  }
+
+  return {
+    readBuffer: wrap(readBuffer)
+  }
+}
+
+export { readBuffer }
diff --git a/car.js b/car.js
index 8cbbb83..9c12d19 100644
--- a/car.js
+++ b/car.js
@@ -1,16 +1,16 @@
-const { Reader, NoWriter } = require('./lib/reader-writer-iface')
-const { createStreamCompleteReader, createStreamingReader, createFileReader } = require('./lib/reader')
-const createStreamWriter = require('./lib/writer-stream')
-const CarDatastore = require('./datastore')
-const { readBuffer } = require('./car-browser')
-const { indexer, readRaw } = require('./lib/raw')
+import { Reader, NoWriter } from './lib/reader-writer-iface.js'
+import { createStreamCompleteReader, createStreamingReader, createFileReader, createFileIndexedReader } from './lib/reader.js'
+import createStreamWriter from './lib/writer-stream.js'
+import CarDatastore from './datastore.js'
+import browser from './car-browser.js'
+import { indexer, readRaw } from './lib/raw.js'
 
 /**
  * @name CarDatastore.readFileComplete
  * @description
  * Read a CAR archive from a file and return a CarDatastore. The CarDatastore
  * returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
- * and `query()`. Caching makes `get()` and `has()` possible as the entire
+ * and `query()`. Caching makes `get()` and `has()` possible because the entire
 * file is read and decoded before the CarDatastore is returned. Mutation
 * operations (`put()`, `delete()` and `setRoots()`) are not available as there
 * is no ability to modify the archive.
@@ -35,10 +35,10 @@ const { indexer, readRaw } = require('./lib/raw')
 * @param {string} file a path to a file containing CAR archive data.
 * @returns {CarDatastore} a read-only CarDatastore.
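+ *
+ * A usage sketch (the path is illustrative only):
+ *
+ * ```js
+ * const readDs = await CarDatastore.readFileComplete('example.car')
+ * const roots = await readDs.getRoots()
+ * const binary = await readDs.get(roots[0]) // served from the in-memory cache
+ * await readDs.close()
+ * ```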
 */
-async function readFileComplete (file) {
-  const reader = await createFileReader(file)
+async function readFileComplete (multiformats, file) {
+  const reader = await createFileReader(multiformats, file)
   const writer = new NoWriter()
-  return new CarDatastore(reader, writer)
+  return new CarDatastore(multiformats, reader, writer)
 }
 
 /**
@@ -46,7 +46,7 @@ async function readFileComplete (file) {
  * @description
  * Read a CAR archive as a CarDataStore from a ReadableStream. The CarDatastore
  * returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
- * and `query()`. Caching makes `get()` and `has()` possible as the entire
+ * and `query()`. Caching makes `get()` and `has()` possible because the entire
 * stream is read and decoded before the CarDatastore is returned. Mutation
 * operations (`put()`, `delete()` and `setRoots()`) are not available as there
 * is no ability to modify the archive.
@@ -65,10 +65,10 @@ async function readFileComplete (file) {
 * archive as a binary stream.
 * @returns {CarDatastore} a read-only CarDatastore.
 */
-async function readStreamComplete (stream) {
-  const reader = await createStreamCompleteReader(stream)
+async function readStreamComplete (multiformats, stream) {
+  const reader = await createStreamCompleteReader(multiformats, stream)
   const writer = new NoWriter()
-  return new CarDatastore(reader, writer)
+  return new CarDatastore(multiformats, reader, writer)
 }
 
 /**
@@ -98,10 +98,46 @@ async function readStreamComplete (stream) {
 * archive as a binary stream.
 * @returns {CarDatastore} a read-only CarDatastore.
 */
-async function readStreaming (stream) {
-  const reader = await createStreamingReader(stream)
+async function readStreaming (multiformats, stream) {
+  const reader = await createStreamingReader(multiformats, stream)
   const writer = new NoWriter()
-  return new CarDatastore(reader, writer)
+  return new CarDatastore(multiformats, reader, writer)
+}
+
+/**
+ * @name CarDatastore.readFileIndexed
+ * @description
+ * Read a CAR archive as a CarDatastore from a local file. The CarDatastore
+ * returned will _only_ support read operations: `getRoots()`, `get()`, `has()`
+ * and `query()`. Indexing makes `get()` and `has()` possible because the entire
+ * file is read and _indexed_ before the CarDatastore is returned. Mutation
+ * operations (`put()`, `delete()` and `setRoots()`) are not available as there
+ * is no ability to modify the archive.
+ *
+ * The indexing operation uses {@link indexer} to catalogue the contents of the
+ * CAR and store a mapping of CID to byte locations for each entry. This method
+ * of parsing is not as memory intensive as {@link readStreamComplete} as only
+ * the index is stored in memory. When blocks are read, the index tells the
+ * reader where to fetch the block from within the CAR file.
+ *
+ * This mode is suitable for large files where random-access operations are
+ * required. Where only a full sequential read is required, use
+ * {@link readStreaming}, which consumes the file in a single pass with no
+ * memory used for indexing.
+ *
+ * This create-mode is not available in the browser environment.
+ * @function
+ * @memberof CarDatastore
+ * @static
+ * @async
+ * @param {string} file a path to a local file containing CAR archive data.
+ * @returns {CarDatastore} a read-only CarDatastore.
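+ *
+ * A sketch of an indexed read over a large CAR (the file name is illustrative
+ * only):
+ *
+ * ```js
+ * const readDs = await CarDatastore.readFileIndexed('big.car')
+ * for await (const { key, value } of readDs.query()) {
+ *   // each block is fetched from disk on demand via the index
+ *   console.log(key, value.length)
+ * }
+ * await readDs.close()
+ * ```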
+ */
+async function readFileIndexed (multiformats, file) {
+  const reader = await createFileIndexedReader(multiformats, file)
+  const writer = new NoWriter()
+  return new CarDatastore(multiformats, reader, writer)
 }
 
 /**
@@ -129,17 +165,17 @@ async function readStreaming (stream) {
  * @param {WritableStream} stream a writable stream
  * @returns {CarDatastore} an append-only, streaming CarDatastore.
  */
-async function writeStream (stream) {
+async function writeStream (multiformats, stream) {
   const reader = new Reader()
-  const writer = await createStreamWriter(stream)
-  return new CarDatastore(reader, writer)
+  const writer = await createStreamWriter(multiformats, stream)
+  return new CarDatastore(multiformats, reader, writer)
 }
 
 async function traverseBlock (block, get, car, concurrency = 1, seen = new Set()) {
   const cid = await block.cid()
   await car.put(cid, block.encodeUnsafe())
   seen.add(cid.toString('base58btc'))
-  if (cid.codec === 'raw') {
+  if (cid.code === 0x55) { // raw
     return
   }
   const reader = block.reader()
@@ -168,8 +204,8 @@ async function traverseBlock (block, get, car, concurrency = 1, seen = new Set()
  * @memberof CarDatastore
  * @static
  * @async
- * @param {Block} root the root of the graph to start at, this block will be
- *   included in the CAR and its CID will be set as the single root.
+ * @param {CID} root the CID of the root of the graph to start at; this block
+ *   will be included in the CAR and the CID will be set as the single root.
 * @param {AsyncFunction} get an `async` function that takes a CID and returns
 *   a `Block`. Can be used to attach to an arbitrary data store.
 * @param {CarDatastore} car a writable `CarDatastore` that has not yet been
@@ -184,11 +220,23 @@ async function completeGraph (root, get, car, concurrency) {
   await car.close()
 }
 
-module.exports.readBuffer = readBuffer
-module.exports.readFileComplete = readFileComplete
-module.exports.readStreamComplete = readStreamComplete
-module.exports.readStreaming = readStreaming
-module.exports.writeStream = writeStream
-module.exports.indexer = indexer
-module.exports.readRaw = readRaw
-module.exports.completeGraph = completeGraph
+function create (multiformats) {
+  function wrap (fn) {
+    return function (...args) {
+      return fn(multiformats, ...args)
+    }
+  }
+
+  return Object.assign(browser(multiformats), {
+    readFileComplete: wrap(readFileComplete),
+    readStreamComplete: wrap(readStreamComplete),
+    readStreaming: wrap(readStreaming),
+    readFileIndexed: wrap(readFileIndexed),
+    writeStream: wrap(writeStream),
+    indexer: wrap(indexer),
+    readRaw,
+    completeGraph
+  })
+}
+
+export default create
diff --git a/datastore.js b/datastore.js
index 39cac55..407e3c4 100644
--- a/datastore.js
+++ b/datastore.js
@@ -1,15 +1,17 @@
-const { filter, map } = require('interface-datastore').utils
-const { toKey } = require('./lib/util')
+import interfaceDatastore from 'interface-datastore'
+
+const { filter, map } = interfaceDatastore.utils
 
 /**
  * CarDatastore is a class to manage reading from, and writing to, CAR archives
- * using [CID](https://github.com/multiformats/js-cid)s as keys and file names
- * in the CAR and binary block data as the file contents.
+ * using [CID](https://github.com/multiformats/js-multiformats)s as keys and
+ * file names in the CAR and binary block data as the file contents.
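+ *
+ * Keys may be `CID` instances or CID strings. A sketch (assuming `readDs` and
+ * `roots` from the examples above):
+ *
+ * ```js
+ * const viaCid = await readDs.get(roots[0])
+ * const viaString = await readDs.get(roots[0].toString()) // same block
+ * ```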
 *
 * @class
 */
class CarDatastore {
-  constructor (reader, writer) {
+  constructor (multiformats, reader, writer) {
+    this.multiformats = multiformats
     this._reader = reader
     this._writer = writer
   }
@@ -28,10 +30,10 @@ class CarDatastore {
   * @memberof CarDatastore
   * @param {string|Key|CID} key a `CID` or `CID`-convertable object to identify
   * the block.
-   * @return {Buffer} the IPLD block data referenced by the CID.
+   * @return {Uint8Array} the IPLD block data referenced by the CID.
   */
  async get (key) {
-    key = toKey(key, 'get')
+    key = toKey(this.multiformats, key, 'get')
    return this._reader.get(key)
  }
 
@@ -52,7 +54,7 @@ class CarDatastore {
   * @return {boolean} indicating whether the key exists in this Datastore.
   */
  async has (key) {
-    key = toKey(key, 'has')
+    key = toKey(this.multiformats, key, 'has')
    return this._reader.has(key)
  }
 
@@ -71,11 +73,11 @@ class CarDatastore {
   * @memberof CarDatastore
   * @param {string|Key|CID} key a `CID` or `CID`-convertable object to identify
   * the `value`.
-   * @param {Buffer|Uint8Array} value an IPLD block matching the given `key`
+   * @param {Uint8Array} value an IPLD block matching the given `key`
   * `CID`.
   */
  async put (key, value) {
-    key = toKey(key, 'put')
+    key = toKey(this.multiformats, key, 'put')
    if (!(value instanceof Uint8Array)) {
      throw new TypeError('put() can only receive Uint8Arrays or Buffers')
    }
@@ -94,7 +96,7 @@ class CarDatastore {
   * the block.
   */
  async delete (key) {
-    key = toKey(key, 'delete')
+    key = toKey(this.multiformats, key, 'delete')
    return this._writer.delete(key)
  }
 
@@ -224,4 +226,12 @@ class CarDatastore {
   }
 }
 
-module.exports = CarDatastore
+function toKey (multiformats, key, method) {
+  try {
+    return multiformats.CID.from(key.toString())
+  } catch (e) {
+    throw new TypeError(`${method}() only accepts CIDs or CID strings`)
+  }
+}
+
+export default CarDatastore
diff --git a/example.js b/example.js
index 59ba82b..20eda4f 100644
--- a/example.js
+++ b/example.js
@@ -1,10 +1,16 @@
-const fs = require('fs')
-const CarDatastore = require('./')
-const Block = require('@ipld/block')
+import fs from 'fs'
+import multiformats from 'multiformats/basics'
+import car from 'datastore-car'
+import dagCbor from '@ipld/dag-cbor'
+
+// dag-cbor is required for the CAR root block
+multiformats.add(dagCbor)
+const CarDatastore = car(multiformats)
 
 async function example () {
-  const block = Block.encoder(Buffer.from('random meaningless bytes'), 'raw')
-  const cid = await block.cid()
+  const binary = new TextEncoder().encode('random meaningless bytes')
+  const mh = await multiformats.multihash.hash(binary, 'sha2-256')
+  const cid = multiformats.CID.create(1, multiformats.get('raw').code, mh)
 
   const outStream = fs.createWriteStream('example.car')
   const writeDs = await CarDatastore.writeStream(outStream)
@@ -12,7 +18,7 @@ async function example () {
   // set the header with a single root
   await writeDs.setRoots(cid)
   // store a new block, creates a new file entry in the CAR archive
-  await writeDs.put(cid, await block.encode())
+  await writeDs.put(cid, binary)
   await writeDs.close()
 
   const inStream = fs.createReadStream('example.car')
@@ -25,10 +31,10 @@ async function example () {
   const roots = await readDs.getRoots()
-  // retrieve a block, as a UInt8Array, reading from the ZIP archive
+  // retrieve a block, as a Uint8Array, reading from the CAR archive
   const got = await readDs.get(roots[0])
-  // also possible: for await (const {key, data} = readDs.query()) { ... }
+  // also possible: for await (const { key, value } of readDs.query()) { ... 
} console.log('Retrieved [%s] from example.car with CID [%s]', - Buffer.from(got).toString(), + new TextDecoder().decode(got), roots[0].toString()) await readDs.close() diff --git a/lib/coding-browser.js b/lib/coding-browser.js index f9b335c..1da311c 100644 --- a/lib/coding-browser.js +++ b/lib/coding-browser.js @@ -1,7 +1,10 @@ -const varint = require('varint') -const CID = require('cids') -const multicodec = require('multicodec') -const Block = require('@ipld/block') +import varint from 'varint' + +const CIDV0_BYTES = { + SHA2_256: 0x12, + LENGTH: 0x20, + DAG_PB: 0x70 +} function bufferReader (buf) { let pos = 0 @@ -13,7 +16,7 @@ function bufferReader (buf) { async exactly (length) { if (length > buf.length - pos) { - throw new Error('Unexpected end of Buffer') + throw new Error('Unexpected end of data') } return buf.slice(pos, pos + length) }, @@ -37,12 +40,11 @@ async function readVarint (reader) { return i } -async function readHeader (reader) { +async function readHeader (multiformats, reader) { const length = await readVarint(reader) const header = await reader.exactly(length) reader.seek(length) - const block = Block.decoder(header, 'dag-cbor') - return block.decode() + return multiformats.decode(header, 'dag-cbor') } async function readMultihash (reader) { @@ -61,13 +63,13 @@ async function readMultihash (reader) { return multihash } -async function readCid (reader) { +async function readCid (multiformats, reader) { const first = await reader.exactly(2) - if (first[0] === 0x12 && first[1] === 0x20) { + if (first[0] === CIDV0_BYTES.SHA2_256 && first[1] === CIDV0_BYTES.LENGTH) { // cidv0 32-byte sha2-256 const bytes = await reader.exactly(34) reader.seek(34) - return new CID(bytes) + return multiformats.CID.create(0, CIDV0_BYTES.DAG_PB, Uint8Array.from(bytes)) } const version = await readVarint(reader) @@ -76,42 +78,44 @@ async function readCid (reader) { } const codec = await readVarint(reader) const multihash = await readMultihash(reader) - return new CID(version, multicodec.getName(codec), multihash) + return multiformats.CID.create(version, codec, Uint8Array.from(multihash)) } -async function readBlockHead (reader) { - // length includes a CID + Block, where CID has a variable length +async function readBlockHead (multiformats, reader) { + // length includes a CID + Binary, where CID has a variable length // we have to deal with - const totalLength = await readVarint(reader) const start = reader.pos - const cid = await readCid(reader) - const length = totalLength - (reader.pos - start) // subtract CID length + const length = await readVarint(reader) + (reader.pos - start) + const cid = await readCid(multiformats, reader) + const blockLength = length - (reader.pos - start) // subtract CID length - return { cid, length } + return { cid, length, blockLength } } -async function readBlock (reader) { - const { cid, length } = await readBlockHead(reader) - const buf = await reader.exactly(length) - reader.seek(length) - return Block.create(buf, cid) +async function readBlock (multiformats, reader) { + const { cid, blockLength } = await readBlockHead(multiformats, reader) + const binary = await reader.exactly(blockLength) + reader.seek(blockLength) + return { cid, binary } } -async function readBlockIndex (reader) { - const head = await readBlockHead(reader) - head.offset = reader.pos - reader.seek(head.length) +async function readBlockIndex (multiformats, reader) { + const offset = reader.pos + const head = await readBlockHead(multiformats, reader) + head.offset = offset + 
head.blockOffset = reader.pos + reader.seek(head.blockLength) return head } -function Decoder (reader) { - const headerPromise = readHeader(reader) +function Decoder (multiformats, reader) { + const headerPromise = readHeader(multiformats, reader) function blockReader (index) { return async function * blockIterator () { await headerPromise try { while ((await reader.upTo(8)).length > 0) { - yield await (index ? readBlockIndex(reader) : readBlock(reader)) + yield await (index ? readBlockIndex(multiformats, reader) : readBlock(multiformats, reader)) } } finally { await reader.close() @@ -125,8 +129,8 @@ function Decoder (reader) { } } -async function decode (reader) { - const decoder = Decoder(reader) +async function decode (multiformats, reader) { + const decoder = Decoder(multiformats, reader) const header = await decoder.header() const decoded = { version: header.version, @@ -139,7 +143,7 @@ async function decode (reader) { return decoded } -function Encoder (writer) { +function Encoder (multiformats, writer) { // none of this is wrapped in a mutex, that needs to happen above this to // avoid overwrites return { @@ -147,32 +151,34 @@ function Encoder (writer) { if (!Array.isArray(roots)) { roots = [roots] } - for (const root of roots) { - if (!CID.isCID(root)) { - throw new TypeError('Roots must be CIDs') + for (let i = 0; i < roots.length; i++) { + roots[i] = multiformats.CID.asCID(roots[i]) + if (!roots[i]) { + throw new TypeError('Roots must be CIDs') // or gently coercable to CIDs } } - const header = Block.encoder({ version: 1, roots }, 'dag-cbor').encode() - await writer(Buffer.from(varint.encode(header.length))) + const header = await multiformats.encode({ version: 1, roots }, 'dag-cbor') + await writer(new Uint8Array(varint.encode(header.length))) await writer(header) }, async writeBlock (block) { - if (!Block.isBlock(block)) { - throw new TypeError('Block list must contain @ipld/block objects') + if (typeof block !== 'object' || + !(block.binary instanceof Uint8Array) || + !block.cid || + !(block.cid.bytes instanceof Uint8Array)) { + throw new TypeError('Block list must be of type { cid, binary }') } - const cid = await block.cid() - const encoded = block.encode() - await writer(Buffer.from(varint.encode(cid.buffer.length + encoded.length))) - await writer(cid.buffer) - await writer(encoded) + await writer(new Uint8Array(varint.encode(block.cid.bytes.length + block.binary.length))) + await writer(block.cid.bytes) + await writer(block.binary) } } } -async function encode (writer, roots, blocks) { - const encoder = Encoder(writer) +async function encode (multiformats, writer, roots, blocks) { + const encoder = Encoder(multiformats, writer) await encoder.setRoots(roots) for (const block of blocks) { await encoder.writeBlock(block) @@ -182,14 +188,14 @@ async function encode (writer, roots, blocks) { /** * @name Car.decodeBuffer * @description - * Decode a `Buffer` representation of a Content ARchive (CAR) into an in-memory - * representation: + * Decode a `Uint8Array` representation of a Content ARchive (CAR) into an + * in-memory representation: * * `{ version, roots[], blocks[] }`. * * Where `version` is always `1`, `roots` is an array of - * [CID](https://ghub.io/cids)s and `blocks` is an array of IPLD - * [Block](https://ghub.io/@ipld/block)s. + * [CID](https://ghub.io/cids)s and `blocks` is an array of IPLD blocks of the + * form `{ cid, binary }`. 
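+ *
+ * For example (a sketch; `multiformats` and `carBytes`, a `Uint8Array` holding
+ * an entire CAR archive, are assumed):
+ *
+ * ```js
+ * const { version, roots, blocks } = await decodeBuffer(multiformats, carBytes)
+ * for (const { cid, binary } of blocks) {
+ *   console.log(cid.toString(), binary.length)
+ * }
+ * ```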
* * Not intended to be part of the public API of datastore-car but may currently be * invoked via `require('datastore-car/lib/coding').decodeBuffer`, or @@ -199,14 +205,14 @@ async function encode (writer, roots, blocks) { * @memberof Car * @static * @async - * @param {Buffer} buf the contents of a CAR + * @param {Uint8Array} buf the contents of a CAR * @returns {Car} an in-memory representation of a CAR file: * `{ version, roots[], blocks[] }`. */ -async function decodeBuffer (buf) { +async function decodeBuffer (multiformats, buf) { const reader = bufferReader(buf) - return decode(reader) + return decode(multiformats, reader) } /* unnecessary, but this is possible: @@ -219,8 +225,8 @@ function BufferDecoder (buf) { /** * @name Car.encodeBuffer * @description - * Encode a set of IPLD [Block](https://ghub.io/@ipld/block)s in CAR format, - * returning it as a single `Buffer`. + * Encode a set of IPLD blocks of the form `{ cid, binary }` in CAR format, + * returning it as a single `Uint8Array` * @function * @memberof Car * @static @@ -228,21 +234,22 @@ function BufferDecoder (buf) { * @param {CID[]} roots an array of root [CID](https://ghub.io/cids)s to set in * the header of the archive. These are intended to be the merkle roots of all * blocks. - * @param {Block[]} blocks an array of IPLD [Block](https://ghub.io/@ipld/block)s + * @param {object[]} blocks an array of IPLD blocks of the form `{ cid, binary }` * to append to the archive. - * @returns {Buffer} a `Buffer` representing the created archive. + * @returns {Uint8Array} a binary representation of the created archive. */ -async function encodeBuffer (roots, blocks) { +async function encodeBuffer (multiformats, roots, blocks) { const bl = [] const writer = (buf) => { bl.push(buf) } - await encode(writer, roots, blocks) - return Buffer.concat(bl) + await encode(multiformats, writer, roots, blocks) + const ret = new Uint8Array(bl.reduce((p, c) => p + c.length, 0)) + let off = 0 + for (const b of bl) { + ret.set(b, off) + off += b.length + } + return ret } -module.exports.encode = encode -module.exports.Encoder = Encoder -module.exports.decode = decode -module.exports.Decoder = Decoder -module.exports.decodeBuffer = decodeBuffer -module.exports.encodeBuffer = encodeBuffer +export { encode, Encoder, decode, Decoder, decodeBuffer, encodeBuffer } diff --git a/lib/coding.js b/lib/coding.js index 873fc96..334d6fa 100644 --- a/lib/coding.js +++ b/lib/coding.js @@ -1,9 +1,15 @@ -const fs = require('fs').promises -fs.createWriteStream = require('fs').createWriteStream -const { promisify } = require('util') -const { PassThrough, pipeline } = require('stream') +import fsCb from 'fs' +import { promisify } from 'util' +import stream from 'readable-stream' +import { Encoder, Decoder, encode, decode, encodeBuffer, decodeBuffer } from './coding-browser.js' + +const { PassThrough, pipeline } = stream const pipelineAsync = promisify(pipeline) -const { Encoder, Decoder, encode, decode, encodeBuffer, decodeBuffer } = require('./coding-browser') +let fs +if (!process.browser) { + fs = fsCb.promises + fs.createWriteStream = fsCb.createWriteStream +} // reusable reader for streams and files, we just need a way to read an // additional chunk (of some undetermined size) and a way to close the @@ -12,7 +18,7 @@ function chunkReader (readChunk, closer) { let pos = 0 let have = 0 let offset = 0 - let currentChunk = Buffer.alloc(0) + let currentChunk = new Uint8Array(0) const read = async (length) => { have = currentChunk.length - offset @@ -22,10 +28,22 @@ function 
chunkReader (readChunk, closer) { if (chunk.length === 0) { break } + if (have < 0) { // because of a seek() + // istanbul ignore next toohard to test the else + if (chunk.length > have) { + bufa.push(chunk.slice(-have)) + } // else discard + } else { + bufa.push(chunk) + } have += chunk.length - bufa.push(chunk) } - currentChunk = Buffer.concat(bufa) + currentChunk = new Uint8Array(bufa.reduce((p, c) => p + c.length, 0)) + let off = 0 + for (const b of bufa) { + currentChunk.set(b, off) + off += b.length + } offset = 0 } @@ -69,7 +87,7 @@ function streamReader (stream) { async function readChunk () { const next = await iterator.next() if (next.done) { - return Buffer.alloc(0) + return new Uint8Array(0) } return next.value } @@ -78,14 +96,18 @@ function streamReader (stream) { } async function fileReader (file, options) { + /* c8 ignore next 3 */ + if (!fs) { + throw new Error('fileReader() not supported in a browser environment') + } const fd = await fs.open(file, 'r') const bufferSize = typeof options === 'object' && typeof options.bufferSize === 'number' ? options.bufferSize : 1024 - const readerChunk = Buffer.alloc(bufferSize) + const readerChunk = new Uint8Array(bufferSize) async function readChunk () { const { bytesRead } = await fd.read(readerChunk, 0, readerChunk.length) if (!bytesRead) { - return Buffer.alloc(0) + return new Uint8Array(0) } return Uint8Array.prototype.slice.call(readerChunk, 0, bytesRead) } @@ -106,7 +128,7 @@ async function fileReader (file, options) { * * Where `version` is always `1`, `roots` is an array of * [CID](https://ghub.io/cids)s and `blocks` is an array of IPLD - * [Block](https://ghub.io/@ipld/block)s. + * blocks of the form `{ cid, binary }`. * * Not intended to be part of the public API of datastore-car but may currently be * invoked via `require('datastore-car/lib/coding').decodeFile`. @@ -119,14 +141,14 @@ async function fileReader (file, options) { * `{ version, roots[], blocks[] }`. */ -async function decodeFile (file, options) { +async function decodeFile (multiformats, file, options) { const reader = await fileReader(file, options) - return decode(reader) + return decode(multiformats, reader) } -async function FileDecoder (file, options) { +async function FileDecoder (multiformats, file, options) { const reader = await fileReader(file, options) - return Decoder(reader) + return Decoder(multiformats, reader) } /** @@ -139,7 +161,7 @@ async function FileDecoder (file, options) { * * Where `version` is always `1`, `roots` is an array of * [CID](https://ghub.io/cids)s and `blocks` is an array of IPLD - * [Block](https://ghub.io/@ipld/block)s. + * blocks of the form `{ cid, binary }`. * * Not intended to be part of the public API of datastore-car but may currently be * invoked via `require('datastore-car/lib/coding').decodeStream`. @@ -152,20 +174,20 @@ async function FileDecoder (file, options) { * `{ version, roots[], blocks[] }`. */ -async function decodeStream (stream) { +async function decodeStream (multiformats, stream) { const reader = streamReader(stream) - return decode(reader) + return decode(multiformats, reader) } -function StreamDecoder (stream) { +function StreamDecoder (multiformats, stream) { const reader = streamReader(stream) - return Decoder(reader) + return Decoder(multiformats, reader) } /** * @name coding.encodeFile * @description - * Encode a set of IPLD [Block](https://ghub.io/@ipld/block)s in CAR format, + * Encode a set of IPLD blocks of the form `{ cid, binary }` in CAR format, * writing to a file. 
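+ *
+ * A sketch (the output path is illustrative; `roots` and `blocks` of the form
+ * `{ cid, binary }` are assumed to have been assembled elsewhere):
+ *
+ * ```js
+ * await encodeFile(multiformats, 'out.car', roots, blocks)
+ * ```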
 *
 * Not intended to be part of the public API of datastore-car but may currently be
@@ -178,18 +200,22 @@ function StreamDecoder (stream) {
  * @param {CID[]} roots an array of root [CID](https://ghub.io/cids)s to set in
  * the header of the archive. These are intended to be the merkle roots of all
  * blocks.
- * @param {Block[]} blocks an array of IPLD [Block](https://ghub.io/@ipld/block)s
+ * @param {object[]} blocks an array of IPLD blocks of the form `{ cid, binary }`
  * to append to the archive.
  */
-function encodeFile (file, roots, blocks) {
-  return pipelineAsync(encodeStream(roots, blocks), fs.createWriteStream(file))
+async function encodeFile (multiformats, file, roots, blocks) {
+  /* c8 ignore next 3 */
+  if (!fs) {
+    throw new Error('encodeFile() not supported in a browser environment')
+  }
+  return pipelineAsync(encodeStream(multiformats, roots, blocks), fs.createWriteStream(file))
 }
 
 /**
  * @name coding.encodeStream
  * @description
- * Encode a set of IPLD [Block](https://ghub.io/@ipld/block)s in CAR format,
+ * Encode a set of IPLD blocks of the form `{ cid, binary }` in CAR format,
  * writing the data to a stream.
  *
  * There is currently no method to stream blocks into an encodeStream so you
@@ -204,15 +230,15 @@
  * @param {CID[]} roots an array of root [CID](https://ghub.io/cids)s to set in
  * the header of the archive. These are intended to be the merkle roots of all
  * blocks.
- * @param {Block[]} blocks an array of IPLD [Block](https://ghub.io/@ipld/block)s
+ * @param {object[]} blocks an array of IPLD blocks of the form `{ cid, binary }`
  * to append to the archive.
  * @returns {ReadableStream} a stream that the CAR will be written to.
  */
-function encodeStream (roots, blocks) {
+function encodeStream (multiformats, roots, blocks) {
   const stream = new PassThrough()
   const writer = createStreamWriter(stream)
-  encode(writer, roots, blocks)
+  encode(multiformats, writer, roots, blocks)
     .then(() => {
       stream.end()
     }).catch((err) => {
@@ -238,14 +264,16 @@ function createStreamWriter (stream) {
   }
 }
 
-module.exports.Encoder = Encoder
-module.exports.Decoder = Decoder
-module.exports.decodeFile = decodeFile
-module.exports.decodeStream = decodeStream
-module.exports.StreamDecoder = StreamDecoder
-module.exports.FileDecoder = FileDecoder
-module.exports.decodeBuffer = decodeBuffer
-module.exports.encodeFile = encodeFile
-module.exports.encodeStream = encodeStream
-module.exports.encodeBuffer = encodeBuffer
-module.exports.createStreamWriter = createStreamWriter
+export {
+  Encoder,
+  Decoder,
+  decodeFile,
+  decodeStream,
+  StreamDecoder,
+  FileDecoder,
+  decodeBuffer,
+  encodeFile,
+  encodeStream,
+  encodeBuffer,
+  createStreamWriter
+}
diff --git a/lib/raw.js b/lib/raw.js
index 9b71b1e..e997558 100644
--- a/lib/raw.js
+++ b/lib/raw.js
@@ -1,8 +1,10 @@
-const { promisify } = require('util')
-const fs = require('fs')
-fs.read = promisify(fs.read)
-const Block = require('@ipld/block')
-const { StreamDecoder, FileDecoder } = require('./coding')
+import { promisify } from 'util'
+import fs from 'fs'
+import { StreamDecoder, FileDecoder } from './coding.js'
+
+if (!process.browser) {
+  fs.read = promisify(fs.read)
+}
 
 /**
  * @name CarDatastore.indexer
  * @description
@@ -17,15 +19,19 @@ const { StreamDecoder, FileDecoder } = require('./coding')
  * `CarDatastore.readRaw()`).
 *
 * ```js
- * const { indexer } = require('datastore-car')
+ * // full multiformats omitted, you'll need codecs, bases and hashes that
+ * // appear in your CAR files if you want full information
+ * const multiformats = ...
+ * const { indexer } = require('datastore-car')(multiformats)
 *
 * async function run () {
+ *   const cidStr = (cid) => `${multiformats.get(cid.code).name}:${cid.toString()}`
 *   const index = await indexer('big.car')
- *   index.roots = index.roots.map((cid) => cid.toString())
+ *   index.roots = index.roots.map(cidStr)
 *   console.log('roots:', index.roots)
 *   for await (const blockIndex of index.iterator) {
- *     blockIndex.cid = blockIndex.cid.toString()
- *     console.log('block:', blockIndex)
+ *     blockIndex.cid = cidStr(blockIndex.cid)
+ *     console.log(JSON.stringify(blockIndex))
 *   }
 * }
 *
@@ -39,24 +45,17 @@
 *
 * ```
 * roots: [
- *   'bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
- *   'bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'
+ *   'dag-cbor:bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
+ *   'dag-cbor:bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'
 * ]
- * block: {
- *   cid: 'bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm',
- *   length: 55,
- *   offset: 137
- * }
- * block: {
- *   cid: 'QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d',
- *   length: 97,
- *   offset: 228
- * }
- * block: {
- *   cid: 'bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke',
- *   length: 4,
- *   offset: 362
- * }
+ * {"cid":"dag-cbor:bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm","length":92,"blockLength":55,"offset":100,"blockOffset":137}
+ * {"cid":"dag-pb:QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d","length":133,"blockLength":97,"offset":192,"blockOffset":228}
+ * {"cid":"raw:bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke","length":41,"blockLength":4,"offset":325,"blockOffset":362}
+ * {"cid":"dag-pb:QmWXZxVQ9yZfhQxLD35eDR8LiMRsYtHxYqTFCBbJoiJVys","length":130,"blockLength":94,"offset":366,"blockOffset":402}
+ * {"cid":"raw:bafkreiebzrnroamgos2adnbpgw5apo3z4iishhbdx77gldnbk57d4zdio4","length":41,"blockLength":4,"offset":496,"blockOffset":533}
+ * {"cid":"dag-pb:QmdwjhxpxzcMsR3qUuj7vUL8pbA7MgR3GAxWi2GLHjsKCT","length":82,"blockLength":47,"offset":537,"blockOffset":572}
+ * {"cid":"raw:bafkreidbxzk2ryxwwtqxem4l3xyyjvw35yu4tcct4cqeqxwo47zhxgxqwq","length":41,"blockLength":4,"offset":619,"blockOffset":656}
+ * {"cid":"dag-cbor:bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm","length":55,"blockLength":18,"offset":660,"blockOffset":697}
 * ...
 * ```
 *
@@ -75,16 +74,16 @@
 * or a ReadableStream that provides CAR archive data.
 * @returns {Object} an object containing a
 * `roots` array of CIDs and an `iterator` AsyncIterator that will yield
- * Objects of the form `{ cid:CID, offset:number, length:number }` indicating
- * the CID of the block located at start=`offset` with a length of `number` in
- * the CAR archive provided.
+ * Objects of the form `{ cid:CID, offset:number, length:number, blockOffset:number, blockLength:number }`
+ * indicating the CID of the block located at `blockOffset` with a length of
+ * `blockLength` in the CAR archive provided.
 */
-async function indexer (inp) {
+async function indexer (multiformats, inp) {
   let decoder
   if (typeof inp === 'string') {
-    decoder = await FileDecoder(inp)
+    decoder = await FileDecoder(multiformats, inp)
   } else if (typeof inp === 'object' && typeof inp.pipe === 'function') {
-    decoder = StreamDecoder(inp)
+    decoder = StreamDecoder(multiformats, inp)
   } else {
     throw new TypeError('indexer() requires a file path or a ReadableStream')
   }
@@ -97,8 +96,8 @@
 * @name CarDatastore.readRaw
 * @description
 * Read a block directly from a CAR file given a block index provided by
- * `CarDatastore.indexer()` (i.e. an object of the form:
- * `{ cid:CID, offset:number, length:number }`).
+ * `CarDatastore.indexer()` (i.e. an object with the minimal form:
+ * `{ cid:CID, blockOffset:number, blockLength:number }`).
 * @function
 * @memberof CarDatastore
 * @static
 * @async
 * @param {number|FileHandle} fd a file descriptor from the Node.js `fs` module, either an integer (number) from
 * `fs.open()` or a `FileHandle` on `fs.promises.open()`.
 * @param {Object} blockIndex an index object of the style provided by
 * `CarDatastore.indexer()` (`{ cid, blockOffset, blockLength }`).
- * @returns {Block} an IPLD [Block](https://ghub.io/@ipld/block) object.
+ * @returns {object} an IPLD block of the form `{ cid, binary }`.
 */
 async function readRaw (fd, blockIndex) {
-  const cid = blockIndex.cid
-  const buf = Buffer.alloc(blockIndex.length)
+  /* c8 ignore next 3 */
+  if (process.browser) {
+    throw new Error('readRaw() not supported in a browser environment')
+  }
+  const { cid, blockLength, blockOffset } = blockIndex
+  const binary = new Uint8Array(blockLength)
   let read
   if (typeof fd === 'number') {
-    read = (await fs.read(fd, buf, 0, blockIndex.length, blockIndex.offset)).bytesRead
+    read = (await fs.read(fd, binary, 0, blockLength, blockOffset)).bytesRead
   } else if (typeof fd === 'object' && typeof fd.read === 'function') {
-    read = (await fd.read(buf, 0, blockIndex.length, blockIndex.offset)).bytesRead
+    read = (await fd.read(binary, 0, blockLength, blockOffset)).bytesRead
   } else {
     throw new TypeError('Bad fd')
   }
-  if (read !== blockIndex.length) {
-    throw new Error(`Failed to read entire block (${read} instead of ${blockIndex.length})`)
+  if (read !== blockLength) {
+    throw new Error(`Failed to read entire block (${read} instead of ${blockLength})`)
   }
-  return Block.create(buf, cid)
+  return { cid, binary }
 }
 
-module.exports.indexer = indexer
-module.exports.readRaw = readRaw
+export { indexer, readRaw }
diff --git a/lib/reader-browser.js b/lib/reader-browser.js
index b2e4c96..82a2839 100644
--- a/lib/reader-browser.js
+++ b/lib/reader-browser.js
@@ -1,9 +1,9 @@
-const coding = require('./coding-browser')
-const { createFromDecoded } = require('./reader-writer-iface')
+import * as coding from './coding-browser.js'
+import { createFromDecoded } from './reader-writer-iface.js'
 
-async function createBufferReader (data) {
-  const decoded = await coding.decodeBuffer(data)
-  return createFromDecoded(decoded)
+async function createBufferReader (multiformats, data) {
+  const decoded = await coding.decodeBuffer(multiformats, data)
+  return createFromDecoded(multiformats, decoded)
 }
 
-module.exports.createBufferReader = createBufferReader
+export { createBufferReader }
diff --git a/lib/reader-writer-iface.js b/lib/reader-writer-iface.js
index 16dc326..44c6b02 100644
--- a/lib/reader-writer-iface.js
+++ b/lib/reader-writer-iface.js
@@ -1,7 +1,12 @@
-const { Errors } = require('interface-datastore')
-const { verifyRoots, cidToKey } = require('./util')
+import 
interfaceDatastore from 'interface-datastore' + +const { Errors } = interfaceDatastore class Reader { + constructor (multiformats) { + this.multiformats = multiformats + } + /* c8 ignore next 3 */ get () { throw new Error('Unimplemented method') @@ -25,6 +30,10 @@ class Reader { } class Writer { + constructor (multiformats) { + this.multiformats = multiformats + } + put () { throw new Error('Unimplemented method') } @@ -34,7 +43,13 @@ class Writer { } setRoots (roots) { - this.roots = verifyRoots(roots) + for (let i = 0; i < roots.length; i++) { + roots[i] = this.multiformats.CID.asCID(roots[i]) + if (!roots[i]) { + throw new TypeError('Roots must be CIDs') // or gently coercable to CIDs + } + } + this.roots = roots } close () {} @@ -47,21 +62,21 @@ class NoWriter extends Writer { } class DecodedReader extends Reader { - constructor (carData) { - super() + constructor (multiformats, carData) { + super(multiformats) this._carData = carData } has (key) { - return this._carData.keys.indexOf(key) > -1 + return this._carData.keys.indexOf(key.toString()) > -1 } async get (key) { - const index = this._carData.keys.indexOf(key) + const index = this._carData.keys.indexOf(key.toString()) if (index < 0) { throw Errors.notFoundError() } - return this._carData.blocks[index].encode() + return this._carData.blocks[index].binary } keys () { @@ -74,8 +89,8 @@ class DecodedReader extends Reader { } class StreamingReader extends Reader { - constructor (decoder) { - super() + constructor (multiformats, decoder) { + super(multiformats) this._decoder = decoder } @@ -94,25 +109,20 @@ class StreamingReader extends Reader { async * iterator (keysOnly) { // TODO: optimise `keysOnly` case by skipping over decoding blocks and just read the CIDs for await (const block of this._decoder.blocks()) { - const key = cidToKey(await block.cid()) + const key = block.cid.toString() if (keysOnly) { yield { key } } else { - const value = block.encode() - yield { key, value } + yield { key, value: block.binary } } } } } -async function createFromDecoded (decoded) { - const cids = await Promise.all(decoded.blocks.map((b) => b.cid())) +async function createFromDecoded (multiformats, decoded) { + const cids = decoded.blocks.map((b) => b.cid) decoded.keys = cids.map((c) => c.toString()) - return new DecodedReader(decoded) + return new DecodedReader(multiformats, decoded) } -module.exports.createFromDecoded = createFromDecoded -module.exports.Reader = Reader -module.exports.Writer = Writer -module.exports.StreamingReader = StreamingReader -module.exports.NoWriter = NoWriter +export { createFromDecoded, Reader, Writer, StreamingReader, NoWriter } diff --git a/lib/reader.js b/lib/reader.js index 999ba7f..4020989 100644 --- a/lib/reader.js +++ b/lib/reader.js @@ -1,21 +1,89 @@ -const coding = require('./coding') -const { createFromDecoded, StreamingReader } = require('./reader-writer-iface') +import { promises as fs } from 'fs' +import interfaceDatastore from 'interface-datastore' +import * as coding from './coding.js' +import { createFromDecoded, StreamingReader, Reader } from './reader-writer-iface.js' +import { indexer, readRaw } from './raw.js' -async function createStreamCompleteReader (stream) { - const decoded = await coding.decodeStream(stream) - return createFromDecoded(decoded) +const { Errors } = interfaceDatastore + +async function createStreamCompleteReader (multiformats, stream) { + const decoded = await coding.decodeStream(multiformats, stream) + return createFromDecoded(multiformats, decoded) +} + +async function 
createStreamingReader (multiformats, stream) { + const decoder = coding.StreamDecoder(multiformats, stream) + return new StreamingReader(multiformats, decoder) +} + +async function createFileReader (multiformats, data) { + const decoded = await coding.decodeFile(multiformats, data) + return createFromDecoded(multiformats, decoded) } -async function createStreamingReader (stream) { - const decoder = coding.StreamDecoder(stream) - return new StreamingReader(decoder) +async function createFileIndexedReader (multiformats, path) { + const { roots, iterator } = await indexer(multiformats, path) + const index = new Map() + const order = [] + for await (const { cid, blockLength, blockOffset } of iterator) { + const cidStr = cid.toString() + index.set(cidStr, { cid, blockLength, blockOffset }) + order.push(cidStr) + } + return new IndexedReader(multiformats, path, roots, index, order) } -async function createFileReader (data) { - const decoded = await coding.decodeFile(data) - return createFromDecoded(decoded) +class IndexedReader extends Reader { + constructor (multiformats, path, roots, index, order) { + super(multiformats) + this._path = path + this._roots = roots + this._index = index + this._order = order + this._fd = null + } + + has (key) { + return this._index.has(key.toString()) + } + + async get (key) { + const blockIndex = this._index.get(key.toString()) + if (!blockIndex) { + throw Errors.notFoundError() + } + if (!this._fd) { + this._fd = await fs.open(this._path) + } + return (await readRaw(this._fd, blockIndex)).binary + } + + async * iterator (keysOnly) { + if (keysOnly) { + for (const key of this._order) { + yield { key } + } + } else { + if (!this._fd) { + this._fd = await fs.open(this._path) + } + for (const cidStr of this._order) { + const blockIndex = this._index.get(cidStr) + const { binary } = await readRaw(this._fd, blockIndex) + yield { key: cidStr, value: binary } + } + } + } + + get roots () { + return this._roots + } + + async close () { + if (this._fd) { + return this._fd.close() + } + } } -module.exports.createStreamCompleteReader = createStreamCompleteReader -module.exports.createStreamingReader = createStreamingReader -module.exports.createFileReader = createFileReader +export { createStreamCompleteReader, createStreamingReader, createFileReader, createFileIndexedReader } diff --git a/lib/util.js b/lib/util.js deleted file mode 100644 index 1efb88b..0000000 --- a/lib/util.js +++ /dev/null @@ -1,35 +0,0 @@ -const CID = require('cids') - -function toKey (key, method) { - if (!CID.isCID(key)) { - try { - key = new CID(key.toString()) - } catch (e) { - throw new TypeError(`${method}() only accepts CIDs or CID strings`) - } - } - - return cidToKey(key) -} - -function cidToKey (cid) { - // toBaseEncodedString() is supposed to do this automatically but let's be explicit to be - // sure & future-proof - return cid.toBaseEncodedString(cid.version === 0 ? 
'base58btc' : 'base32') -} - -function verifyRoots (roots) { - if (!Array.isArray(roots)) { - roots = [roots] - } - for (const root of roots) { - if (!CID.isCID(root)) { - throw new TypeError('Roots may only be a CID or an array of CIDs') - } - } - return roots -} - -module.exports.toKey = toKey -module.exports.cidToKey = cidToKey -module.exports.verifyRoots = verifyRoots diff --git a/lib/writer-stream.js b/lib/writer-stream.js index 6456a2f..0dd7ac3 100644 --- a/lib/writer-stream.js +++ b/lib/writer-stream.js @@ -1,14 +1,12 @@ -const CID = require('cids') -const Block = require('@ipld/block') -const { Writer } = require('./reader-writer-iface') -const { Encoder, createStreamWriter } = require('./coding') +import { Writer } from './reader-writer-iface.js' +import { Encoder, createStreamWriter } from './coding.js' class StreamWriter extends Writer { - constructor (outStream) { - super() + constructor (multiformats, outStream) { + super(multiformats) this._outStream = outStream const writer = createStreamWriter(outStream) - this._encoder = Encoder(writer) + this._encoder = Encoder(multiformats, writer) this._mutex = null } @@ -33,7 +31,7 @@ class StreamWriter extends Writer { // no roots, too late to set any now but we need to write the header this.setRoots([]) } - this._mutex = this._mutex.then(() => this._encoder.writeBlock(Block.create(value, new CID(key)))) + this._mutex = this._mutex.then(() => this._encoder.writeBlock({ cid: key, binary: value })) return this._mutex } @@ -49,8 +47,8 @@ class StreamWriter extends Writer { } } -function create (stream) { - return new StreamWriter(stream) +function create (multiformats, stream) { + return new StreamWriter(multiformats, stream) } -module.exports = create +export default create diff --git a/package.json b/package.json index 3563019..488adad 100644 --- a/package.json +++ b/package.json @@ -3,17 +3,21 @@ "version": "1.2.0", "description": "Content ARchive format reader and writer", "main": "car.js", - "browser": "car-browser.js", + "type": "module", + "browser": "./dist/cjs/car-browser.js", "directories": { "test": "test" }, "scripts": { + "build": "npm_config_yes=true ipjs build --tests", + "prepublishOnly": "npm run build", "lint": "standard", - "test:browser": "polendina --cleanup test/test-readbuffer.js test/test-query.js", + "test:cjs": "rm -rf dist && npm run build && cp test/go.car dist/cjs/node-test/ && mocha dist/cjs/node-test/test-*.js && npm run test:cjs:browser", "test:node": "hundreds mocha test/test-*.js", - "test": "npm run lint && npm run test:node && npm run test:browser", - "docs": "jsdoc4readme --readme *.js lib/raw.js", - "coverage": "c8 --reporter=html --reporter=text mocha test/test-*.js && npx st -d coverage -p 8888" + "test:cjs:browser": "polendina --cleanup dist/cjs/browser-test/test-readbuffer.js dist/cjs/browser-test/test-query.js", + "test": "npm run lint && npm run test:node && npm run test:cjs", + "coverage": "c8 --reporter=html --reporter=text mocha test/test-*.js && npx st -d coverage -p 8888", + "docs": "jsdoc4readme --readme *.js lib/raw.js" }, "repository": { "type": "git", @@ -31,19 +35,32 @@ }, "homepage": "https://github.com/rvagg/js-datastore-car#readme", "devDependencies": { - "bl": "^4.0.2", + "@ipld/block": "^6.0.3", + "@ipld/dag-cbor": "^1.1.10", + "assert": "^2.0.0", + "bl": "^4.0.3", + "chai": "^4.2.0", + "chai-as-promised": "^7.1.1", + "cids": "^1.0.0", "garbage": "0.0.0", - "hundreds": "0.0.7", + "hundreds": "0.0.8", + "ipjs": "^3.1.11", + "ipld-dag-pb": "^0.20.0", "jsdoc4readme": "^1.3.0", - 
"mocha": "^7.2.0", - "polendina": "^1.0.0", + "mocha": "^8.1.2", + "multiformats": "^3.0.3", + "polendina": "^1.1.0", "standard": "^14.3.4" }, "dependencies": { - "@ipld/block": "^4.0.0", - "cids": "^0.8.0", - "interface-datastore": "^1.0.4", - "multicodec": "^1.0.1", + "interface-datastore": "^2.0.0", + "readable-stream": "^3.6.0", "varint": "^5.0.0" + }, + "exports": { + ".": { + "import": "./car.js", + "browser": "./car-browser.js" + } } } diff --git a/test/fixture-data.js b/test/fixture-data.js index eaa2e4a..c532cac 100644 --- a/test/fixture-data.js +++ b/test/fixture-data.js @@ -1,54 +1,80 @@ -const assert = require('assert') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import IpldDagPb from 'ipld-dag-pb' +import CID from 'cids' +import multiformats from 'multiformats/basics' -const CID = require('cids') -const Block = require('@ipld/block') -const { DAGNode, DAGLink } = require('ipld-dag-pb') -const pbUtil = require('ipld-dag-pb').util +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' -const rawBlocks = 'aaaa bbbb cccc zzzz'.split(' ').map((s) => Block.encoder(Buffer.from(s), 'raw')) +chai.use(chaiAsPromised) +const { assert } = chai + +const { DAGNode, DAGLink, util: pbUtil } = IpldDagPb + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) + +let rawBlocks const pbBlocks = [] const cborBlocks = [] -const allBlocks = [['raw', rawBlocks.slice(0, 3)], ['pb', pbBlocks], ['cbor', cborBlocks]] +let allBlocks let allBlocksFlattened -const acid = new CID('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm') +const acid = multiformats.CID.from('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm') function toCBORStruct (name, link) { return { name, link } } -async function toBlock (pnd) { - const buf = pbUtil.serialize(pnd) - const cid = await pbUtil.cid(buf, { cidVersion: 0 }) - return Block.create(buf, cid) +async function toPbBlock (pnd) { + const binary = pbUtil.serialize(pnd) + const cid = multiformats.CID.from((await pbUtil.cid(binary, { cidVersion: 0 })).toString()) + return { cid, binary } +} + +async function toCborBlock (cb) { + const binary = await multiformats.encode(cb, 'dag-cbor') + const mh = await multiformats.multihash.hash(binary, 'sha2-256') + const cid = multiformats.CID.create(1, multiformats.get('dag-cbor').code, mh) + return { cid, binary } +} + +async function toRawBlock (binary) { + const mh = await multiformats.multihash.hash(binary, 'sha2-256') + const cid = multiformats.CID.create(1, multiformats.get('raw').code, mh) + return { cid, binary } } async function makeData () { - if (!pbBlocks.length) { + if (!rawBlocks) { + rawBlocks = await Promise.all('aaaa bbbb cccc zzzz'.split(' ').map((s) => toRawBlock(new TextEncoder().encode(s)))) + const pnd1 = new DAGNode(null, [ - new DAGLink('cat', await (rawBlocks[0].encode()).byteLength, await rawBlocks[0].cid()) + new DAGLink('cat', rawBlocks[0].binary.byteLength, new CID(rawBlocks[0].cid.toString())) ]) - pbBlocks.push(await toBlock(pnd1)) + pbBlocks.push(await toPbBlock(pnd1)) const pnd2 = new DAGNode(null, [ - new DAGLink('dog', await (rawBlocks[1].encode()).byteLength, await rawBlocks[1].cid()), - new DAGLink('first', pnd1.size, await pbBlocks[0].cid()) + new DAGLink('dog', rawBlocks[1].binary.byteLength, new CID(rawBlocks[1].cid.toString())), + new DAGLink('first', pnd1.size, new CID(pbBlocks[0].cid.toString()).toString()) ]) - pbBlocks.push(await toBlock(pnd2)) + pbBlocks.push(await toPbBlock(pnd2)) const pnd3 = new 
DAGNode(null, [ - new DAGLink('bear', await (rawBlocks[2].encode()).byteLength, await rawBlocks[2].cid()), - new DAGLink('second', pnd2.size, await pbBlocks[1].cid()) + new DAGLink('bear', rawBlocks[2].binary.byteLength, new CID(rawBlocks[2].cid.toString())), + new DAGLink('second', pnd2.size, new CID(pbBlocks[1].cid.toString())) ]) - pbBlocks.push(await toBlock(pnd3)) + pbBlocks.push(await toPbBlock(pnd3)) - const cbstructs = [toCBORStruct('blip', await pbBlocks[2].cid()), toCBORStruct('limbo', null)] + const cbstructs = [toCBORStruct('blip', pbBlocks[2].cid), toCBORStruct('limbo', null)] for (const b of cbstructs) { - cborBlocks.push(Block.encoder(b, 'dag-cbor')) + cborBlocks.push(await toCborBlock(b)) } - } - allBlocksFlattened = allBlocks.reduce((p, c) => p.concat(c[1]), []) + allBlocks = [['raw', rawBlocks.slice(0, 3)], ['pb', pbBlocks], ['cbor', cborBlocks]] + allBlocksFlattened = allBlocks.reduce((p, c) => p.concat(c[1]), []) + } return { rawBlocks, @@ -64,23 +90,26 @@ async function verifyDecoded (decoded, singleRoot) { assert.strictEqual(decoded.version, 1) assert.strictEqual(decoded.roots.length, singleRoot ? 1 : 2) - assert.strictEqual(decoded.roots[0].toString(), (await cborBlocks[0].cid()).toString()) + assert.strictEqual(decoded.roots[0].toString(), cborBlocks[0].cid.toString()) if (!singleRoot) { - assert.strictEqual(decoded.roots[1].toString(), (await cborBlocks[1].cid()).toString()) + assert.strictEqual(decoded.roots[1].toString(), cborBlocks[1].cid.toString()) } assert.strictEqual(decoded.blocks.length, allBlocksFlattened.length) const expectedBlocks = allBlocksFlattened.slice() const expectedCids = [] for (const block of expectedBlocks) { - expectedCids.push((await block.cid()).toString()) + expectedCids.push(block.cid.toString()) } for (const block of decoded.blocks) { - const cid = await block.cid() + const cid = block.cid const index = expectedCids.indexOf(cid.toString()) assert.ok(index >= 0, 'got expected block') - assert.strictEqual(expectedBlocks[index].encode().toString('hex'), block.encode().toString('hex'), 'got expected block content') + assert.strictEqual( + toHex(expectedBlocks[index].binary), + toHex(block.binary), + 'got expected block content') expectedBlocks.splice(index, 1) expectedCids.splice(index, 1) } @@ -101,31 +130,31 @@ async function verifyHas (carDs, modified) { for (let i = 0; i < blocks.length; i++) { if (modified && i === 1) { // second of each type is removed from modified - await verifyHasnt(await blocks[i].cid(), `block #${i} (${type} / ${await blocks[i].cid()})`) + await verifyHasnt(blocks[i].cid, `block #${i} (${type} / ${blocks[i].cid})`) } else { - await verifyHas(await blocks[i].cid(), `block #${i} (${type} / ${await blocks[i].cid()})`) + await verifyHas(blocks[i].cid, `block #${i} (${type} / ${blocks[i].cid})`) } } if (modified && type === 'raw') { - await verifyHas(await rawBlocks[3].cid(), `block #3 (${type})`) // zzzz + await verifyHas(rawBlocks[3].cid, `block #3 (${type})`) // zzzz } } // not a block we have - await verifyHasnt(await Block.encoder(Buffer.from('dddd'), 'raw').cid(), 'dddd') + await verifyHasnt((await toRawBlock(new TextEncoder().encode('dddd'))).cid, 'dddd') } function compareBlockData (actual, expected, id) { - assert.strictEqual(Buffer.from(actual).toString('hex'), Buffer.from(expected).toString('hex'), `comparing block as hex ${id}`) + assert.strictEqual(toHex(actual), toHex(expected), `comparing block as hex ${id}`) } async function verifyBlocks (carDs, modified) { async function verifyBlock (block, index, 
type) { - const expected = await block.encode() + const expected = block.binary let actual try { - actual = await carDs.get(await block.cid()) + actual = await carDs.get(block.cid) } catch (err) { assert.ifError(err, `get block length #${index} (${type})`) } @@ -137,7 +166,7 @@ async function verifyBlocks (carDs, modified) { const block = blocks[i] if (modified && i === 1) { - await assert.rejects(carDs.get(await block.cid()), { + await assert.isRejected(carDs.get(block.cid), { name: 'Error', message: 'Not Found' }) @@ -154,19 +183,20 @@ async function verifyBlocks (carDs, modified) { } async function verifyRoots (carDs, modified) { - // const expected = await cborBlocks[modified ? 1 : 2].cid() - const expected = [await cborBlocks[0].cid(), await cborBlocks[1].cid()] - assert.deepStrictEqual(await carDs.getRoots(), expected) + // using toString() for now, backing buffers in Uint8Arrays are getting in the way + // in the browser + const expected = [cborBlocks[0].cid.toString(), cborBlocks[1].cid.toString()] + assert.deepStrictEqual((await carDs.getRoots()).map((c) => c.toString()), expected) } -module.exports.makeData = makeData -module.exports.verifyBlocks = verifyBlocks -module.exports.verifyHas = verifyHas -module.exports.verifyRoots = verifyRoots -module.exports.acid = acid -module.exports.compareBlockData = compareBlockData +function toHex (b) { + return b.reduce((hex, byte) => hex + byte.toString(16).padStart(2, '0'), '') +} + +function fromHex (str) { + return new Uint8Array(str.match(/../g).map(b => parseInt(b, 16))) +} -// TODO: delete? -module.exports.verifyDecoded = verifyDecoded +const car = fromHex('63a265726f6f747382d82a58250001711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8bd82a5825000171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365b6776657273696f6e01280155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b461616161280155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e646877626262622801551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a2745163636363511220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d0122d0a240155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b4120363617418048001122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a122d0a240155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e6468771203646f671804122d0a221220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d01205666972737418338301122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de122e0a2401551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a274511204626561721804122f0a22122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a12067365636f6e641895015b01711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8ba2646c696e6bd82a582300122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de646e616d6564626c6970360171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365ba2646c696e6bf6646e616d65656c696d626f') -module.exports.car = 
Buffer.from('63a265726f6f747382d82a58250001711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8bd82a5825000171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365b6776657273696f6e01280155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b461616161280155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e646877626262622801551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a2745163636363511220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d0122d0a240155122061be55a8e2f6b4e172338bddf184d6dbee29c98853e0a0485ecee7f27b9af0b4120363617418048001122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a122d0a240155122081cc5b17018674b401b42f35ba07bb79e211239c23bffe658da1577e3e6468771203646f671804122d0a221220e7dc486e97e6ebe5cdabab3e392bdad128b6e09acc94bb4e2aa2af7b986d24d01205666972737418338301122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de122e0a2401551220b6fbd675f98e2abd22d4ed29fdc83150fedc48597e92dd1a7a24381d44a274511204626561721804122f0a22122079a982de3c9907953d4d323cee1d0fb1ed8f45f8ef02870c0cb9e09246bd530a12067365636f6e641895015b01711220f88bc853804cf294fe417e4fa83028689fcdb1b1592c5102e1474dbc200fab8ba2646c696e6bd82a582300122002acecc5de2438ea4126a3010ecb1f8a599c8eff22fff1a1dcffe999b27fd3de646e616d6564626c6970360171122069ea0740f9807a28f4d932c62e7c1c83be055e55072c90266ab3e79df63a365ba2646c696e6bf6646e616d65656c696d626f', 'hex') +export { makeData, verifyBlocks, verifyHas, verifyRoots, acid, compareBlockData, verifyDecoded, toHex, fromHex, car } diff --git a/test/test-complete-graph.js b/test/test-complete-graph.js index 30e4cc9..e21f43f 100644 --- a/test/test-complete-graph.js +++ b/test/test-complete-graph.js @@ -1,34 +1,21 @@ /* eslint-env mocha */ -const assert = require('assert') -const { writeStream, readBuffer, completeGraph } = require('../') -const Block = require('@ipld/block') -const { PassThrough } = require('stream') +import chai from 'chai' +import multiformats from 'multiformats/basics' +import stream from 'readable-stream' -const same = assert.deepStrictEqual +import IpldBlock from '@ipld/block' +import Car from 'datastore-car' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' -function all (car) { - const _traverse = async function * (link, seen = new Set()) { - link = await link - seen.add(link.toString('base64')) - const encoded = await car.get(link) - const block = Block.create(encoded, link) - yield block - const cid = await block.cid() - if (cid.codec === 'raw') { - return - } +const { assert } = chai - for (const [, link] of block.reader().links()) { - if (seen.has(link.toString('base64'))) { - continue - } - yield * _traverse(link, seen) - } - } - - return _traverse(car.getRoots().then(([root]) => root)) -} +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { writeStream, readBuffer, completeGraph } = Car(multiformats) +const Block = IpldBlock(multiformats) +const { PassThrough } = stream async function createGet (blocks) { const db = new Map() @@ -43,14 +30,20 @@ async function concat (stream) { for await (const buffer of stream) { buffers.push(buffer) } - return Buffer.concat(buffers) + const ret = new Uint8Array(buffers.reduce((p, c) => p + c.length, 0)) + let off = 0 + for (const b of buffers) { + ret.set(b, off) + off += b.length + } + return ret } describe('Create car for full graph', () => { it('small graph', async () => { const leaf1 = Block.encoder({ hello: 'world' }, 'dag-cbor') const leaf2 = 
Block.encoder({ test: 1 }, 'dag-cbor') - const raw = Block.encoder(Buffer.from('test'), 'raw') + const raw = Block.encoder(new TextEncoder().encode('test'), 'raw') const root = Block.encoder( { one: await leaf1.cid(), @@ -60,20 +53,21 @@ describe('Create car for full graph', () => { }, 'dag-cbor') const expected = [root, leaf1, leaf2, raw] + const get = await createGet(expected) const stream = new PassThrough() const car = await writeStream(stream) await completeGraph(await root.cid(), get, car) const data = await concat(stream) - const reader = await readBuffer(data) - const [readRoot, ...more] = await reader.getRoots() - same(more.length, 0) - assert.ok(readRoot.equals(await root.cid())) + const carDs = await readBuffer(data) + const roots = await carDs.getRoots() + assert.strictEqual(roots.length, 1) + assert.deepStrictEqual(roots[0], await root.cid()) - for await (const block of all(reader)) { + for await (const { key: cid } of carDs.query()) { const expectedBlock = expected.shift() - assert.ok((await expectedBlock.cid()).equals(await block.cid())) + assert.strictEqual(cid, (await expectedBlock.cid()).toString()) } }) }) diff --git a/test/test-decode.js b/test/test-decode.js index aab7b14..d14287f 100644 --- a/test/test-decode.js +++ b/test/test-decode.js @@ -1,47 +1,61 @@ /* eslint-env mocha */ -const assert = require('assert') -const { promisify } = require('util') -const path = require('path') -const fs = require('fs') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import { promisify } from 'util' +import path from 'path' +import fs from 'fs' +import multiformats from 'multiformats/basics' +import { makeData, verifyDecoded } from './fixture-data.js' +import * as coding from '../lib/coding.js' +import dagCbor from '@ipld/dag-cbor' +import { fileURLToPath } from 'url' + +chai.use(chaiAsPromised) +const { assert } = chai + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + fs.readFile = promisify(fs.readFile) -const { verifyDecoded } = require('./fixture-data') -const coding = require('../lib/coding') +multiformats.add(dagCbor) describe('Decode', () => { + before(makeData) + it('decodeFile', async () => { - const decoded = await coding.decodeFile(path.join(__dirname, 'go.car')) + const decoded = await coding.decodeFile(multiformats, path.join(__dirname, 'go.car')) return verifyDecoded(decoded) }) it('decodeFile small buffer', async () => { - const decoded = await coding.decodeFile(path.join(__dirname, 'go.car'), { bufferSize: 8 }) + const decoded = await coding.decodeFile(multiformats, path.join(__dirname, 'go.car'), { bufferSize: 8 }) return verifyDecoded(decoded) }) it('decodeBuffer', async () => { - const decoded = await coding.decodeBuffer(await fs.readFile(path.join(__dirname, 'go.car'))) + const decoded = await coding.decodeBuffer(multiformats, await fs.readFile(path.join(__dirname, 'go.car'))) return verifyDecoded(decoded) }) it('decodeStream', async () => { - const decoded = await coding.decodeStream(fs.createReadStream(path.join(__dirname, 'go.car'))) + const decoded = await coding.decodeStream(multiformats, fs.createReadStream(path.join(__dirname, 'go.car'))) return verifyDecoded(decoded) }) it('decode errors', async () => { const buf = await fs.readFile(path.join(__dirname, 'go.car')) // truncated - await assert.rejects(coding.decodeBuffer(buf.slice(0, buf.length - 10)), { + await assert.isRejected(coding.decodeBuffer(multiformats, buf.slice(0, buf.length - 10)), { name: 'Error', - message: 'Unexpected 
end of Buffer' + message: 'Unexpected end of data' }) // cid v0 - const buf2 = Buffer.alloc(buf.length) + const buf2 = new Uint8Array(buf.length) buf.copy(buf2) buf2[101] = 0 // first block's CID - await assert.rejects(coding.decodeBuffer(buf2), { + await assert.isRejected(coding.decodeBuffer(multiformats, buf2), { name: 'Error', message: 'Unexpected CID version (0)' }) diff --git a/test/test-encode.js b/test/test-encode.js index 3258689..41404cd 100644 --- a/test/test-encode.js +++ b/test/test-encode.js @@ -1,13 +1,21 @@ /* eslint-env mocha */ -const assert = require('assert') -const { promisify } = require('util') -const fs = require('fs') -const bl = require('bl') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import { promisify } from 'util' +import fs from 'fs' +import bl from 'bl' +import multiformats from 'multiformats/basics' +import { verifyDecoded, makeData } from './fixture-data.js' +import * as coding from '../lib/coding.js' +import dagCbor from '@ipld/dag-cbor' + +chai.use(chaiAsPromised) +const { assert } = chai + fs.readFile = promisify(fs.readFile) fs.unlink = promisify(fs.unlink) -const { verifyDecoded, makeData } = require('./fixture-data') -const coding = require('../lib/coding') +multiformats.add(dagCbor) describe('Encode', () => { let roots, allBlocks @@ -21,7 +29,7 @@ describe('Encode', () => { allBlocks = data.allBlocksFlattened roots = [] for (const block of data.cborBlocks) { - roots.push(await block.cid()) + roots.push(block.cid) } }) @@ -29,26 +37,26 @@ describe('Encode', () => { after(clean) it('encodeFile', async () => { - await coding.encodeFile('test.car', roots, allBlocks) - const decoded = await coding.decodeFile('test.car') + await coding.encodeFile(multiformats, 'test.car', roots, allBlocks) + const decoded = await coding.decodeFile(multiformats, 'test.car') return verifyDecoded(decoded) }) it('encodeBuffer', async () => { - const buf = await coding.encodeBuffer(roots, allBlocks) - const decoded = await coding.decodeBuffer(buf) + const buf = await coding.encodeBuffer(multiformats, roots, allBlocks) + const decoded = await coding.decodeBuffer(multiformats, buf) return verifyDecoded(decoded) }) it('encodeBuffer single root', async () => { - const buf = await coding.encodeBuffer(roots[0], allBlocks) - const decoded = await coding.decodeBuffer(buf) + const buf = await coding.encodeBuffer(multiformats, roots[0], allBlocks) + const decoded = await coding.decodeBuffer(multiformats, buf) return verifyDecoded(decoded, true) }) it('encodeStream', async () => { const stream = bl() - const carStream = coding.encodeStream(roots, allBlocks) + const carStream = coding.encodeStream(multiformats, roots, allBlocks) carStream.pipe(stream) await new Promise((resolve, reject) => { carStream.on('finish', resolve) @@ -56,19 +64,19 @@ describe('Encode', () => { stream.on('error', reject) }) - const decoded = await coding.decodeStream(stream) + const decoded = await coding.decodeStream(multiformats, stream) return verifyDecoded(decoded) }) it('encode errors', async () => { - await assert.rejects(coding.encodeBuffer(['blip'], allBlocks), { + await assert.isRejected(coding.encodeBuffer(multiformats, ['blip'], allBlocks), { name: 'TypeError', message: 'Roots must be CIDs' }) - await assert.rejects(coding.encodeBuffer(roots, ['blip']), { + await assert.isRejected(coding.encodeBuffer(multiformats, roots, ['blip']), { name: 'TypeError', - message: 'Block list must contain @ipld/block objects' + message: 'Block list must be of type { cid, binary }' }) }) }) 
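
For reference, the shape these encode/decode tests exercise end to end: the coding layer now takes a `multiformats` instance as its first argument and works with plain `{ cid, binary }` block objects. A minimal sketch of that round trip through `lib/coding.js` (assumptions: run from the repository root, with `multiformats/basics` and `@ipld/dag-cbor` set up exactly as the tests above do):

```js
import multiformats from 'multiformats/basics'
import dagCbor from '@ipld/dag-cbor'
import * as coding from './lib/coding.js'

multiformats.add(dagCbor)

async function roundTrip () {
  // build a single { cid, binary } block, the shape the coding layer expects
  const binary = await multiformats.encode({ hello: 'world' }, 'dag-cbor')
  const mh = await multiformats.multihash.hash(binary, 'sha2-256')
  const cid = multiformats.CID.create(1, multiformats.get('dag-cbor').code, mh)

  // encode a single-root CAR into memory, then decode it straight back
  const buf = await coding.encodeBuffer(multiformats, [cid], [{ cid, binary }])
  const decoded = await coding.decodeBuffer(multiformats, buf)
  console.log(decoded.version, decoded.roots[0].toString(), decoded.blocks.length)
}

roundTrip().catch(console.error)
```
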
diff --git a/test/test-errors.js b/test/test-errors.js
index edd82d6..3f7d17c 100644
--- a/test/test-errors.js
+++ b/test/test-errors.js
@@ -1,30 +1,44 @@
 /* eslint-env mocha */
 
-const fs = require('fs').promises
-const path = require('path')
-const assert = require('assert')
-const { readBuffer, readFileComplete, writeStream } = require('../')
-const { acid, car } = require('./fixture-data')
+import chai from 'chai'
+import chaiAsPromised from 'chai-as-promised'
+import { promises as fs, createWriteStream } from 'fs'
+import path from 'path'
+import multiformats from 'multiformats/basics'
+import { acid, car } from './fixture-data.js'
+import dagCbor from '@ipld/dag-cbor'
+import Car from 'datastore-car'
+import { fileURLToPath } from 'url'
+
+chai.use(chaiAsPromised)
+const { assert } = chai
+
+const __filename = fileURLToPath(import.meta.url)
+const __dirname = path.dirname(__filename)
+
+multiformats.add(dagCbor)
+const { readBuffer, readFileComplete, writeStream } = Car(multiformats)
 
 describe('Errors', () => {
   it('unimplemented methods', async () => {
     const carDs = await readBuffer(car)
-    await assert.rejects(carDs.batch())
-    await assert.rejects(carDs.batch('foo'))
+    await assert.isRejected(carDs.batch())
+    await assert.isRejected(carDs.batch('foo'))
     await carDs.close()
   })
 
   it('bad gets', async () => {
     const carDs = await readBuffer(car)
-    await assert.rejects(carDs.get('blip')) // not a CID key
-    await assert.doesNotReject(carDs.get(acid)) // sanity check
+    await assert.isRejected(carDs.get('blip'), /only accepts CIDs or CID strings/) // not a CID key
+    await assert.isRejected(carDs.get(null)) // not a CID key
+    await assert.isFulfilled(carDs.get(acid)) // sanity check
     await carDs.close()
   })
 
   it('bad has\'', async () => {
     const carDs = await readBuffer(car)
-    await assert.rejects(carDs.has('blip')) // not a CID key
-    await assert.doesNotReject(carDs.has(acid)) // sanity check
+    await assert.isRejected(carDs.has('blip'), /only accepts CIDs or CID strings/) // not a CID key
+    await assert.isFulfilled(carDs.has(acid)) // sanity check
     await carDs.close()
   })
 
@@ -38,23 +52,23 @@ describe('Errors', () => {
   it('bad root type', async () => {
-    const carDs = await writeStream(fs.createWriteStream('test.car'))
-    assert.rejects(carDs.setRoots('blip'))
-    assert.rejects(carDs.setRoots(['blip']))
-    assert.rejects(carDs.setRoots([acid, false]))
+    const carDs = await writeStream(createWriteStream('test.car'))
+    await assert.isRejected(carDs.setRoots('blip'))
+    await assert.isRejected(carDs.setRoots(['blip']))
+    await assert.isRejected(carDs.setRoots([acid, false]))
     await carDs.close()
   })
 
   it('bad puts', async () => {
-    const carDs = await writeStream(fs.createWriteStream('test.car'))
-    await assert.rejects(carDs.put(acid, 'blip')) // not a Buffer value
-    await assert.rejects(carDs.put('blip', Buffer.from('blip'))) // not a CID key
+    const carDs = await writeStream(createWriteStream('test.car'))
+    await assert.isRejected(carDs.put(acid, 'blip')) // not a Uint8Array value
+    await assert.isRejected(carDs.put('blip', new TextEncoder().encode('blip')), /only accepts CIDs or CID strings/) // not a CID key
     await carDs.close()
   })
 
   it('truncated file', async () => {
     const data = await fs.readFile(path.join(__dirname, 'go.car'))
     await fs.writeFile('test.car', data.slice(0, data.length - 5))
-    await assert.rejects(readFileComplete('test.car'), {
+    await assert.isRejected(readFileComplete('test.car'), {
       name: 'Error',
       message: 'Unexpected end of file'
     })
diff --git a/test/test-large.js b/test/test-large.js
index b950784..ee03764 100644
--- a/test/test-large.js
+++ b/test/test-large.js
@@ -1,29 +1,71 @@
 /* eslint-env mocha */
 
-const assert = require('assert')
-const fs = require('fs')
-const unlink = require('util').promisify(require('fs').unlink)
-const garbage = require('garbage')
-const Block = require('@ipld/block')
-const { writeStream, readFileComplete, readStreaming } = require('../')
+import chai from 'chai'
+import fs from 'fs'
+import garbage from 'garbage'
+import varint from 'varint'
+import multiformats from 'multiformats/basics'
+import dagCbor from '@ipld/dag-cbor'
+import Car from 'datastore-car'
+import { promisify } from 'util'
+
+const { assert } = chai
+
+const unlink = promisify(fs.unlink)
+
+multiformats.add(dagCbor)
+const { writeStream, readFileComplete, readStreaming, indexer } = Car(multiformats)
 
 describe('Large CAR', () => {
   const objects = []
   const cids = []
+  const expectedIndex = []
 
   it('create, no roots', async () => {
     const carDs = await writeStream(fs.createWriteStream('./test.car'))
 
+    // offset starts at header length
+    let offset = multiformats.encode({ version: 1, roots: [] }, 'dag-cbor').length
+    offset += varint.encode(offset).length
+
     for (let i = 0; i < 500; i++) {
       const obj = garbage.object(1000)
       objects.push(obj)
-      const block = Block.encoder(obj, 'dag-cbor')
-      const cid = await block.cid()
+      const binary = await multiformats.encode(obj, 'dag-cbor')
+      const mh = await multiformats.multihash.hash(binary, 'sha2-256')
+      const cid = multiformats.CID.create(1, multiformats.get('dag-cbor').code, mh)
       cids.push(cid.toString())
-      await carDs.put(cid, block.encode())
+      const blockLength = binary.length
+      let length = cid.bytes.length + blockLength
+      const lengthLength = varint.encode(length).length
+      length += lengthLength
+      const blockOffset = offset + lengthLength + cid.bytes.length
+      expectedIndex.push({ cid, offset, length, blockOffset, blockLength })
+      offset += length
+      await carDs.put(cid, binary)
     }
-    return carDs.close()
+    await carDs.close()
+  })
+
+  it('indexer stream', async () => {
+    const index = await indexer(fs.createReadStream('./test.car'))
+    assert.deepStrictEqual(index.roots, [])
+    let i = 0
+    for await (const blockIndex of index.iterator) {
+      assert.deepStrictEqual(blockIndex, expectedIndex[i])
+      i++
+    }
+  })
+
+  it('indexer file', async () => {
+    const index = await indexer('./test.car')
+    assert.deepStrictEqual(index.roots, [])
+    let i = 0
+    for await (const blockIndex of index.iterator) {
+      assert.deepStrictEqual(blockIndex, expectedIndex[i])
+      i++
+    }
   })
 
   it('readFileComplete', async () => {
@@ -31,7 +73,8 @@
     let i = 0
     for await (const { key, value } of carDs.query()) {
       assert.strictEqual(key, cids[i], `cid #${i} ${key} <> ${cids[i]}`)
-      assert.deepStrictEqual(Block.create(value, key).decode(), objects[i], `object #${i}`)
+      const obj = await multiformats.decode(value, 'dag-cbor')
+      assert.deepStrictEqual(obj, objects[i], `object #${i}`)
       i++
     }
 
@@ -43,7 +86,8 @@
     let i = 0
     for await (const { key, value } of carDs.query()) {
       assert.strictEqual(key, cids[i], `cid #${i} ${key} <> ${cids[i]}`)
-      assert.deepStrictEqual(Block.create(value, key).decode(), objects[i], `object #${i}`)
+      const obj = await multiformats.decode(value, 'dag-cbor')
+      assert.deepStrictEqual(obj, objects[i], `object #${i}`)
       i++
     }
 
diff --git a/test/test-query.js b/test/test-query.js
index 7e30317..926c179 100644
--- a/test/test-query.js
+++ b/test/test-query.js
@@ -1,21 +1,20 @@
 /* eslint-env mocha */
 
-const assert = require('assert')
-const path = require('path')
-const { readBuffer, readFile } = require('../')
-const { car, makeData, compareBlockData } =
require('./fixture-data') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import path from 'path' +import multiformats from 'multiformats/basics' +import { car, makeData, compareBlockData } from './fixture-data.js' +import Car from 'datastore-car' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' -if (!assert.rejects) { - // browser polyfill is incomplete - assert.rejects = async (promise, msg) => { - try { - await promise - } catch (err) { - return - } - assert.fail(`Promise did not reject: ${msg}`) - } -} +chai.use(chaiAsPromised) +const { assert } = chai + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { readBuffer, readFile } = Car(multiformats) const factories = [['readBuffer', () => readBuffer(car)]] if (readFile) { // not in browser @@ -36,7 +35,7 @@ for (const [factoryName, factoryFn] of factories) { const blocks_ = blocks.slice() const cids = [] for (const block of blocks) { - cids.push((await block.cid()).toString()) + cids.push(block.cid.toString()) } let i = 0 for await (const entry of carDs.query()) { @@ -44,7 +43,7 @@ for (const [factoryName, factoryFn] of factories) { if (foundIndex < 0) { assert.fail(`Unexpected CID/key found: ${entry.key}`) } - compareBlockData(entry.value, blocks_[foundIndex].encode(), `#${i++}`) + compareBlockData(entry.value, blocks_[foundIndex].binary, `#${i++}`) cids.splice(foundIndex, 1) blocks_.splice(foundIndex, 1) } @@ -57,7 +56,7 @@ for (const [factoryName, factoryFn] of factories) { const blocks_ = blocks.slice() const cids = [] for (const block of blocks) { - cids.push((await block.cid()).toString()) + cids.push(block.cid.toString()) } for await (const entry of carDs.query({ keysOnly: true })) { const foundIndex = cids.findIndex((cid) => cid === entry.key) @@ -77,8 +76,8 @@ for (const [factoryName, factoryFn] of factories) { const blocks_ = [] const cids = [] for (const block of blocks) { - const cid = await block.cid() - if (cid.codec === 'dag-cbor') { + const cid = multiformats.CID.from(block.cid.toString()) + if (cid.code === multiformats.get('dag-cbor').code) { cids.push(cid.toString()) blocks_.push(block) } @@ -90,7 +89,7 @@ for (const [factoryName, factoryFn] of factories) { if (foundIndex < 0) { assert.fail(`Unexpected CID/key found: ${entry.key}`) } - compareBlockData(entry.value, blocks_[foundIndex].encode(), `#${i++}`) + compareBlockData(entry.value, blocks_[foundIndex].binary, `#${i++}`) cids.splice(foundIndex, 1) blocks_.splice(foundIndex, 1) } diff --git a/test/test-raw.js b/test/test-raw.js index 8f4563b..30c696c 100644 --- a/test/test-raw.js +++ b/test/test-raw.js @@ -1,29 +1,44 @@ /* eslint-env mocha */ -const assert = require('assert') -const { promisify } = require('util') -const path = require('path') -const fs = require('fs') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import { promisify } from 'util' +import path from 'path' +import fs from 'fs' +import multiformats from 'multiformats/basics' +import { makeData, toHex } from './fixture-data.js' +import Car from 'datastore-car' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' +import { fileURLToPath } from 'url' + +chai.use(chaiAsPromised) +const { assert } = chai + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + fs.open = promisify(fs.open) fs.close = promisify(fs.close) -const CID = require('cids') -const { indexer, readRaw } = require('../') -const { makeData } = 
require('./fixture-data') + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { indexer, readRaw } = Car(multiformats) describe('Raw', () => { const expectedRoots = [ - new CID('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'), - new CID('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm') + multiformats.CID.from('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'), + multiformats.CID.from('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm') ] const expectedIndex = [ - { cid: new CID('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'), length: 55, offset: 137 }, - { cid: new CID('QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d'), length: 97, offset: 228 }, - { cid: new CID('bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke'), length: 4, offset: 362 }, - { cid: new CID('QmWXZxVQ9yZfhQxLD35eDR8LiMRsYtHxYqTFCBbJoiJVys'), length: 94, offset: 402 }, - { cid: new CID('bafkreiebzrnroamgos2adnbpgw5apo3z4iishhbdx77gldnbk57d4zdio4'), length: 4, offset: 533 }, - { cid: new CID('QmdwjhxpxzcMsR3qUuj7vUL8pbA7MgR3GAxWi2GLHjsKCT'), length: 47, offset: 572 }, - { cid: new CID('bafkreidbxzk2ryxwwtqxem4l3xyyjvw35yu4tcct4cqeqxwo47zhxgxqwq'), length: 4, offset: 656 }, - { cid: new CID('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'), length: 18, offset: 697 } + { cid: multiformats.CID.from('bafyreihyrpefhacm6kkp4ql6j6udakdit7g3dmkzfriqfykhjw6cad5lrm'), offset: 100, length: 92, blockOffset: 137, blockLength: 55 }, + { cid: multiformats.CID.from('QmNX6Tffavsya4xgBi2VJQnSuqy9GsxongxZZ9uZBqp16d'), offset: 192, length: 133, blockOffset: 228, blockLength: 97 }, + { cid: multiformats.CID.from('bafkreifw7plhl6mofk6sfvhnfh64qmkq73oeqwl6sloru6rehaoujituke'), offset: 325, length: 41, blockOffset: 362, blockLength: 4 }, + { cid: multiformats.CID.from('QmWXZxVQ9yZfhQxLD35eDR8LiMRsYtHxYqTFCBbJoiJVys'), offset: 366, length: 130, blockOffset: 402, blockLength: 94 }, + { cid: multiformats.CID.from('bafkreiebzrnroamgos2adnbpgw5apo3z4iishhbdx77gldnbk57d4zdio4'), offset: 496, length: 41, blockOffset: 533, blockLength: 4 }, + { cid: multiformats.CID.from('QmdwjhxpxzcMsR3qUuj7vUL8pbA7MgR3GAxWi2GLHjsKCT'), offset: 537, length: 82, blockOffset: 572, blockLength: 47 }, + { cid: multiformats.CID.from('bafkreidbxzk2ryxwwtqxem4l3xyyjvw35yu4tcct4cqeqxwo47zhxgxqwq'), offset: 619, length: 41, blockOffset: 656, blockLength: 4 }, + { cid: multiformats.CID.from('bafyreidj5idub6mapiupjwjsyyxhyhedxycv4vihfsicm2vt46o7morwlm'), offset: 660, length: 55, blockOffset: 697, blockLength: 18 } ] let allBlocksFlattened @@ -55,15 +70,18 @@ describe('Raw', () => { const expectedBlocks = allBlocksFlattened.slice() const expectedCids = [] for (const block of expectedBlocks) { - expectedCids.push((await block.cid()).toString()) + expectedCids.push(block.cid.toString()) } for (const blockIndex of expectedIndex) { const block = await readRaw(fd, blockIndex) - const cid = await block.cid() + const cid = block.cid const index = expectedCids.indexOf(cid.toString()) assert.ok(index >= 0, 'got expected block') - assert.strictEqual(expectedBlocks[index].encode().toString('hex'), block.encode().toString('hex'), 'got expected block content') + assert.strictEqual( + toHex(expectedBlocks[index].binary), + toHex(block.binary), + 'got expected block content') expectedBlocks.splice(index, 1) expectedCids.splice(index, 1) } @@ -83,22 +101,22 @@ describe('Raw', () => { }) it('errors', async () => { - await assert.rejects(indexer(), { + await assert.isRejected(indexer(), { name: 
'TypeError', message: 'indexer() requires a file path or a ReadableStream' }) - await assert.rejects(readRaw(true, expectedIndex[0]), { + await assert.isRejected(readRaw(true, expectedIndex[0]), { name: 'TypeError', message: 'Bad fd' }) const badBlock = Object.assign({}, expectedIndex[expectedIndex.length - 1]) - badBlock.length += 10 + badBlock.blockLength += 10 const fd = await fs.open(path.join(__dirname, 'go.car')) - await assert.rejects(readRaw(fd, badBlock), { + await assert.isRejected(readRaw(fd, badBlock), { name: 'Error', - message: `Failed to read entire block (${badBlock.length - 10} instead of ${badBlock.length})` + message: `Failed to read entire block (${badBlock.blockLength - 10} instead of ${badBlock.blockLength})` }) await fs.close(fd) }) diff --git a/test/test-readbuffer.js b/test/test-readbuffer.js index 13ff1e2..8f1a23c 100644 --- a/test/test-readbuffer.js +++ b/test/test-readbuffer.js @@ -1,20 +1,19 @@ /* eslint-env mocha */ -const assert = require('assert') -const { readBuffer } = require('../') -const { acid, car, makeData, verifyBlocks, verifyHas, verifyRoots } = require('./fixture-data') - -if (!assert.rejects) { - // browser polyfill is incomplete - assert.rejects = async (promise, msg) => { - try { - await promise - } catch (err) { - return - } - assert.fail(`Promise did not reject: ${msg}`) - } -} +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import multiformats from 'multiformats/basics' +import { acid, car, makeData, verifyBlocks, verifyHas, verifyRoots } from './fixture-data.js' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' +import Car from 'datastore-car' + +chai.use(chaiAsPromised) +const { assert } = chai + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { readBuffer } = Car(multiformats) let rawBlocks @@ -29,7 +28,7 @@ describe('Read Buffer', () => { await verifyHas(carDs) await verifyBlocks(carDs) await verifyRoots(carDs) - await assert.rejects(carDs.get(await rawBlocks[3].cid())) // doesn't exist + await assert.isRejected(carDs.get(rawBlocks[3].cid)) // doesn't exist await carDs.close() }) @@ -40,12 +39,12 @@ describe('Read Buffer', () => { await carDs.close() }) - // when we instantiate from a Buffer, CarDatastore should be immutable + // when we instantiate from a Uint8Array, CarDatastore should be immutable it('immutable', async () => { const carDs = await readBuffer(car) - await assert.rejects(carDs.put(acid, Buffer.from('blip'))) - await assert.rejects(carDs.delete(acid, Buffer.from('blip'))) - await assert.rejects(carDs.setRoots(acid)) - await assert.rejects(carDs.setRoots([acid])) + await assert.isRejected(carDs.put(acid, new TextEncoder().encode('blip'))) + await assert.isRejected(carDs.delete(acid, new TextEncoder().encode('blip'))) + await assert.isRejected(carDs.setRoots(acid)) + await assert.isRejected(carDs.setRoots([acid])) }) }) diff --git a/test/test-readfile-writestream.js b/test/test-readfile-writestream.js index e238970..f517d2f 100644 --- a/test/test-readfile-writestream.js +++ b/test/test-readfile-writestream.js @@ -1,10 +1,23 @@ /* eslint-env mocha */ -const assert = require('assert') -const fs = require('fs') -const unlink = require('util').promisify(require('fs').unlink) -const { writeStream, readFileComplete } = require('../') -const { makeData, verifyBlocks, verifyHas, verifyRoots } = require('./fixture-data') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import fs from 'fs' +import multiformats from 
'multiformats/basics' +import { makeData, verifyBlocks, verifyHas, verifyRoots } from './fixture-data.js' +import { promisify } from 'util' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' +import Car from 'datastore-car' + +chai.use(chaiAsPromised) +const { assert } = chai + +const unlink = promisify(fs.unlink) + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { writeStream, readFileComplete } = Car(multiformats) let rawBlocks let pbBlocks @@ -22,10 +35,10 @@ describe('Read File & Write Stream', () => { it('writeStream', async () => { const carDs = await writeStream(fs.createWriteStream('./test.car')) - await carDs.setRoots([await cborBlocks[0].cid(), await cborBlocks[1].cid()]) + await carDs.setRoots([cborBlocks[0].cid, cborBlocks[1].cid]) for (const block of rawBlocks.slice(0, 3).concat(pbBlocks).concat(cborBlocks)) { // add all but raw zzzz - await carDs.put(await block.cid(), block.encode()) + await carDs.put(block.cid, block.binary) } await carDs.close() }) @@ -39,10 +52,10 @@ describe('Read File & Write Stream', () => { }) it('writeStream no await', async () => { - const roots = [await cborBlocks[0].cid(), await cborBlocks[1].cid()] + const roots = [cborBlocks[0].cid, cborBlocks[1].cid] const blocks = [] for (const block of rawBlocks.slice(0, 3).concat(pbBlocks).concat(cborBlocks)) { - blocks.push([await block.cid(), block.encode()]) + blocks.push([block.cid, block.binary]) } const carDs = await writeStream(fs.createWriteStream('./test.car')) @@ -63,10 +76,10 @@ describe('Read File & Write Stream', () => { it('writeStream errors', async () => { const carDs = await writeStream(fs.createWriteStream('./test.car')) - await carDs.put(await cborBlocks[0].cid(), await cborBlocks[0].encode()) - await assert.rejects(carDs.delete(await cborBlocks[0].cid())) + await carDs.put(cborBlocks[0].cid, cborBlocks[0].binary) + await assert.isRejected(carDs.delete(cborBlocks[0].cid)) await carDs.close() - await assert.rejects(carDs.close()) + await assert.isRejected(carDs.close()) }) after(async () => { diff --git a/test/test-readfile.js b/test/test-readfile.js index 6e9ad23..43fc806 100644 --- a/test/test-readfile.js +++ b/test/test-readfile.js @@ -1,9 +1,24 @@ /* eslint-env mocha */ -const path = require('path') -const assert = require('assert') -const { readFileComplete } = require('../car') -const { acid, makeData, verifyBlocks, verifyHas, verifyRoots } = require('./fixture-data') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import path from 'path' +import multiformats from 'multiformats/basics' +import { acid, makeData, verifyBlocks, verifyHas, verifyRoots } from './fixture-data.js' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' +import Car from 'datastore-car' +import { fileURLToPath } from 'url' + +chai.use(chaiAsPromised) +const { assert } = chai + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { readFileComplete } = Car(multiformats) let rawBlocks @@ -18,7 +33,7 @@ describe('Read File', () => { await verifyHas(carDs) await verifyBlocks(carDs) await verifyRoots(carDs) - await assert.rejects(carDs.get(await rawBlocks[3].cid())) // doesn't exist + await assert.isRejected(carDs.get(rawBlocks[3].cid)) // doesn't exist await carDs.close() }) @@ -32,9 +47,9 @@ describe('Read File', () => { // when we instantiate from a File, CarDatastore should be immutable 
  it('immutable', async () => {
     const carDs = await readFileComplete(path.join(__dirname, 'go.car'))
-    await assert.rejects(carDs.put(acid, Buffer.from('blip')))
-    await assert.rejects(carDs.delete(acid, Buffer.from('blip')))
-    await assert.rejects(carDs.setRoots(acid))
-    await assert.rejects(carDs.setRoots([acid]))
+    await assert.isRejected(carDs.put(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.delete(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.setRoots(acid))
+    await assert.isRejected(carDs.setRoots([acid]))
   })
 })
diff --git a/test/test-readindexed.js b/test/test-readindexed.js
new file mode 100644
index 0000000..198a1da
--- /dev/null
+++ b/test/test-readindexed.js
@@ -0,0 +1,114 @@
+/* eslint-env mocha */
+
+import chai from 'chai'
+import chaiAsPromised from 'chai-as-promised'
+import path from 'path'
+import multiformats from 'multiformats/basics'
+import { acid, makeData, compareBlockData, verifyBlocks, verifyHas, verifyRoots } from './fixture-data.js'
+import dagCbor from '@ipld/dag-cbor'
+import base58 from 'multiformats/bases/base58'
+import Car from 'datastore-car'
+import { fileURLToPath } from 'url'
+
+chai.use(chaiAsPromised)
+const { assert } = chai
+
+const __filename = fileURLToPath(import.meta.url)
+const __dirname = path.dirname(__filename)
+
+multiformats.add(dagCbor)
+multiformats.multibase.add(base58)
+const { readFileIndexed } = Car(multiformats)
+
+let rawBlocks, allBlocks
+
+describe('Read Indexed', () => {
+  before(async () => {
+    const data = await makeData()
+    rawBlocks = data.rawBlocks
+    allBlocks = data.allBlocksFlattened
+  })
+
+  it('read existing', async () => {
+    const carDs = await readFileIndexed(path.join(__dirname, 'go.car'))
+    await verifyHas(carDs)
+    await verifyBlocks(carDs)
+    await verifyRoots(carDs)
+    await assert.isRejected(carDs.get(rawBlocks[3].cid)) // doesn't exist
+    await carDs.close()
+  })
+
+  it('verify only roots', async () => {
+    // tests deferred open for getRoots()
+    const carDs = await readFileIndexed(path.join(__dirname, 'go.car'))
+    await verifyRoots(carDs)
+    await carDs.close()
+  })
+
+  // when we instantiate from an indexed file, CarDatastore should be immutable
+  it('immutable', async () => {
+    const carDs = await readFileIndexed(path.join(__dirname, 'go.car'))
+    await assert.isRejected(carDs.put(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.delete(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.setRoots(acid))
+    await assert.isRejected(carDs.setRoots([acid]))
+  })
+
+  it('read existing (query())', async () => {
+    const carDs = await readFileIndexed(path.join(__dirname, 'go.car'))
+
+    async function verify () {
+      const blocks_ = allBlocks.slice()
+      const cids = []
+      for (const block of blocks_) {
+        cids.push(block.cid.toString())
+      }
+
+      let i = 0
+      for await (const entry of carDs.query()) {
+        assert.deepStrictEqual(Object.keys(entry), ['key', 'value'])
+        const foundIndex = cids.findIndex((cid) => cid === entry.key)
+        if (foundIndex < 0) {
+          assert.fail(`Unexpected CID/key found: ${entry.key}`)
+        }
+        compareBlockData(entry.value, blocks_[foundIndex].binary, `#${i++}`)
+        cids.splice(foundIndex, 1)
+        blocks_.splice(foundIndex, 1)
+      }
+      assert.strictEqual(cids.length, 0, 'found all expected CIDs')
+      // test after
+      await verifyRoots(carDs)
+    }
+
+    await verify()
+    await verify() // second pass, file should be open now
+
+    await carDs.close()
+  })
+
+  it('read existing (query({keysOnly}))', async () => {
+    const blocks_ =
allBlocks.slice() + const cids = [] + for (const block of blocks_) { + cids.push(await block.cid.toString()) + } + + const carDs = await readFileIndexed(path.join(__dirname, 'go.car')) + + // test before + await verifyRoots(carDs) + + for await (const entry of carDs.query({ keysOnly: true })) { + assert.deepStrictEqual(Object.keys(entry), ['key']) + const foundIndex = cids.findIndex((cid) => cid === entry.key) + if (foundIndex < 0) { + assert.fail(`Unexpected CID/key found: ${entry.key}`) + } + assert.strictEqual(entry.value, undefined, 'no `value`') + cids.splice(foundIndex, 1) + blocks_.splice(foundIndex, 1) + } + assert.strictEqual(cids.length, 0, 'found all expected CIDs') + await carDs.close() + }) +}) diff --git a/test/test-readstream.js b/test/test-readstream.js index 26d1c17..ffe4c0c 100644 --- a/test/test-readstream.js +++ b/test/test-readstream.js @@ -1,10 +1,25 @@ /* eslint-env mocha */ -const fs = require('fs') -const path = require('path') -const assert = require('assert') -const { readStreaming } = require('../car') -const { acid, makeData, compareBlockData, verifyRoots } = require('./fixture-data') +import chai from 'chai' +import chaiAsPromised from 'chai-as-promised' +import fs from 'fs' +import path from 'path' +import multiformats from 'multiformats/basics' +import { acid, makeData, compareBlockData, verifyRoots } from './fixture-data.js' +import dagCbor from '@ipld/dag-cbor' +import base58 from 'multiformats/bases/base58' +import Car from 'datastore-car' +import { fileURLToPath } from 'url' + +chai.use(chaiAsPromised) +const { assert } = chai + +const __filename = fileURLToPath(import.meta.url) +const __dirname = path.dirname(__filename) + +multiformats.add(dagCbor) +multiformats.multibase.add(base58) +const { readStreaming } = Car(multiformats) describe('Read Stream', () => { let allBlocks @@ -17,7 +32,7 @@ describe('Read Stream', () => { const blocks_ = allBlocks.slice() const cids = [] for (const block of blocks_) { - cids.push((await block.cid()).toString()) + cids.push(block.cid.toString()) } const carDs = await readStreaming(fs.createReadStream(path.join(__dirname, 'go.car'))) @@ -29,7 +44,7 @@ describe('Read Stream', () => { if (foundIndex < 0) { assert.fail(`Unexpected CID/key found: ${entry.key}`) } - compareBlockData(entry.value, blocks_[foundIndex].encode(), `#${i++}`) + compareBlockData(entry.value, blocks_[foundIndex].binary, `#${i++}`) cids.splice(foundIndex, 1) blocks_.splice(foundIndex, 1) } @@ -45,7 +60,7 @@ describe('Read Stream', () => { const blocks_ = allBlocks.slice() const cids = [] for (const block of blocks_) { - cids.push((await block.cid()).toString()) + cids.push(await block.cid.toString()) } const carDs = await readStreaming(fs.createReadStream(path.join(__dirname, 'go.car'))) @@ -75,13 +90,13 @@ describe('Read Stream', () => { it('errors & immutability', async () => { const carDs = await readStreaming(fs.createReadStream(path.join(__dirname, 'go.car'))) - await assert.rejects(carDs.has(await allBlocks[0].cid())) - await assert.rejects(carDs.get(await allBlocks[0].cid())) + await assert.isRejected(carDs.has(allBlocks[0].cid)) + await assert.isRejected(carDs.get(allBlocks[0].cid)) // when we instantiate from a Stream, CarDatastore should be immutable - await assert.rejects(carDs.put(acid, Buffer.from('blip'))) - await assert.rejects(carDs.delete(acid, Buffer.from('blip'))) - await assert.rejects(carDs.setRoots(acid)) - await assert.rejects(carDs.setRoots([acid])) + await assert.isRejected(carDs.put(acid, new TextEncoder().encode('blip'))) + 
await assert.isRejected(carDs.delete(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.setRoots(acid))
+    await assert.isRejected(carDs.setRoots([acid]))
   })
 })
diff --git a/test/test-readstreamcomplete.js b/test/test-readstreamcomplete.js
index c0a4d9f..0810235 100644
--- a/test/test-readstreamcomplete.js
+++ b/test/test-readstreamcomplete.js
@@ -1,10 +1,25 @@
 /* eslint-env mocha */
 
-const fs = require('fs')
-const path = require('path')
-const assert = require('assert')
-const { readStreamComplete } = require('../car')
-const { acid, makeData, verifyBlocks, verifyHas, verifyRoots } = require('./fixture-data')
+import chai from 'chai'
+import chaiAsPromised from 'chai-as-promised'
+import fs from 'fs'
+import path from 'path'
+import multiformats from 'multiformats/basics'
+import { acid, makeData, verifyBlocks, verifyHas, verifyRoots } from './fixture-data.js'
+import dagCbor from '@ipld/dag-cbor'
+import base58 from 'multiformats/bases/base58'
+import Car from 'datastore-car'
+import { fileURLToPath } from 'url'
+
+chai.use(chaiAsPromised)
+const { assert } = chai
+
+const __filename = fileURLToPath(import.meta.url)
+const __dirname = path.dirname(__filename)
+
+multiformats.add(dagCbor)
+multiformats.multibase.add(base58)
+const { readStreamComplete } = Car(multiformats)
 
 let rawBlocks
 
@@ -19,7 +34,7 @@ describe('Read Stream', () => {
     await verifyHas(carDs)
     await verifyBlocks(carDs)
     await verifyRoots(carDs)
-    await assert.rejects(carDs.get(await rawBlocks[3].cid())) // doesn't exist
+    await assert.isRejected(carDs.get(rawBlocks[3].cid)) // doesn't exist
     await carDs.close()
   })
 
@@ -33,9 +48,9 @@
   // when we instantiate from a Stream, CarDatastore should be immutable
   it('immutable', async () => {
     const carDs = await readStreamComplete(fs.createReadStream(path.join(__dirname, 'go.car')))
-    await assert.rejects(carDs.put(acid, Buffer.from('blip')))
-    await assert.rejects(carDs.delete(acid, Buffer.from('blip')))
-    await assert.rejects(carDs.setRoots(acid))
-    await assert.rejects(carDs.setRoots([acid]))
+    await assert.isRejected(carDs.put(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.delete(acid, new TextEncoder().encode('blip')))
+    await assert.isRejected(carDs.setRoots(acid))
+    await assert.isRejected(carDs.setRoots([acid]))
   })
 })
diff --git a/verify-car.js b/verify-car.js
new file mode 100755
index 0000000..d635f54
--- /dev/null
+++ b/verify-car.js
@@ -0,0 +1,63 @@
+#!/usr/bin/env node
+
+// Verify that a CAR file's blocks round-trip: decode each block, re-encode it, same CID
+import fs from 'fs'
+import multiformats from 'multiformats/basics'
+import car from 'datastore-car'
+import dagCbor from '@ipld/dag-cbor'
+import blake from '../../multiformats/js-multihashing/src/blake.js'
+
+if (!process.argv[2]) {
+  console.log('Usage: verify-car.js <path-to-car-file>')
+  process.exit(1)
+}
+
+multiformats.add(dagCbor)
+
+// for filecoin
+const blake2b = (() => {
+  const table = {}
+  blake.addFuncs(table)
+  const B = table[0xb220]
+  return (bin) => B().update(bin).digest()
+})()
+
+multiformats.multihash.add({
+  name: 'blake2b-256',
+  encode: blake2b,
+  code: 0xb220
+})
+
+const CarDatastore = car(multiformats)
+
+function toHex (b) {
+  return b.reduce((hex, byte) => hex + byte.toString(16).padStart(2, '0'), '')
+}
+
+async function example () {
+  const inStream = fs.createReadStream(process.argv[2])
+  const readDs = await CarDatastore.readStreaming(inStream)
+  let count = 0
+  for await (const { key, value } of readDs.query()) {
+    const cid = multiformats.CID.from(key)
+
const obj = multiformats.decode(value, cid.code) + const reenc = multiformats.encode(obj, cid.code) + const hashFn = multiformats.multihash.decode(cid.multihash).code + const mh = await multiformats.multihash.hash(reenc, hashFn) + const recid = multiformats.CID.create(1, cid.code, mh) + if (!recid.equals(cid)) { + console.log(`\nMismatch: ${cid} <> ${recid}`) + console.log(`Orig:\n${toHex(value)}\nRe-encode:\n${toHex(reenc)}`) + } else { + if (count++ % 100 === 0) { + process.stdout.write('.') + } + } + } + await readDs.close() +} + +example().catch((err) => { + console.error(err) + process.exit(1) +})
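
A closing note on the random-access path introduced above: `indexer()` makes a single pass over a CAR to yield `{ cid, offset, length, blockOffset, blockLength }` entries, and `readRaw()` then reads individual blocks back through an open file descriptor using those offsets. A minimal sketch of the two working together (assuming the same `multiformats/basics` + `@ipld/dag-cbor` setup as the tests; the CAR path is whatever file you point it at):

```js
import fs from 'fs'
import multiformats from 'multiformats/basics'
import dagCbor from '@ipld/dag-cbor'
import car from 'datastore-car'

multiformats.add(dagCbor)
const { indexer, readRaw } = car(multiformats)

async function randomAccess (path) {
  // one pass over the archive to collect the roots and every block's location
  const { roots, iterator } = await indexer(path)
  console.log('roots:', roots.map((r) => r.toString()))
  const index = []
  for await (const blockIndex of iterator) {
    index.push(blockIndex)
  }

  // read the blocks back in reverse order to show the reads are truly random-access
  const fd = await fs.promises.open(path)
  for (const blockIndex of index.reverse()) {
    const { cid, binary } = await readRaw(fd, blockIndex)
    console.log('%s (%d bytes)', cid.toString(), binary.byteLength)
  }
  await fd.close()
}

randomAccess(process.argv[2] || 'example.car').catch((err) => {
  console.error(err)
  process.exit(1)
})
```

This is the same mechanism `readFileIndexed()` uses internally, so prefer that create-mode unless you need the raw index entries themselves.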