Skip to content

Commit

Permalink
Simplify calculating hash from file with chunked-file-reader
Browse files Browse the repository at this point in the history
The chunked-file-reader comes with the functionality of reading a file
in chunks, so we can simplify the file example a lot by offloading this
logic to that package. I think this will make it much more approachable
for people wanting to reuse that code.

The chunked-file-reader package uses `readAsArrayBuffer()`, and we cannot
use it for tests that use `readAsBinaryString()`.

Also, chunked-file-reader always uses `File.prototype.slice`, but I think
that's ok now, since `blob.mozSlice()` is only needed for Firefox 12 and
earlier, though I don't know in which version Safari started supporting
`File.prototype.slice` (I tested that it works on Safari 11 which is the
current latest version).

Closes satazor#48
  • Loading branch information
janko committed Apr 27, 2018
1 parent a7ee21c commit 078eaaa
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 102 deletions.
51 changes: 18 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,44 +48,29 @@ var rawHash = spark.end(true); // OR raw hash (binary string)

### Hash a file incrementally

If you want to calculate an MD5 hash of a file, it's recommended to read the
file in chunks and calculate the hash incrementally. For reading a file in
chunks you can use the [chunked-file-reader](https://www.npmjs.com/package/chunked-file-reader)
package.

NOTE: If you test the code below using the file:// protocol in Chrome you must start the browser with the --allow-file-access-from-files argument.
Please see: http://code.google.com/p/chromium/issues/detail?id=60889

```js
document.getElementById('file').addEventListener('change', function () {
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = this.files[0],
chunkSize = 2097152, // Read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
spark = new SparkMD5.ArrayBuffer(),
fileReader = new FileReader();

fileReader.onload = function (e) {
console.log('read chunk nr', currentChunk + 1, 'of', chunks);
spark.append(e.target.result); // Append array buffer
currentChunk++;

if (currentChunk < chunks) {
loadNext();
} else {
console.log('finished loading');
console.info('computed hash', spark.end()); // Compute hash
}
};

fileReader.onerror = function () {
console.warn('oops, something went wrong.');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}

loadNext();
var file = this.files[0],
spark = new SparkMD5.ArrayBuffer(),
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader

reader.subscribe('chunk', function (e) {
spark.append(e.chunk);
});

reader.subscribe('end', function (e) {
console.info('computed hash', spark.end());
});

reader.readChunks(file);
});
```

Expand Down
50 changes: 17 additions & 33 deletions test/file_reader.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="css/bootstrap-1.4.min.css">
<script src="../spark-md5.js" type="text/javascript"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>

<style type="text/css" media="screen">
.alert-message {
Expand Down Expand Up @@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
<div id="log"></div>

<script type="text/javascript">
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
log = document.getElementById('log'),
var log = document.getElementById('log'),
input = document.getElementById('file'),
running = false,
ua = navigator.userAgent.toLowerCase();
Expand All @@ -64,20 +64,17 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
return;
}

var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = input.files[0],
chunkSize = 2097152, // read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
var file = input.files[0],
spark = new SparkMD5.ArrayBuffer(),
currentChunk = 0,
time,
uniqueId = 'chunk_' + (new Date().getTime()),
chunkId = null,
fileReader = new FileReader();
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 });

fileReader.onload = function (e) {
reader.subscribe('chunk', function (e) {
if (currentChunk === 0) {
registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong> of <strong>' + chunks + '</strong><br/>', 'info');
registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong><br/>', 'info');
} else {
if (chunkId === null) {
chunkId = document.getElementById(uniqueId);
Expand All @@ -86,35 +83,22 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
chunkId.innerHTML = currentChunk + 1;
}

spark.append(e.target.result); // append array buffer
currentChunk += 1;
spark.append(e.chunk); // append array buffer

if (currentChunk < chunks) {
loadNext();
} else {
running = false;
registerLog('<strong>Finished loading!</strong><br/>', 'success');
registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
}
};
currentChunk += 1;
});

fileReader.onerror = function () {
reader.subscribe('end', function (e) {
running = false;
registerLog('<strong>Oops, something went wrong.</strong>', 'error');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = start + chunkSize >= file.size ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}
registerLog('<strong>Finished loading!</strong><br/>', 'success');
registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
});

running = true;
registerLog('<p></p><strong>Starting incremental test (' + file.name + ')</strong><br/>', 'info');
time = new Date().getTime();
loadNext();
reader.readChunks(file);
}

function doNormalTest() {
Expand Down Expand Up @@ -160,7 +144,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
}
}

if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
} else {
registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
Expand Down
11 changes: 5 additions & 6 deletions test/file_reader_binary.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="css/bootstrap-1.4.min.css">
<script src="../spark-md5.js" type="text/javascript"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>

<style type="text/css" media="screen">
.alert-message {
Expand Down Expand Up @@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
<div id="log"></div>

<script type="text/javascript">
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
log = document.getElementById('log'),
var log = document.getElementById('log'),
input = document.getElementById('file'),
running = false,
ua = navigator.userAgent.toLowerCase();
Expand All @@ -64,8 +64,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
return;
}

var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = input.files[0],
var file = input.files[0],
chunkSize = 2097152, // read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
Expand Down Expand Up @@ -108,7 +107,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
var start = currentChunk * chunkSize,
end = start + chunkSize >= file.size ? file.size : start + chunkSize;

fileReader.readAsBinaryString(blobSlice.call(file, start, end));
fileReader.readAsBinaryString(file.slice(start, end));
}

running = true;
Expand Down Expand Up @@ -160,7 +159,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
}
}

if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
} else {
registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
Expand Down
130 changes: 130 additions & 0 deletions test/js/chunked-file-reader-0.0.3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
        define(factory);
    } else if (typeof exports === 'object' && typeof module != 'undefined') {
        module.exports = factory();
    } else {
        // Fix: the original assigned to `this`, which is `undefined` under
        // strict mode / ES modules. Use the `root` argument the wrapper is
        // given (falling back to `globalThis`) for the browser-global case.
        (root || globalThis).ChunkedFileReader = factory();
    }
}(this, function () {
    'use strict';

    /**
     * Reads a Blob/File in fixed-size chunks, publishing events as it goes.
     *
     * @class ChunkedFileReader
     * @constructor
     * @param opts {object} The options.
     *        Valid options are:
     *        maxChunkSize - Maximum chunk size in bytes (default 256 KiB)
     */
    var ChunkedFileReader = function (opts) {
        opts || (opts = {});

        this.maxChunkSize = (opts.maxChunkSize || 256 * 1024);
        this.listeners = {};
    };

    /**
     * Subscribe to an event.
     *
     * @method subscribe
     * @param eventName {string} The event name to be subscribed
     *        ("begin", "progress", "chunk", "end" or "error")
     * @param listener {function} The listener function to be invoked on events
     * @param thisObj {any} The `this' object to be used for invoking listener function
     */
    ChunkedFileReader.prototype.subscribe = function (eventName, listener, thisObj) {
        this.listeners[eventName] = (this.listeners[eventName] || []);
        this.listeners[eventName].push({
            ctx: thisObj,
            fun: listener
        });
    };

    /**
     * **Internal use** — invoke every listener registered for an event.
     *
     * @method publish
     * @param eventName {string} The event name
     * @param eventArgs {object} The event args to be passed to each listener
     */
    ChunkedFileReader.prototype.publish = function (eventName, eventArgs) {
        (this.listeners[eventName] || []).forEach(function (listener) {
            listener.fun.call(listener.ctx, eventArgs);
        });
    };

    /**
     * Read chunks from a Blob/File object.
     *
     * Published events:
     *   "begin"    - reading started       ({nchunks})
     *   "progress" - a chunk was read      ({nchunks, done, done_ratio})
     *   "chunk"    - chunk data available  ({seq, nchunks, chunk})
     *   "end"      - reading finished      ({nchunks})
     *   "error"    - a read failed         ({error}); reading stops
     *
     * @method readChunks
     * @param input {Blob} The Blob (File) object
     */
    ChunkedFileReader.prototype.readChunks = function (input) {
        var chunkSize = Math.min(this.maxChunkSize, input.size);
        // Math.ceil replaces the original `parseInt(bytes / chunkSize) + 1`,
        // which floored via a string conversion; the `chunkSize > 0` guard
        // keeps an empty file from yielding NaN (it produces one empty chunk).
        var nchunks = chunkSize > 0 ? Math.ceil(input.size / chunkSize) : 1;
        var remainingBytes = input.size;
        var pos = 0;
        var seq = 1;
        var reader = new FileReader(); // FileReader's constructor takes no arguments
        var that = this;

        reader.onloadend = function (evt) {
            if (evt.target.readyState !== FileReader.DONE) {
                return;
            }

            // onloadend also fires after a failed read (result === null);
            // report it and stop instead of publishing a null chunk.
            if (evt.target.error) {
                that.publish('error', { error: evt.target.error });
                return;
            }

            that.publish('progress', {
                nchunks: nchunks,
                done: seq,
                done_ratio: (seq / nchunks)
            });
            that.publish('chunk', {
                seq: seq,
                nchunks: nchunks,
                chunk: evt.target.result
            });
            ++seq;

            pos += chunkSize;
            remainingBytes -= chunkSize;
            if (remainingBytes < chunkSize) {
                chunkSize = remainingBytes;
            }
            if (remainingBytes > 0) {
                reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
            } else {
                that.publish('end', {
                    nchunks: nchunks
                });
            }
        };

        this.publish('begin', {
            nchunks: nchunks
        });

        reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
    };

    return ChunkedFileReader;
}));
41 changes: 11 additions & 30 deletions test/readme_example.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,26 @@
<title>SparkMD5 readme example</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<script src="../spark-md5.js"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>
</head>
<body onload="init()">
<input type="file" id="file" />
<script>
function init() {
document.getElementById('file').addEventListener('change', function () {
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = this.files[0],
chunkSize = 2097152, // Read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
spark = new SparkMD5.ArrayBuffer(),
fileReader = new FileReader();
var file = this.files[0],
spark = new SparkMD5.ArrayBuffer(),
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader

fileReader.onload = function (e) {
console.log('read chunk nr', currentChunk + 1, 'of', chunks);
spark.append(e.target.result); // Append array buffer
currentChunk++;
reader.subscribe('chunk', function (e) {
spark.append(e.chunk);
});

if (currentChunk < chunks) {
loadNext();
} else {
console.log('finished loading');
console.info('computed hash', spark.end()); // Compute hash
}
};
reader.subscribe('end', function (e) {
console.info('computed hash', spark.end());
});

fileReader.onerror = function () {
console.warn('oops, something went wrong.');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}

loadNext();
reader.readChunks(file);
});
}
</script>
Expand Down

0 comments on commit 078eaaa

Please sign in to comment.