Skip to content

Commit

Permalink
Simplify calculating hash from file with chunked-file-reader
Browse files Browse the repository at this point in the history
The chunked-file-reader comes with the functionality of reading a file
in chunks, so we can simplify the file example a lot by offloading this
logic to that package. I think this will make it much more approachable
for people wanting to reuse that code.

The chunked-file-reader package uses `readAsArrayBuffer()`, and we cannot
use it for tests that use `readAsBinaryString()`.

Also, chunked-file-reader always uses `File.prototype.slice`, but I think
that's ok now, since `blob.mozSlice()` is only needed for Firefox 12 and
earlier, though I don't know in which version Safari started supporting
`File.prototype.slice` (I tested that it works on Safari 11 which is the
current latest version).

Closes satazor#48
  • Loading branch information
janko committed Apr 27, 2018
1 parent a7ee21c commit 078eaaa
Show file tree
Hide file tree
Showing 5 changed files with 181 additions and 102 deletions.
51 changes: 18 additions & 33 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -48,44 +48,29 @@ var rawHash = spark.end(true); // OR raw hash (binary string)

### Hash a file incrementally

If you want to calculate an MD5 hash of a file, it's recommended to read the
file in chunks and calculate the hash incrementally. For reading a file in
chunks you can use the [chunked-file-reader](https://www.npmjs.com/package/chunked-file-reader)
package.

NOTE: If you test the code below using the file:// protocol in Chrome you must start the browser with the --allow-file-access-from-files argument.
Please see: http://code.google.com/p/chromium/issues/detail?id=60889

```js
document.getElementById('file').addEventListener('change', function () {
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = this.files[0],
chunkSize = 2097152, // Read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
spark = new SparkMD5.ArrayBuffer(),
fileReader = new FileReader();

fileReader.onload = function (e) {
console.log('read chunk nr', currentChunk + 1, 'of', chunks);
spark.append(e.target.result); // Append array buffer
currentChunk++;

if (currentChunk < chunks) {
loadNext();
} else {
console.log('finished loading');
console.info('computed hash', spark.end()); // Compute hash
}
};

fileReader.onerror = function () {
console.warn('oops, something went wrong.');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}

loadNext();
var file = this.files[0],
spark = new SparkMD5.ArrayBuffer(),
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader

reader.subscribe('chunk', function (e) {
spark.append(e.chunk);
});

reader.subscribe('end', function (e) {
console.info('computed hash', spark.end());
});

reader.readChunks(file);
});
```

Expand Down
50 changes: 17 additions & 33 deletions test/file_reader.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="css/bootstrap-1.4.min.css">
<script src="../spark-md5.js" type="text/javascript"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>

<style type="text/css" media="screen">
.alert-message {
Expand Down Expand Up @@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
<div id="log"></div>

<script type="text/javascript">
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
log = document.getElementById('log'),
var log = document.getElementById('log'),
input = document.getElementById('file'),
running = false,
ua = navigator.userAgent.toLowerCase();
Expand All @@ -64,20 +64,17 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
return;
}

var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = input.files[0],
chunkSize = 2097152, // read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
var file = input.files[0],
spark = new SparkMD5.ArrayBuffer(),
currentChunk = 0,
time,
uniqueId = 'chunk_' + (new Date().getTime()),
chunkId = null,
fileReader = new FileReader();
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 });

fileReader.onload = function (e) {
reader.subscribe('chunk', function (e) {
if (currentChunk === 0) {
registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong> of <strong>' + chunks + '</strong><br/>', 'info');
registerLog('Read chunk number <strong id="' + uniqueId + '">' + (currentChunk + 1) + '</strong><br/>', 'info');
} else {
if (chunkId === null) {
chunkId = document.getElementById(uniqueId);
Expand All @@ -86,35 +83,22 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
chunkId.innerHTML = currentChunk + 1;
}

spark.append(e.target.result); // append array buffer
currentChunk += 1;
spark.append(e.chunk); // append array buffer

if (currentChunk < chunks) {
loadNext();
} else {
running = false;
registerLog('<strong>Finished loading!</strong><br/>', 'success');
registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
}
};
currentChunk += 1;
});

fileReader.onerror = function () {
reader.subscribe('end', function (e) {
running = false;
registerLog('<strong>Oops, something went wrong.</strong>', 'error');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = start + chunkSize >= file.size ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}
registerLog('<strong>Finished loading!</strong><br/>', 'success');
registerLog('<strong>Computed hash:</strong> ' + spark.end() + '<br/>', 'success'); // compute hash
registerLog('<strong>Total time:</strong> ' + (new Date().getTime() - time) + 'ms<br/>', 'success');
});

running = true;
registerLog('<p></p><strong>Starting incremental test (' + file.name + ')</strong><br/>', 'info');
time = new Date().getTime();
loadNext();
reader.readChunks(file);
}

function doNormalTest() {
Expand Down Expand Up @@ -160,7 +144,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
}
}

if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
} else {
registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
Expand Down
11 changes: 5 additions & 6 deletions test/file_reader_binary.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<link rel="stylesheet" href="css/bootstrap-1.4.min.css">
<script src="../spark-md5.js" type="text/javascript"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>

<style type="text/css" media="screen">
.alert-message {
Expand Down Expand Up @@ -40,8 +41,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
<div id="log"></div>

<script type="text/javascript">
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
log = document.getElementById('log'),
var log = document.getElementById('log'),
input = document.getElementById('file'),
running = false,
ua = navigator.userAgent.toLowerCase();
Expand All @@ -64,8 +64,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
return;
}

var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = input.files[0],
var file = input.files[0],
chunkSize = 2097152, // read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
Expand Down Expand Up @@ -108,7 +107,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
var start = currentChunk * chunkSize,
end = start + chunkSize >= file.size ? file.size : start + chunkSize;

fileReader.readAsBinaryString(blobSlice.call(file, start, end));
fileReader.readAsBinaryString(file.slice(start, end));
}

running = true;
Expand Down Expand Up @@ -160,7 +159,7 @@ <h4>Please note that the advantage of doing an incremental md5 is to keep memory
}
}

if (!('FileReader' in window) || !('File' in window) || !blobSlice) {
if (!('FileReader' in window) || !('File' in window) || !File.prototype.slice) {
registerLog('<p><strong>Your browser does not support the FileAPI or slicing of files.</strong></p>', 'error');
} else {
registerLog('Keep your devtools closed otherwise this example will be a LOT slower', 'info');
Expand Down
130 changes: 130 additions & 0 deletions test/js/chunked-file-reader-0.0.3.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
(function (root, factory) {
    if (typeof define === 'function' && define.amd) {
        define(factory);
    } else if (typeof exports === 'object' && typeof module != 'undefined') {
        module.exports = factory();
    } else {
        // Fix: the original assigned to `this`, which is `undefined` under
        // strict mode / ES modules. Use the `root` argument the wrapper is
        // given (falling back to `globalThis`) for the browser-global case.
        (root || globalThis).ChunkedFileReader = factory();
    }
}(this, function () {
    'use strict';

    /**
     * Reads a Blob/File in fixed-size chunks, publishing events as it goes.
     *
     * @class ChunkedFileReader
     * @constructor
     * @param opts {object} The options.
     *        Valid options are:
     *        maxChunkSize - Maximum chunk size in bytes (default 256 KiB)
     */
    var ChunkedFileReader = function (opts) {
        opts || (opts = {});

        this.maxChunkSize = (opts.maxChunkSize || 256 * 1024);
        this.listeners = {};
    };

    /**
     * Subscribe to an event.
     *
     * @method subscribe
     * @param eventName {string} The event name to be subscribed
     *        ("begin", "progress", "chunk", "end" or "error")
     * @param listener {function} The listener function to be invoked on events
     * @param thisObj {any} The `this' object to be used for invoking listener function
     */
    ChunkedFileReader.prototype.subscribe = function (eventName, listener, thisObj) {
        this.listeners[eventName] = (this.listeners[eventName] || []);
        this.listeners[eventName].push({
            ctx: thisObj,
            fun: listener
        });
    };

    /**
     * **Internal use** — invoke every listener registered for an event.
     *
     * @method publish
     * @param eventName {string} The event name
     * @param eventArgs {object} The event args to be passed to each listener
     */
    ChunkedFileReader.prototype.publish = function (eventName, eventArgs) {
        (this.listeners[eventName] || []).forEach(function (listener) {
            listener.fun.call(listener.ctx, eventArgs);
        });
    };

    /**
     * Read chunks from a Blob/File object.
     *
     * Published events:
     *   "begin"    - reading started       ({nchunks})
     *   "progress" - a chunk was read      ({nchunks, done, done_ratio})
     *   "chunk"    - chunk data available  ({seq, nchunks, chunk})
     *   "end"      - reading finished      ({nchunks})
     *   "error"    - a read failed         ({error}); reading stops
     *
     * @method readChunks
     * @param input {Blob} The Blob (File) object
     */
    ChunkedFileReader.prototype.readChunks = function (input) {
        var chunkSize = Math.min(this.maxChunkSize, input.size);
        // Math.ceil replaces the original `parseInt(bytes / chunkSize) + 1`,
        // which floored via a string conversion; the `chunkSize > 0` guard
        // keeps an empty file from yielding NaN (it produces one empty chunk).
        var nchunks = chunkSize > 0 ? Math.ceil(input.size / chunkSize) : 1;
        var remainingBytes = input.size;
        var pos = 0;
        var seq = 1;
        var reader = new FileReader(); // FileReader's constructor takes no arguments
        var that = this;

        reader.onloadend = function (evt) {
            if (evt.target.readyState !== FileReader.DONE) {
                return;
            }

            // onloadend also fires after a failed read (result === null);
            // report it and stop instead of publishing a null chunk.
            if (evt.target.error) {
                that.publish('error', { error: evt.target.error });
                return;
            }

            that.publish('progress', {
                nchunks: nchunks,
                done: seq,
                done_ratio: (seq / nchunks)
            });
            that.publish('chunk', {
                seq: seq,
                nchunks: nchunks,
                chunk: evt.target.result
            });
            ++seq;

            pos += chunkSize;
            remainingBytes -= chunkSize;
            if (remainingBytes < chunkSize) {
                chunkSize = remainingBytes;
            }
            if (remainingBytes > 0) {
                reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
            } else {
                that.publish('end', {
                    nchunks: nchunks
                });
            }
        };

        this.publish('begin', {
            nchunks: nchunks
        });

        reader.readAsArrayBuffer(input.slice(pos, pos + chunkSize));
    };

    return ChunkedFileReader;
}));
41 changes: 11 additions & 30 deletions test/readme_example.html
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,26 @@
<title>SparkMD5 readme example</title>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<script src="../spark-md5.js"></script>
<script src="js/chunked-file-reader-0.0.3.js" type="text/javascript"></script>
</head>
<body onload="init()">
<input type="file" id="file" />
<script>
function init() {
document.getElementById('file').addEventListener('change', function () {
var blobSlice = File.prototype.slice || File.prototype.mozSlice || File.prototype.webkitSlice,
file = this.files[0],
chunkSize = 2097152, // Read in chunks of 2MB
chunks = Math.ceil(file.size / chunkSize),
currentChunk = 0,
spark = new SparkMD5.ArrayBuffer(),
fileReader = new FileReader();
var file = this.files[0],
spark = new SparkMD5.ArrayBuffer(),
reader = new ChunkedFileReader({ maxChunkSize: 2*1024*1024 }); // https://www.npmjs.com/package/chunked-file-reader

fileReader.onload = function (e) {
console.log('read chunk nr', currentChunk + 1, 'of', chunks);
spark.append(e.target.result); // Append array buffer
currentChunk++;
reader.subscribe('chunk', function (e) {
spark.append(e.chunk);
});

if (currentChunk < chunks) {
loadNext();
} else {
console.log('finished loading');
console.info('computed hash', spark.end()); // Compute hash
}
};
reader.subscribe('end', function (e) {
console.info('computed hash', spark.end());
});

fileReader.onerror = function () {
console.warn('oops, something went wrong.');
};

function loadNext() {
var start = currentChunk * chunkSize,
end = ((start + chunkSize) >= file.size) ? file.size : start + chunkSize;

fileReader.readAsArrayBuffer(blobSlice.call(file, start, end));
}

loadNext();
reader.readChunks(file);
});
}
</script>
Expand Down

0 comments on commit 078eaaa

Please sign in to comment.