From 3705e5e45986d66c5bb644f22b17de8e341839c9 Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Fri, 24 Mar 2017 17:24:30 +0100 Subject: [PATCH 1/2] Use a proper `MessageHandler` for `PartialEvaluator.getTextContent` to avoid errors for fonts relying on built-in CMap files (PR 8064 follow-up) *My apologies for inadvertently breaking this in PR 8064; apparently we don't have any tests that cover this use-case :(* Without this patch `getTextContent` will fail if called before `getOperatorList`, since loading of fonts during text-extraction may require fetching of built-in CMap files. *Please note:* The `text` test added here, which uses an already existing PDF file, fails without this patch. --- src/core/document.js | 7 +------ src/core/worker.js | 2 +- test/test_manifest.json | 9 ++++++++- 3 files changed, 10 insertions(+), 8 deletions(-) diff --git a/src/core/document.js b/src/core/document.js index 716b9a21dd9d4..0cb7dfe0e7f6e 100644 --- a/src/core/document.js +++ b/src/core/document.js @@ -310,14 +310,9 @@ var Page = (function PageClosure() { }); }, - extractTextContent: function Page_extractTextContent(task, + extractTextContent: function Page_extractTextContent(handler, task, normalizeWhitespace, combineTextItems) { - var handler = { - on: function nullHandlerOn() {}, - send: function nullHandlerSend() {} - }; - var self = this; var pdfManager = this.pdfManager; diff --git a/src/core/worker.js b/src/core/worker.js index 756c9e8a602ce..ec8aa4b1c18fa 100644 --- a/src/core/worker.js +++ b/src/core/worker.js @@ -906,7 +906,7 @@ var WorkerMessageHandler = { startWorkerTask(task); var pageNum = pageIndex + 1; var start = Date.now(); - return page.extractTextContent(task, normalizeWhitespace, + return page.extractTextContent(handler, task, normalizeWhitespace, combineTextItems).then( function(textContent) { finishWorkerTask(task); diff --git a/test/test_manifest.json b/test/test_manifest.json index a266aea45a2ca..e99a638f6e1fc 100644 --- a/test/test_manifest.json +++ 
b/test/test_manifest.json @@ -2757,10 +2757,17 @@ "md5": "797093d67c4d4d4231ac6e1fb66bf6c3", "rounds": 1, "link": true, - "firstPage": 1, "lastPage": 1, "type": "eq" }, + { "id": "mao-text", + "file": "pdfs/mao.pdf", + "md5": "797093d67c4d4d4231ac6e1fb66bf6c3", + "rounds": 1, + "link": true, + "lastPage": 1, + "type": "text" + }, { "id": "noembed-identity", "file": "pdfs/noembed-identity.pdf", "md5": "05d3803b6c22451e18cb60d8d8c75c0c", From 5c0c122a7da425f84b675ed665dc14bd9b7d166e Mon Sep 17 00:00:00 2001 From: Jonas Jenwald Date: Sat, 25 Mar 2017 17:43:51 +0100 Subject: [PATCH 2/2] Ensure that the `XMLHttpRequest` is `open`ed before attempting to set the `responseType` in the `DOMCMapReaderFactory`, since IE fails otherwise (issue 8193) I really cannot understand why this change is necessary, since modern browsers such as Firefox and Chrome work just fine with the old code. Hence this patch is yet another "hack" that's needed just because IE apparently cannot just work like you'd expect. For consistency, the Node factory used in the CMap unit-tests is changed as well. Fixes 8193. --- src/display/dom_utils.js | 13 +++++++------ test/unit/test_utils.js | 12 +++++------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/src/display/dom_utils.js b/src/display/dom_utils.js index 185303a28ac6f..37ea546438f20 100644 --- a/src/display/dom_utils.js +++ b/src/display/dom_utils.js @@ -76,15 +76,17 @@ var DOMCMapReaderFactory = (function DOMCMapReaderFactoryClosure() { DOMCMapReaderFactory.prototype = { fetch: function(params) { - if (!params.name) { + var name = params.name; + if (!name) { return Promise.reject(new Error('CMap name must be specified.')); } return new Promise(function (resolve, reject) { - var url = this.baseUrl + params.name; + var url = this.baseUrl + name + (this.isCompressed ?
'.bcmap' : ''); var request = new XMLHttpRequest(); + request.open('GET', url, true); + if (this.isCompressed) { - url += '.bcmap'; request.responseType = 'arraybuffer'; } request.onreadystatechange = function () { @@ -105,12 +107,11 @@ var DOMCMapReaderFactory = (function DOMCMapReaderFactoryClosure() { return; } reject(new Error('Unable to load ' + - (this.isCompressed ? 'binary' : '') + - ' CMap at: ' + url)); + (this.isCompressed ? 'binary ' : '') + + 'CMap at: ' + url)); } }.bind(this); - request.open('GET', url, true); request.send(null); }.bind(this)); }, diff --git a/test/unit/test_utils.js b/test/unit/test_utils.js index 7d37c0e2166b5..bf28eeba86f5a 100644 --- a/test/unit/test_utils.js +++ b/test/unit/test_utils.js @@ -35,21 +35,19 @@ var NodeCMapReaderFactory = (function NodeCMapReaderFactoryClosure() { NodeCMapReaderFactory.prototype = { fetch: function(params) { - if (!params.name) { + var name = params.name; + if (!name) { return Promise.reject(new Error('CMap name must be specified.')); } return new Promise(function (resolve, reject) { - var url = this.baseUrl + params.name; + var url = this.baseUrl + name + (this.isCompressed ? '.bcmap' : ''); var fs = require('fs'); - if (this.isCompressed) { - url += '.bcmap'; - } fs.readFile(url, function (error, data) { if (error || !data) { reject(new Error('Unable to load ' + - (this.isCompressed ? 'binary' : '') + - ' CMap at: ' + url)); + (this.isCompressed ? 'binary ' : '') + + 'CMap at: ' + url)); return; } resolve({