Skip to content

Commit

Permalink
v1.1.1: fix issue #42, #43 and #54
Browse files Browse the repository at this point in the history
  • Loading branch information
modesty committed Mar 10, 2016
1 parent 133a285 commit bdf1007
Show file tree
Hide file tree
Showing 9 changed files with 40 additions and 24 deletions.
6 changes: 3 additions & 3 deletions base/shared/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,10 @@ function error(msg) {
// Join the arguments into a single string for the lines below.
msg = [].join.call(arguments, ' ');
} else {
log('Error: ' + msg);
//log('Error: ' + msg);
}
log(backtrace());
PDFJS.LogManager.notify('error', msg);
//log(backtrace());
//PDFJS.LogManager.notify('error', msg);
throw new Error(msg);
}

Expand Down
12 changes: 6 additions & 6 deletions lib/p2jcmd.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ let PDF2JSONUtil = (function () {

let _continue = function(callback, err) {
if (err)
console.warn(err);
console.error(err);
if (nodeUtil.isFunction(callback))
callback(err);
};
Expand Down Expand Up @@ -122,7 +122,7 @@ let PDF2JSONUtil = (function () {

async.series(outputTasks, function(err, results){
if (err) {
console.warn("Error: " + err);
console.error("Error: " + err);
} else {
console.log("Output files OK", results);
}
Expand Down Expand Up @@ -181,7 +181,7 @@ let PDF2JSONUtil = (function () {
retVal = "Input error: input file name doesn't have pdf extention - " + this.inputFile + ".";
else {
this.outputFile = path.basename(this.inputPath, inExtName) + ".json";
this.outputPath = this.outputDir + path.sep + this.outputFile;
this.outputPath = this.outputDir + this.outputFile;
if (fs.existsSync(this.outputPath))
console.log("Output file will be replaced - " + this.outputPath);
else {
Expand Down Expand Up @@ -291,8 +291,8 @@ let PDFProcessor = (function () {

process.nextTick( () => {
console.timeEnd(_PRO_TIMER);
let exitCode = (this.inputCount === this.successCount) ? 0 : 1;
process.exit(exitCode);
//let exitCode = (this.inputCount === this.successCount) ? 0 : 1;
process.exit(0);
});
};

Expand All @@ -302,7 +302,7 @@ let PDFProcessor = (function () {

this.inputCount = 1;
this.p2j = new PDF2JSONUtil(inputDir, inputFile, this);
this.p2j.processFile( () => this.complete() );
this.p2j.processFile( data => this.complete(data) );
};

cls.prototype.processFiles = function(inputDir, files) {
Expand Down
33 changes: 20 additions & 13 deletions lib/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,7 @@ let PDFPageParser = (function () {
},
err => console.error("pdfPage.getAnnotations error:" + err));
},
error => pageViewDrawCallback.call(this, error)
err => pageViewDrawCallback.call(this, err)
);
};

Expand Down Expand Up @@ -240,18 +240,25 @@ let PDFJSClass = (function () {
// inherit from event emitter
nodeUtil.inherits(cls, nodeEvents.EventEmitter);

/**
 * Report a parsing failure: log it, then notify listeners.
 *
 * The message is written to `console.error` immediately; the
 * "pdfjs_parseDataError" event is emitted on the next tick, i.e. after the
 * caller's current call stack has unwound.
 *
 * @param {*} errMsg - the error message (or error value) to report.
 * @returns {*} the same `errMsg` that was passed in.
 */
cls.prototype.raiseErrorEvent = function(errMsg) {
    let notifyListeners = () => {
        this.emit("pdfjs_parseDataError", errMsg);
    };

    console.error(errMsg);
    process.nextTick(notifyListeners);

    return errMsg;
};

/**
 * Announce that a piece of parsed data is available.
 *
 * The "pdfjs_parseDataReady" event is emitted on the next tick with `data`
 * as its payload, i.e. after the caller's current call stack has unwound.
 *
 * @param {*} data - the payload to hand to "pdfjs_parseDataReady" listeners.
 * @returns {*} the same `data` that was passed in.
 */
cls.prototype.raiseReadyEvent = function(data) {
    let emitReady = () => {
        this.emit("pdfjs_parseDataReady", data);
    };

    process.nextTick(emitReady);

    return data;
};


cls.prototype.parsePDFData = function(arrayBuffer) {
this.pdfDocument = null;

let parameters = {password: '', data: arrayBuffer};
PDFJS.getDocument(parameters).then(
pdfDocument => this.load(pdfDocument, 1),
error => {
let errMsg = "An error occurred while parsing the PDF: " + error;
nodeUtil.p2jwarn(errMsg);
this.emit("pdfjs_parseDataError", errMsg);
},
progressData => nodeUtil.p2jinfo("Loading progress: " + progressData.loaded / progressData.total + "%")
error => this.raiseErrorEvent("An error occurred while parsing the PDF: " + error)
);
};

Expand Down Expand Up @@ -289,7 +296,7 @@ let PDFJSClass = (function () {

pagesPromise.then(
promisedPages => this.parsePage(promisedPages, 0, 1.5),
error => console.log("pagesPromise error: " + error)
error => this.raiseErrorEvent("pagesPromise error: " + error)
);

pdfDocument.getMetadata().then(
Expand All @@ -298,7 +305,7 @@ let PDFJSClass = (function () {
this.metadata = data.metadata;
this.parseMetaData();
},
error => console.log("pdfDocument.getMetadata error: " + error)
error => this.raiseErrorEvent("pdfDocument.getMetadata error: " + error)
);
};

Expand All @@ -325,7 +332,7 @@ let PDFJSClass = (function () {
formAttr.Parent = _getMetaDataInt(metadata, 'pdfx:parent');
}

this.emit("pdfjs_parseDataReady", {Transcoder: _PARSER_SIG, Agency:pdfTile, Id: formAttr});
this.raiseReadyEvent({Transcoder: _PARSER_SIG, Agency:pdfTile, Id: formAttr});
};

cls.prototype.parsePage = function(promisedPages, id, scale) {
Expand All @@ -337,7 +344,7 @@ let PDFJSClass = (function () {
function continueOnNextPage() {
nodeUtil.p2jinfo("complete parsing page:" + (id+1));
if (id === (this.pdfDocument.numPages - 1) ) {
this.emit("pdfjs_parseDataReady", {Pages:this.pages, Width: this.pageWidth});
this.raiseReadyEvent({Pages:this.pages, Width: this.pageWidth});
}
else {
process.nextTick(() => this.parsePage(promisedPages, ++id, scale));
Expand Down Expand Up @@ -369,14 +376,14 @@ let PDFJSClass = (function () {
nodeUtil.p2jinfo("complete parsing raw text content:" + (id+1));
continueOnNextPage.call(this);
},
error => console.error("pdfPage.getTextContent error: " + error)
error => this.raiseErrorEvent("pdfPage.getTextContent error: " + error)
);
}
else {
continueOnNextPage.call(this);
}
},
errMsg => this.emit("pdfjs_parseDataError", errMsg)
errMsg => this.raiseErrorEvent("parsePage error:" + errMsg)
);
};

Expand Down
1 change: 1 addition & 0 deletions lib/pdffont.js
Original file line number Diff line number Diff line change
Expand Up @@ -216,6 +216,7 @@ let PDFFont = (function PFPFontClosure() {
if (!this.bold) {
this.bold = this.typeName.indexOf("bold") >= 0 || this.typeName.indexOf("black") >= 0;
}
this.italic = fontObj.italic; // fix https://github.com/modesty/pdf2json/issues/42

let typeName = this.subType;
if (fontObj.isSerifFont) {
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "pdf2json",
"version": "1.0.9",
"version": "1.1.1",
"description": "A PDF file parser that converts PDF binaries to text based JSON, powered by porting a fork of PDF.JS to Node.js",
"keywords": [
"pdf",
Expand All @@ -27,7 +27,8 @@
},
"main": "./pdfparser.js",
"scripts": {
"test": "cd ./test && sh p2j.forms.sh"
"test": "cd ./test && sh p2j.forms.sh",
"test-misc": "node pdf2json.js -f ./test/pdf/misc/ -o ./test/target/misc/ -c -m"
},
"engines": {
"node": ">=4.0"
Expand Down
7 changes: 7 additions & 0 deletions readme.md
Original file line number Diff line number Diff line change
Expand Up @@ -752,6 +752,13 @@ Or, from `npm scripts`:
npm test
````

Some test PDFs were provided by bug reporters, for example "unsupported encryption" ([#43](https://github.com/modesty/pdf2json/issues/43)), "read property num from undefined" ([#26](https://github.com/modesty/pdf2json/issues/26)), and "excessive line breaks in text content" ([#28](https://github.com/modesty/pdf2json/issues/28)). These PDFs are all stored in the `test/pdf/misc` directory. To run the tests against these community-contributed PDFs, run the following from the command line:

````
npm run-script test-misc
````


## Upgrading to ~v1.0.x

If you have an earlier version of pdf2json, please remove your local `node_modules` directory and re-run `npm install` to upgrade to pdf2json@1.0.x.
Expand Down
Binary file added test/pdf/misc/i26_crash_18277.pdf
Binary file not shown.
Binary file added test/pdf/misc/i28_line_break_210.pdf
Binary file not shown.
Binary file added test/pdf/misc/i43_encrypted.pdf
Binary file not shown.

0 comments on commit bdf1007

Please sign in to comment.