Skip to content

Commit

Permalink
v1.0.9: fixed issues #21 and #26
Browse files Browse the repository at this point in the history
  • Loading branch information
modesty committed Mar 9, 2016
1 parent 448f71f commit 133a285
Show file tree
Hide file tree
Showing 13 changed files with 208 additions and 210 deletions.
3 changes: 2 additions & 1 deletion base/core/obj.js
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,8 @@ var RefSetCache = (function RefSetCacheClosure() {
},

has: function RefSetCache_has(ref) {
return ('R' + ref.num + '.' + ref.gen) in this.dict;
//MQZ. 03/08/2016 fix https://github.com/modesty/pdf2json/issues/26
return !!ref ? ('R' + ref.num + '.' + ref.gen) in this.dict : false;
},

put: function RefSetCache_put(ref, obj) {
Expand Down
2 changes: 1 addition & 1 deletion lib/p2jcmd.js
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ let optimist = require('optimist')
.alias('t', 'fieldTypes')
.describe('t', '(optional) when specified, will generate .fields.json that includes fields ids and types.\n')
.alias('c', 'content')
.describe('c', '(optional) when specified, will generate .content.txt that includes text content from PDF (Experimental).\n')
.describe('c', '(optional) when specified, will generate .content.txt that includes text content from PDF.\n')
.alias('m', 'merge')
.describe('m', '(optional) when specified, will generate .merged.json that includes auto-merged broken text blocks from PDF (Experimental).\n');

Expand Down
20 changes: 18 additions & 2 deletions lib/pdf.js
Original file line number Diff line number Diff line change
Expand Up @@ -386,10 +386,26 @@ let PDFJSClass = (function () {
return retVal;

_.each(this.rawTextContents, function(textContent, index) {
let prevText = null;
_.each(textContent.bidiTexts, function(textObj, idx) {
retVal += textObj.str + "\r\n";
if (prevText) {
if (Math.abs(textObj.y - prevText.y) <= 20) {
prevText.str += textObj.str;
}
else {
retVal += prevText.str + "\r\n";
prevText = textObj;
}
}
else {
prevText = textObj;
}

});
retVal += "----------------Page (" + index + ") Break----------------\r\n";
if (prevText) {
retVal += prevText.str;
}
retVal += "\r\n----------------Page (" + index + ") Break----------------\r\n";
});

return retVal;
Expand Down
59 changes: 27 additions & 32 deletions lib/pdfanno.js
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
var nodeUtil = require("util"),
'use strict';

let nodeUtil = require("util"),
_ = require("underscore"),
PDFUnit = require('./pdfunit.js');

var PDFAnno = (function PDFAnnoClosure() {
'use strict';

let PDFAnno = (function PDFAnnoClosure() {
//BEGIN - MQZ 9/19/2012. Helper functions to parse acroForm elements
function setupRadioButton(annotation, item) {
var asName = '';
let asName = '';
//PDF Spec p.689: parent item's DV holds the item's value that is selected by default
var po = annotation.get('Parent');
let po = annotation.get('Parent');
if (po) {
po.forEach(function(key, val){
if (key === 'DV') {
Expand All @@ -25,9 +25,9 @@ var PDFAnno = (function PDFAnnoClosure() {
}

//PDF Spec p.606: get appearance dictionary
var ap = annotation.get('AP');
let ap = annotation.get('AP');
//PDF Spec p.614 get normal appearance
var nVal = ap.get('N');
let nVal = ap.get('N');
//PDF Spec p.689
nVal.forEach(function (key, value) {
if (key.toLowerCase() != "off") {
Expand All @@ -43,26 +43,26 @@ var PDFAnno = (function PDFAnnoClosure() {

function setupPushButton(annotation, item) {
//button label: PDF Spec p.640
var mk = annotation.get('MK');
let mk = annotation.get('MK');
item.value = mk.get('CA') || '';

//button action: url when mouse up: PDF Spec:p.642
item.FL = "";
var ap = annotation.get('A');
let ap = annotation.get('A');
if (ap) {
var sp = ap.get('S');
let sp = ap.get('S');
item.FL = ap.get(sp.name);
}
}

function setupCheckBox(annotation, item) {
//PDF Spec p.606: get appearance dictionary
var ap = annotation.get('AP');
let ap = annotation.get('AP');
//PDF Spec p.614 get normal appearance
var nVal = ap.get('N');
let nVal = ap.get('N');

//PDF Spec p.689
var i = 0;
let i = 0;
nVal.forEach(function (key, value) {
i++;
if (i == 1) //initial selection state
Expand All @@ -78,13 +78,13 @@ var PDFAnno = (function PDFAnnoClosure() {
function setupFieldAttributes(annotation, item) {
//MQZ. Jan.03.2013. additional-actions dictionary
//PDF Spec P.648. 8.5.2. Trigger Events
var aa = annotation.get('AA');
let aa = annotation.get('AA');
if (!aa) {
return;
}

//PDF Spec p.651 get format dictionary
var nVal = aa.get('F');
let nVal = aa.get('F');
if (!nVal) {
nVal = aa.get('K');
if (!nVal)
Expand All @@ -98,26 +98,26 @@ var PDFAnno = (function PDFAnnoClosure() {
});
}

var AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', ''];
// var AFNumber_Format = ['nDec', 'sepStyle', 'negStyle', 'currStyle', 'strCurrency', 'bCurrencyPrepend'];
let AFSpecial_Format = ['zip', 'zip', 'phone', 'ssn', ''];
// let AFNumber_Format = ['nDec', 'sepStyle', 'negStyle', 'currStyle', 'strCurrency', 'bCurrencyPrepend'];
//– nDec is the number of places after the decimal point
//– sepStyle is an integer denoting whether to use a separator or not: 0 = separate thousands with commas, 1 = do not separate
//– negStyle is the formatting used for negative numbers: 0 = MinusBlack, 1 = Red, 2 = ParensBlack, 3 = ParensRed
//– currStyle is the currency style (not used)
//– strCurrency is the currency symbol
//– bCurrencyPrepend is a boolean: true places the currency symbol before the number, false places it after
// var AFDate_FormatEx = ["m/d", "m/d/yy", "mm/dd/yy", "mm/yy", "d-mmm", "d-mmm-yy", "dd-mmm-yy", "yymm-dd", "mmm-yy", "mmmm-yy", "mmm d, yyyy", "mmmm d, yyyy", "m/d/yy h:MM tt", "m/d/yy HH:MM"];
// let AFDate_FormatEx = ["m/d", "m/d/yy", "mm/dd/yy", "mm/yy", "d-mmm", "d-mmm-yy", "dd-mmm-yy", "yymm-dd", "mmm-yy", "mmmm-yy", "mmm d, yyyy", "mmmm d, yyyy", "m/d/yy h:MM tt", "m/d/yy HH:MM"];

function processFieldAttribute(jsFuncName, item) {
if (item.hasOwnProperty('TName'))
return;

var vParts = jsFuncName.split('(');
let vParts = jsFuncName.split('(');
if (vParts.length !== 2)
return;

var funcName = vParts[0];
var funcParam = vParts[1].split(')')[0];
let funcName = vParts[0];
let funcParam = vParts[1].split(')')[0];

switch (funcName) {
case 'AFSpecial_Format':
Expand All @@ -136,7 +136,7 @@ var PDFAnno = (function PDFAnnoClosure() {
item.MV = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value
break;
case 'AFSpecial_KeystrokeEx': //special format: "arbitrary mask"
var maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value
let maskValue = funcParam.replace(/^'+|^"+|'+$|"+$/g,''); //mask value
if ((!!maskValue) && maskValue.length > 0 && maskValue.length < 64) {
item.TName = 'mask'; //fixed length input
item.MV = maskValue;
Expand All @@ -151,13 +151,13 @@ var PDFAnno = (function PDFAnnoClosure() {
//END - MQZ 9/19/2012. Helper functions to parse acroForm elements

// private static
var _nextId = 1;
var _name = 'PDFAnno';
let _nextId = 1;
let _name = 'PDFAnno';

// constructor
var cls = function (field, viewport, Fields, Boxsets) {
let cls = function (field, viewport, Fields, Boxsets) {
// private
var _id = _nextId++;
let _id = _nextId++;

// public (every instance will have their own copy of these methods, needs to be lightweight)
this.get_id = function () {
Expand All @@ -173,11 +173,6 @@ var PDFAnno = (function PDFAnnoClosure() {
delete this.get_name;
};

// public static
cls.get_nextId = function () {
return _name + _nextId;
};

cls.processAnnotation = function (annotation, item) {
if (item.fieldType == 'Btn') { //PDF Spec p.675
if (item.fieldFlags & 32768) {
Expand Down
Loading

0 comments on commit 133a285

Please sign in to comment.