Skip to content

Commit

Permalink
feat(debug-pelias-parser): print parser solutions in API debug output (
Browse files Browse the repository at this point in the history
  • Loading branch information
missinglink authored Nov 16, 2021
1 parent 35efea5 commit 758e574
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 19 deletions.
56 changes: 38 additions & 18 deletions sanitizer/_text_pelias_parser.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
const logger = require('pelias-logger').get('api');
const Debug = require('../helper/debug');
const debugLog = new Debug('santizer:text:pelias_parser');
const unicode = require('../helper/unicode');
const Tokenizer = require('pelias-parser/tokenization/Tokenizer');
const Solution = require('pelias-parser/solver/Solution');
Expand All @@ -19,7 +21,7 @@ const MAX_TEXT_LENGTH = 140;
**/

// validate texts, convert types and apply defaults
function _sanitize (raw, clean) {
function _sanitize (raw, clean, req) {
// error & warning messages
var messages = { errors: [], warnings: [] };

Expand All @@ -43,30 +45,48 @@ function _sanitize (raw, clean) {
text = text.substring(0, MAX_TEXT_LENGTH);
}

// tokenize text
const start = new Date();
const tokenizer = new Tokenizer(text);
parser.classify(tokenizer);
parser.solve(tokenizer);

// log summary info
logger.info('pelias_parser', {
response_time: (new Date()) - start,
params: clean,
solutions: tokenizer.solution.length,
text_length: _.get(clean, 'text.length', 0)
});

// add debugging info with all parser solutions
if (req) {
debugLog.push(req, () => {
try {
return tokenizer.solution.map(s => _.reduce(
s.pair,
(text, c, i) => {
const delim = (i === 0) ? '' : ',';
return `${text}${delim} ${c.classification.label}='${c.span.body}'`;
},
`${s.score.toFixed(2)} ➜`
));
} catch (e) {
return e.message;
}
});
}

// parse text with pelias/parser
clean.text = text;
clean.parser = 'pelias';
clean.parsed_text = parse(clean);
clean.parsed_text = parse(tokenizer);
}

return messages;
}

function parse (clean) {

// parse text
let start = new Date();
const t = new Tokenizer(clean.text);
parser.classify(t);
parser.solve(t);

// log summary info
logger.info('pelias_parser', {
response_time: (new Date()) - start,
params: clean,
solutions: t.solution.length,
text_length: _.get(clean, 'text.length', 0)
});
function parse (t) {

// only use the first solution generated
// @todo: we could expand this in the future to accommodate more solutions
Expand Down Expand Up @@ -225,7 +245,7 @@ function parse (clean) {
}
}
}

// unknown query type
else {
parsed_text.subject = t.span.body;
Expand Down
2 changes: 1 addition & 1 deletion sanitizer/sanitizeAll.js
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ function sanitize( req, sanitizers ){
const params = req.query || {};

for (let s in sanitizers) {
var sanity = sanitizers[s].sanitize( params, req.clean );
var sanity = sanitizers[s].sanitize( params, req.clean, req );

// if errors occurred then set them
// on the req object.
Expand Down

0 comments on commit 758e574

Please sign in to comment.