Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

autocomplete milestone #526

Merged
merged 23 commits into from
Apr 29, 2016
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aa3e764
update analyzers to work with https://github.com/pelias/schema/pull/109
missinglink Mar 29, 2016
6d4e689
resolve merge conflict
missinglink Mar 29, 2016
f7c64cb
merge master
missinglink Apr 4, 2016
3a789b4
increase autocomplete 'phrase:slop' setting from 2->3
missinglink Apr 7, 2016
9a5d425
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 13, 2016
e40c9ef
increase focus weight from 10->40 and simplify population/popularity …
missinglink Apr 15, 2016
b80efff
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 18, 2016
30db744
Merge branch 'autocomplete_increase_slop' of github.com:pelias/api in…
missinglink Apr 19, 2016
25ab63c
change search analyzer to be more similar to what we had before the a…
missinglink Apr 21, 2016
3051885
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 25, 2016
01a3233
add a view to boost exact matches
missinglink Apr 25, 2016
ca0c51b
don't strip single digits from query
missinglink Apr 25, 2016
b862fc8
refactor pop_subquery to be config driven
missinglink Apr 25, 2016
2398f05
fix borough matching for both autocomplete and search endpoints
missinglink Apr 25, 2016
da4c666
reduce admin:borough:boost from 800->600
missinglink Apr 28, 2016
9dbed08
remove duplicate entry for borough
missinglink Apr 28, 2016
e093a09
remove search related improvements from this PR
missinglink Apr 28, 2016
b771053
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 28, 2016
ee73774
add tokenizer, refactor how we determine if a token is 'complete' or …
missinglink Apr 28, 2016
e6d9a0c
Merge pull request #529 from pelias/missinglink_complete_incomplete_r…
missinglink Apr 29, 2016
0524062
handle addressit case where parsed_text.street is produced and parsed…
missinglink Apr 29, 2016
1c9af40
remove query.tokens_complete and query.tokens_incomplete from geoJSON
missinglink Apr 29, 2016
979aab1
ensure that problematic single grams are removed from the query
missinglink Apr 29, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion middleware/geocodeJSON.js
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ function setup(peliasConfig, basePath) {
config: peliasConfig || require('pelias-config').generate().api,
basePath: basePath || '/'
};

// Middleware entry point: forwards the request, the response, the next
// callback and the 'opts' object from the enclosing scope to
// convertToGeocodeJSON, and returns whatever that call returns.
function middleware(req, res, next) {
return convertToGeocodeJSON(req, res, next, opts);
}
Expand Down Expand Up @@ -56,6 +56,10 @@ function convertToGeocodeJSON(req, res, next, opts) {
// Helpful for debugging and understanding how the input impacts results.
res.body.geocoding.query = req.clean;

// remove arrays produced by the tokenizer (only intended to be used internally).
delete res.body.geocoding.query.tokens_complete;
delete res.body.geocoding.query.tokens_incomplete;

// OPTIONAL. Warnings and errors.
addMessages(req, 'warnings', res.body.geocoding);
addMessages(req, 'errors', res.body.geocoding);
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"precommit-hook": "^3.0.0",
"proxyquire": "^1.4.0",
"tap-dot": "1.0.5",
"tape": "^4.4.0"
"tape": "^4.5.1"
},
"pre-commit": [
"lint",
Expand Down
49 changes: 26 additions & 23 deletions query/autocomplete.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ var views = {
ngrams_strict: require('./view/ngrams_strict'),
focus_selected_layers: require('./view/focus_selected_layers'),
ngrams_last_token_only: require('./view/ngrams_last_token_only'),
phrase_first_tokens_only: require('./view/phrase_first_tokens_only')
phrase_first_tokens_only: require('./view/phrase_first_tokens_only'),
pop_subquery: require('./view/pop_subquery'),
boost_exact_matches: require('./view/boost_exact_matches')
};

//------------------------------
Expand All @@ -32,14 +34,16 @@ query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region') );
query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin('county') );
query.score( peliasQuery.view.admin('borough') );
query.score( peliasQuery.view.admin('localadmin') );
query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighbourhood') );

// scoring boost
query.score( views.boost_exact_matches );
query.score( views.focus_selected_layers( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.ngrams_strict ) );
query.score( peliasQuery.view.population( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.pop_subquery ) );

// non-scoring hard filters
query.filter( peliasQuery.view.sources );
Expand All @@ -59,29 +63,28 @@ function generateQuery( clean ){
vs.var( 'sources', clean.sources );
}

// mark the name as incomplete (user has not yet typed a comma)
vs.var( 'input:name:isComplete', false );

// perform some operations on 'clean.text':
// 1. if there is a space followed by a single char, remove them.
// - this is required as the index uses 2grams and sending 1grams
// - to a 2gram index when using 'type:phrase' or 'operator:and' will
// - result in a complete failure of the query.
// 2. trim leading and trailing whitespace.
var text = clean.text.replace(/( .$)/g,'').trim();

// if the input parser has run and suggested a 'parsed_text.name' to use.
if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){

// mark the name as complete (user has already typed a comma)
vs.var( 'input:name:isComplete', true );

// use 'parsed_text.name' instead of 'clean.text'.
text = clean.parsed_text.name;
// pass the input tokens to the views so they can choose which tokens
// are relevant for their specific function.
if( check.array( clean.tokens ) ){
vs.var( 'input:name:tokens', clean.tokens );
vs.var( 'input:name:tokens_complete', clean.tokens_complete );
vs.var( 'input:name:tokens_incomplete', clean.tokens_incomplete );
}

// input text
vs.var( 'input:name', text );
vs.var( 'input:name', clean.text );

// if the tokenizer has run then we set 'input:name' to be the combination of the
// 'complete' tokens with the 'incomplete' tokens, the resulting array differs
// slightly from the 'input:name:tokens' array as some tokens might have been
// removed in the process; such as single grams which are not present in the
// ngrams index.
if( check.array( clean.tokens_complete ) && check.array( clean.tokens_incomplete ) ){
var combined = clean.tokens_complete.concat( clean.tokens_incomplete );
if( combined.length ){
vs.var( 'input:name', combined.join(' ') );
}
}

// focus point
if( check.number(clean['focus.point.lat']) &&
Expand Down
14 changes: 9 additions & 5 deletions query/autocomplete_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,20 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasPhrase',
'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default',
'ngram:boost': 100,

'phrase:analyzer': 'peliasPhrase',
'phrase:field': 'phrase.default',
'phrase:analyzer': 'peliasQueryFullToken',
'phrase:field': 'name.default',
'phrase:boost': 1,
'phrase:slop': 2,
'phrase:slop': 3,

'focus:function': 'linear',
'focus:offset': '0km',
'focus:scale': '250km',
'focus:decay': 0.5,
'focus:weight': 10,
'focus:weight': 40,

'function_score:score_mode': 'avg',
'function_score:boost_mode': 'multiply',
Expand Down Expand Up @@ -82,6 +82,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'admin:neighbourhood:field': 'parent.neighbourhood',
'admin:neighbourhood:boost': 200,

'admin:borough:analyzer': 'peliasAdmin',
'admin:borough:field': 'parent.borough',
'admin:borough:boost': 600,

'popularity:field': 'popularity',
'popularity:modifier': 'log1p',
'popularity:max_boost': 20,
Expand Down
2 changes: 1 addition & 1 deletion query/reverse_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasOneEdgeGram',
'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default',
'ngram:boost': 1,

Expand Down
2 changes: 1 addition & 1 deletion query/search_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasOneEdgeGram',
'ngram:analyzer': 'peliasIndexOneEdgeGram',
'ngram:field': 'name.default',
'ngram:boost': 1,

Expand Down
2 changes: 1 addition & 1 deletion query/text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ when we can't identify parts of an address. This shouldn't contain fields like c
or postalcode because we should only try to match those when we're sure that's what they are.
*/
var adminFields = placeTypes.concat([
'region_a',
'region_a'
]);

/**
Expand Down
40 changes: 40 additions & 0 deletions query/view/boost_exact_matches.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@

var peliasQuery = require('pelias-query'),
searchDefaults = require('../search_defaults');

/**
This view (unfortunately) requires autocomplete to use the phrase.* index.

ideally we wouldn't need to use this, but at time of writing we are unable
to distinguish between 'complete tokens' and 'grams' in the name.* index.

this view was introduced in order to score exact matches higher than partial
matches, without it we find results such as "Clayton Avenue" appearing first
in the results list for the query "Clay Av".

the view uses some of the values from the 'search_defaults.js' file to add an
additional 'SHOULD' condition which scores exact matches slightly higher
than partial matches.
**/

module.exports = function( vs ){

// make a copy of the variables so we don't interfere with the values
// passed to other views.
var vsCopy = new peliasQuery.Vars( vs.export() );

// copy phrase:* values from search defaults
vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']);
vsCopy.var('phrase:field').set(searchDefaults['phrase:field']);

// get a copy of the *complete* tokens produced from the input:name
var tokens = vs.var('input:name:tokens_complete').get();

// no valid tokens to use, fail now, don't render this view.
if( !tokens || tokens.length < 1 ){ return null; }

// set 'input:name' to be only the fully completed characters
vsCopy.var('input:name').set( tokens.join(' ') );

return peliasQuery.view.phrase( vsCopy );
};
17 changes: 6 additions & 11 deletions query/view/ngrams_last_token_only.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,29 +8,24 @@ var peliasQuery = require('pelias-query'),
eg. if the input was "100 foo str", then 'input:name' would only be 'str'
note: it is assumed that the rest of the input is matched using another view.

there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.

code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their
unaltered form by other views.
**/

module.exports = function( vs ){

// Totally disable this view when bool value 'input:name:isComplete' is true.
// This is the case when the user has typed a comma, so we can assume
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){ return null; }
// get a copy of the *tokens_incomplete* tokens produced from the input:name
var tokens = vs.var('input:name:tokens_incomplete').get();

// no valid tokens to use, fail now, don't render this view.
if( !tokens || tokens.length < 1 ){ return null; }

// make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() );

// get the input 'name' variable
var name = vs.var('input:name').get();

// set the 'name' variable in the copy to only the last token
vsCopy.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) );
vsCopy.var('input:name').set( tokens.join(' ') );

// return the view rendered using the copy
return ngrams_strict( vsCopy );
Expand Down
25 changes: 6 additions & 19 deletions query/view/phrase_first_tokens_only.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,24 @@ var peliasQuery = require('pelias-query');
eg. if the input was "100 foo str", then 'input:name' would only be '100 foo'
note: it is assumed that the rest of the input is matched using another view.

there is an additional flag 'input:name:isComplete' used to disable this view
selectively, see that section for more info.

code notes: this view makes a copy of the $vs object in order to change their
values without mutating the original values, which may be expected in their
unaltered form by other views.
**/

module.exports = function( vs ){

// Don't mutate the name variable when 'input:name:isComplete' is true.
// This is the case when the user has typed a comma, so we can assume
// that the 'name' part of the query is now complete.
if( vs.var('input:name:isComplete').get() ){
// return the view rendered using the original vars
return peliasQuery.view.phrase( vs );
}
// get a copy of the *complete* tokens produced from the input:name
var tokens = vs.var('input:name:tokens_complete').get();

// no valid tokens to use, fail now, don't render this view.
if( !tokens || tokens.length < 1 ){ return null; }

// make a copy Vars so we don't mutate the original
var vsCopy = new peliasQuery.Vars( vs.export() );

// get the input 'name' variable and split in to tokens
var name = vs.var('input:name').get(),
tokens = name.split(' ');

// single token only, abort (we don't want the *last* token)
// return null here will completely disable the view.
if( tokens.length < 2 ){ return null; }

// set the 'name' variable in the copy to all but the last token
vsCopy.var('input:name').set( name.substr( 0, name.lastIndexOf(' ') ) );
vsCopy.var('input:name').set( tokens.join(' ') );

// return the view rendered using the copy
return peliasQuery.view.phrase( vsCopy );
Expand Down
17 changes: 17 additions & 0 deletions query/view/pop_subquery.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@

var peliasQuery = require('pelias-query'),
check = require('check-types');

/**
Population / Popularity subquery
**/

module.exports = function( vs ){

var view = peliasQuery.view.ngrams( vs );

view.match['name.default'].analyzer = vs.var('phrase:analyzer');
delete view.match['name.default'].boost;

return view;
};
Loading