Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

autocomplete milestone #526

Merged
merged 23 commits into from
Apr 29, 2016
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
aa3e764
update analyzers to work with https://github.com/pelias/schema/pull/109
missinglink Mar 29, 2016
6d4e689
resolve merge conflict
missinglink Mar 29, 2016
f7c64cb
merge master
missinglink Apr 4, 2016
3a789b4
increase autocomplete 'phrase:slop' setting from 2->3
missinglink Apr 7, 2016
9a5d425
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 13, 2016
e40c9ef
increase focus weight from 10->40 and simplify population/popularity …
missinglink Apr 15, 2016
b80efff
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 18, 2016
30db744
Merge branch 'autocomplete_increase_slop' of github.com:pelias/api in…
missinglink Apr 19, 2016
25ab63c
change search analyzer to be more similar to what we had before the a…
missinglink Apr 21, 2016
3051885
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 25, 2016
01a3233
add a view to boost exact matches
missinglink Apr 25, 2016
ca0c51b
don't strip single digits from query
missinglink Apr 25, 2016
b862fc8
refactor pop_subquery to be config driven
missinglink Apr 25, 2016
2398f05
fix borough matching for both autocomplete and search endpoints
missinglink Apr 25, 2016
da4c666
reduce admin:borough:boost from 800->600
missinglink Apr 28, 2016
9dbed08
remove duplicate entry for borough
missinglink Apr 28, 2016
e093a09
remove search related improvements from this PR
missinglink Apr 28, 2016
b771053
Merge branch 'master' of github.com:pelias/api into missinglink
missinglink Apr 28, 2016
ee73774
add tokenizer, refactor how we determine if a token is 'complete' or …
missinglink Apr 28, 2016
e6d9a0c
Merge pull request #529 from pelias/missinglink_complete_incomplete_r…
missinglink Apr 29, 2016
0524062
handle addressit case where parsed_text.street is produced and parsed…
missinglink Apr 29, 2016
1c9af40
remove query.tokens_complete and query.tokens_incomplete from geoJSON
missinglink Apr 29, 2016
979aab1
ensure that problematic single grams are removed from the query
missinglink Apr 29, 2016
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
"precommit-hook": "^3.0.0",
"proxyquire": "^1.4.0",
"tap-dot": "1.0.5",
"tape": "^4.4.0"
"tape": "^4.5.1"
},
"pre-commit": [
"lint",
Expand Down
13 changes: 9 additions & 4 deletions query/autocomplete.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,9 @@ var views = {
ngrams_strict: require('./view/ngrams_strict'),
focus_selected_layers: require('./view/focus_selected_layers'),
ngrams_last_token_only: require('./view/ngrams_last_token_only'),
phrase_first_tokens_only: require('./view/phrase_first_tokens_only')
phrase_first_tokens_only: require('./view/phrase_first_tokens_only'),
pop_subquery: require('./view/pop_subquery'),
boost_exact_matches: require('./view/boost_exact_matches')
};

//------------------------------
Expand All @@ -32,14 +34,16 @@ query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region') );
query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin('county') );
query.score( peliasQuery.view.admin('borough') );
query.score( peliasQuery.view.admin('localadmin') );
query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighbourhood') );

// scoring boost
query.score( views.boost_exact_matches );
query.score( views.focus_selected_layers( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.ngrams_strict ) );
query.score( peliasQuery.view.population( views.ngrams_strict ) );
query.score( peliasQuery.view.popularity( views.pop_subquery ) );
query.score( peliasQuery.view.population( views.pop_subquery ) );

// non-scoring hard filters
query.filter( peliasQuery.view.sources );
Expand Down Expand Up @@ -68,7 +72,8 @@ function generateQuery( clean ){
// - to a 2gram index when using 'type:phrase' or 'operator:and' will
// - result in a complete failure of the query.
// 2. trim leading and trailing whitespace.
var text = clean.text.replace(/( .$)/g,'').trim();
// note: single digit grams are now being produced in the name.* index
var text = clean.text.replace(/( [^0-9]$)/g,'').trim();

// if the input parser has run and suggested a 'parsed_text.name' to use.
if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){
Expand Down
14 changes: 9 additions & 5 deletions query/autocomplete_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,20 +20,20 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasPhrase',
'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default',
'ngram:boost': 100,

'phrase:analyzer': 'peliasPhrase',
'phrase:field': 'phrase.default',
'phrase:analyzer': 'peliasQueryFullToken',
'phrase:field': 'name.default',
'phrase:boost': 1,
'phrase:slop': 2,
'phrase:slop': 3,

'focus:function': 'linear',
'focus:offset': '0km',
'focus:scale': '250km',
'focus:decay': 0.5,
'focus:weight': 10,
'focus:weight': 40,

'function_score:score_mode': 'avg',
'function_score:boost_mode': 'multiply',
Expand Down Expand Up @@ -82,6 +82,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'admin:neighbourhood:field': 'parent.neighbourhood',
'admin:neighbourhood:boost': 200,

'admin:borough:analyzer': 'peliasAdmin',
'admin:borough:field': 'parent.borough',
'admin:borough:boost': 800,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why so high?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I simply copied the value from 'country', I will drop it down to a value for 'region' instead if you feel it's too high. What would you suggest as a good value here?


'popularity:field': 'popularity',
'popularity:modifier': 'log1p',
'popularity:max_boost': 20,
Expand Down
2 changes: 1 addition & 1 deletion query/reverse_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasOneEdgeGram',
'ngram:analyzer': 'peliasQueryPartialToken',
'ngram:field': 'name.default',
'ngram:boost': 1,

Expand Down
1 change: 1 addition & 0 deletions query/search.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ query.score( peliasQuery.view.admin('country_a') );
query.score( peliasQuery.view.admin('region') );
query.score( peliasQuery.view.admin('region_a') );
query.score( peliasQuery.view.admin('county') );
query.score( peliasQuery.view.admin('borough') );
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we remove this to avoid having any effect on search from this PR?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

moving this to #527

query.score( peliasQuery.view.admin('localadmin') );
query.score( peliasQuery.view.admin('locality') );
query.score( peliasQuery.view.admin('neighbourhood') );
Expand Down
2 changes: 1 addition & 1 deletion query/search_defaults.js
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
'boundary:rect:type': 'indexed',
'boundary:rect:_cache': true,

'ngram:analyzer': 'peliasOneEdgeGram',
'ngram:analyzer': 'peliasIndexOneEdgeGram',
'ngram:field': 'name.default',
'ngram:boost': 1,

Expand Down
1 change: 1 addition & 0 deletions query/text_parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ or postalcode because we should only try to match those when we're sure that's w
*/
var adminFields = placeTypes.concat([
'region_a',
'borough'
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this doesn't need to be done since placeTypes already has borough in the list.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

good catch!

]);

/**
Expand Down
48 changes: 48 additions & 0 deletions query/view/boost_exact_matches.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@

var peliasQuery = require('pelias-query'),
searchDefaults = require('../search_defaults');

/**
This view (unfortunately) requires autocomplete to use the phrase.* index.

ideally we wouldn't need to use this, but at time of writing we are unable
to distinguish between 'complete tokens' and 'grams' in the name.* index.

this view was introduced in order to score exact matches higher than partial
matches, without it we find results such as "Clayton Avenue" appearing first
in the results list for the query "Clay Av".

the view uses some of the values from the 'search_defaults.js' file to add an
additional 'SHOULD' condition which scores exact matches slighly higher
than partial matches.
**/

module.exports = function( vs ){

// make a copy of the variables so we don't interfere with the values
// passed to other views.
var vsCopy = new peliasQuery.Vars( vs.export() );

// copy phrase:* values from search defaults
vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']);
vsCopy.var('phrase:field').set(searchDefaults['phrase:field']);

// split the 'input:name' on whitespace
var name = vs.var('input:name').get(),
tokens = name.split(' ');
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

perhaps split on any whitespace so that multiple spaces, tabs, or any other weird stuff is handled ok? Or would that be cleaned up by a sanitizer before this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this could result in empty tokens if there are multiple spaces in a row. just doesn't feel like a very clean solution.


// if the query is incomplete then we need to remove
// the final (incomplete) token as it will not match
// tokens in the phrase.* index.
if( !vs.var('input:name:isComplete').get() ){
tokens.pop();
}

// no valid tokens to use, fail now, don't render this view.
if( tokens.length < 1 ){ return null; }

// set 'input:name' to be only the fully completed characters
vsCopy.var('input:name').set( tokens.join(' ') );

return peliasQuery.view.phrase( vsCopy );
};
16 changes: 16 additions & 0 deletions query/view/pop_subquery.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@

var peliasQuery = require('pelias-query');

/**
Population / Popularity subquery
**/

module.exports = function( vs ){

var view = peliasQuery.view.ngrams( vs );

view.match['name.default'].analyzer = vs.var('phrase:analyzer');
delete view.match['name.default'].boost;

return view;
};
12 changes: 3 additions & 9 deletions test/unit/fixture/autocomplete_linguistic_final_token.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'one',
'type': 'phrase',
Expand All @@ -20,11 +20,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'one',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand All @@ -45,11 +42,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'one',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand Down
16 changes: 5 additions & 11 deletions test/unit/fixture/autocomplete_linguistic_focus.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'test',
'type': 'phrase',
Expand All @@ -20,7 +20,7 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'test',
'type': 'phrase',
Expand All @@ -40,7 +40,7 @@ module.exports = {
'decay': 0.5
}
},
'weight': 10
'weight': 40
}],
'score_mode': 'avg',
'boost_mode': 'multiply',
Expand All @@ -64,11 +64,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand All @@ -89,11 +86,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand Down
16 changes: 5 additions & 11 deletions test/unit/fixture/autocomplete_linguistic_focus_null_island.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ module.exports = {
'must': [{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'test',
'type': 'phrase',
Expand All @@ -20,7 +20,7 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'test',
'type': 'phrase',
Expand All @@ -40,7 +40,7 @@ module.exports = {
'decay': 0.5
}
},
'weight': 10
'weight': 40
}],
'score_mode': 'avg',
'boost_mode': 'multiply',
Expand All @@ -64,11 +64,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand All @@ -89,11 +86,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'test',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand Down
32 changes: 19 additions & 13 deletions test/unit/fixture/autocomplete_linguistic_multiple_tokens.js
Original file line number Diff line number Diff line change
Expand Up @@ -6,36 +6,45 @@ module.exports = {
'bool': {
'must': [{
'match': {
'phrase.default': {
'analyzer': 'peliasPhrase',
'name.default': {
'analyzer': 'peliasQueryFullToken',
'type': 'phrase',
'boost': 1,
'slop': 2,
'slop': 3,
'query': 'one two'
}
}
},
{
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'analyzer': 'peliasQueryPartialToken',
'boost': 100,
'query': 'three',
'type': 'phrase',
'operator': 'and'
}
}
}],
'should':[{
'should':[
{
'match': {
'phrase.default': {
'analyzer' : 'peliasPhrase',
'type' : 'phrase',
'boost' : 1,
'slop' : 3,
'query' : 'one two'
}
}
},
{
'function_score': {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'one two three',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand All @@ -56,11 +65,8 @@ module.exports = {
'query': {
'match': {
'name.default': {
'analyzer': 'peliasPhrase',
'boost': 100,
'analyzer': 'peliasQueryFullToken',
'query': 'one two three',
'type': 'phrase',
'operator': 'and'
}
}
},
Expand Down
Loading