pelias · orangejulius · Apr 29, 2016 · Mar 29, 2016 · Mar 29, 2016 · Apr 4, 2016
diff --git a/middleware/geocodeJSON.js b/middleware/geocodeJSON.js
@@ -16,7 +16,7 @@ function setup(peliasConfig, basePath) {
     config: peliasConfig || require('pelias-config').generate().api,
     basePath: basePath || '/'
   };
-  
+
   function middleware(req, res, next) {
     return convertToGeocodeJSON(req, res, next, opts);
   }
@@ -56,6 +56,10 @@ function convertToGeocodeJSON(req, res, next, opts) {
   // Helpful for debugging and understanding how the input impacts results.
   res.body.geocoding.query = req.clean;
 
+  // remove arrays produced by the tokenizer (only intended to be used internally).
+  delete res.body.geocoding.query.tokens_complete;
+  delete res.body.geocoding.query.tokens_incomplete;
+
   // OPTIONAL. Warnings and errors.
   addMessages(req, 'warnings', res.body.geocoding);
   addMessages(req, 'errors', res.body.geocoding);

diff --git a/package.json b/package.json
@@ -68,7 +68,7 @@
     "precommit-hook": "^3.0.0",
     "proxyquire": "^1.4.0",
     "tap-dot": "1.0.5",
-    "tape": "^4.4.0"
+    "tape": "^4.5.1"
   },
   "pre-commit": [
     "lint",

diff --git a/query/autocomplete.js b/query/autocomplete.js
@@ -9,7 +9,9 @@ var views = {
   ngrams_strict:              require('./view/ngrams_strict'),
   focus_selected_layers:      require('./view/focus_selected_layers'),
   ngrams_last_token_only:     require('./view/ngrams_last_token_only'),
-  phrase_first_tokens_only:   require('./view/phrase_first_tokens_only')
+  phrase_first_tokens_only:   require('./view/phrase_first_tokens_only'),
+  pop_subquery:               require('./view/pop_subquery'),
+  boost_exact_matches:        require('./view/boost_exact_matches')
 };
 
 //------------------------------
@@ -32,14 +34,16 @@ query.score( peliasQuery.view.admin('country_a') );
 query.score( peliasQuery.view.admin('region') );
 query.score( peliasQuery.view.admin('region_a') );
 query.score( peliasQuery.view.admin('county') );
+query.score( peliasQuery.view.admin('borough') );
 query.score( peliasQuery.view.admin('localadmin') );
 query.score( peliasQuery.view.admin('locality') );
 query.score( peliasQuery.view.admin('neighbourhood') );
 
 // scoring boost
+query.score( views.boost_exact_matches );
 query.score( views.focus_selected_layers( views.ngrams_strict ) );
-query.score( peliasQuery.view.popularity( views.ngrams_strict ) );
-query.score( peliasQuery.view.population( views.ngrams_strict ) );
+query.score( peliasQuery.view.popularity( views.pop_subquery ) );
+query.score( peliasQuery.view.population( views.pop_subquery ) );
 
 // non-scoring hard filters
 query.filter( peliasQuery.view.sources );
@@ -59,29 +63,28 @@ function generateQuery( clean ){
     vs.var( 'sources', clean.sources );
   }
 
-  // mark the name as incomplete (user has not yet typed a comma)
-  vs.var( 'input:name:isComplete', false );
-
-  // perform some operations on 'clean.text':
-  // 1. if there is a space followed by a single char, remove them.
-  //  - this is required as the index uses 2grams and sending 1grams
-  //  - to a 2gram index when using 'type:phrase' or 'operator:and' will
-  //  - result in a complete failure of the query.
-  // 2. trim leading and trailing whitespace.
-  var text = clean.text.replace(/( .$)/g,'').trim();
-
-  // if the input parser has run and suggested a 'parsed_text.name' to use.
-  if( clean.hasOwnProperty('parsed_text') && clean.parsed_text.hasOwnProperty('name') ){
-
-    // mark the name as complete (user has already typed a comma)
-    vs.var( 'input:name:isComplete', true );
-
-    // use 'parsed_text.name' instead of 'clean.text'.
-    text = clean.parsed_text.name;
+  // pass the input tokens to the views so they can choose which tokens
+  // are relevant for their specific function.
+  if( check.array( clean.tokens ) ){
+    vs.var( 'input:name:tokens', clean.tokens );
+    vs.var( 'input:name:tokens_complete', clean.tokens_complete );
+    vs.var( 'input:name:tokens_incomplete', clean.tokens_incomplete );
   }
 
   // input text
-  vs.var( 'input:name', text );
+  vs.var( 'input:name', clean.text );
+
+  // if the tokenizer has run then we set 'input:name' to the combination of the
+  // 'complete' tokens with the 'incomplete' tokens; the resulting array differs
+  // slightly from the 'input:name:tokens' array as some tokens might have been
+  // removed in the process, such as single grams which are not present in the
+  // ngrams index.
+  if( check.array( clean.tokens_complete ) && check.array( clean.tokens_incomplete ) ){
+    var combined = clean.tokens_complete.concat( clean.tokens_incomplete );
+    if( combined.length ){
+      vs.var( 'input:name', combined.join(' ') );
+    }
+  }
 
   // focus point
   if( check.number(clean['focus.point.lat']) &&

diff --git a/query/autocomplete_defaults.js b/query/autocomplete_defaults.js
@@ -20,20 +20,20 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'boundary:rect:type': 'indexed',
   'boundary:rect:_cache': true,
 
-  'ngram:analyzer': 'peliasPhrase',
+  'ngram:analyzer': 'peliasQueryPartialToken',
   'ngram:field': 'name.default',
   'ngram:boost': 100,
 
-  'phrase:analyzer': 'peliasPhrase',
-  'phrase:field': 'phrase.default',
+  'phrase:analyzer': 'peliasQueryFullToken',
+  'phrase:field': 'name.default',
   'phrase:boost': 1,
-  'phrase:slop': 2,
+  'phrase:slop': 3,
 
   'focus:function': 'linear',
   'focus:offset': '0km',
   'focus:scale': '250km',
   'focus:decay': 0.5,
-  'focus:weight': 10,
+  'focus:weight': 40,
 
   'function_score:score_mode': 'avg',
   'function_score:boost_mode': 'multiply',
@@ -82,6 +82,10 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'admin:neighbourhood:field': 'parent.neighbourhood',
   'admin:neighbourhood:boost': 200,
 
+  'admin:borough:analyzer': 'peliasAdmin',
+  'admin:borough:field': 'parent.borough',
+  'admin:borough:boost': 600,
+
   'popularity:field': 'popularity',
   'popularity:modifier': 'log1p',
   'popularity:max_boost': 20,

diff --git a/query/reverse_defaults.js b/query/reverse_defaults.js
@@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'boundary:rect:type': 'indexed',
   'boundary:rect:_cache': true,
 
-  'ngram:analyzer': 'peliasOneEdgeGram',
+  'ngram:analyzer': 'peliasQueryPartialToken',
   'ngram:field': 'name.default',
   'ngram:boost': 1,
 

diff --git a/query/search_defaults.js b/query/search_defaults.js
@@ -20,7 +20,7 @@ module.exports = _.merge({}, peliasQuery.defaults, {
   'boundary:rect:type': 'indexed',
   'boundary:rect:_cache': true,
 
-  'ngram:analyzer': 'peliasOneEdgeGram',
+  'ngram:analyzer': 'peliasIndexOneEdgeGram',
   'ngram:field': 'name.default',
   'ngram:boost': 1,
 

diff --git a/query/text_parser.js b/query/text_parser.js
@@ -8,7 +8,7 @@ when we can't identify parts of an address. This shouldn't contain fields like c
 or postalcode because we should only try to match those when we're sure that's what they are.
  */
 var adminFields = placeTypes.concat([
-  'region_a',
+  'region_a'
 ]);
 
 /**

diff --git a/query/view/boost_exact_matches.js b/query/view/boost_exact_matches.js
@@ -0,0 +1,40 @@
+
+var peliasQuery = require('pelias-query'),
+    searchDefaults = require('../search_defaults');
+
+/**
+  This view (unfortunately) requires autocomplete to use the phrase.* index.
+
+  ideally we wouldn't need to use this, but at time of writing we are unable
+  to distinguish between 'complete tokens' and 'grams' in the name.* index.
+
+  this view was introduced in order to score exact matches higher than partial
+  matches, without it we find results such as "Clayton Avenue" appearing first
+  in the results list for the query "Clay Av".
+
+  the view uses some of the values from the 'search_defaults.js' file to add an
+  additional 'SHOULD' condition which scores exact matches slightly higher
+  than partial matches.
+**/
+
+module.exports = function( vs ){
+
+  // make a copy of the variables so we don't interfere with the values
+  // passed to other views.
+  var vsCopy = new peliasQuery.Vars( vs.export() );
+
+  // copy phrase:* values from search defaults
+  vsCopy.var('phrase:analyzer').set(searchDefaults['phrase:analyzer']);
+  vsCopy.var('phrase:field').set(searchDefaults['phrase:field']);
+
+  // get a copy of the *complete* tokens produced from the input:name
+  var tokens = vs.var('input:name:tokens_complete').get();
+
+  // no valid tokens to use, fail now, don't render this view.
+  if( !tokens || tokens.length < 1 ){ return null; }
+
+  // set 'input:name' to be only the fully completed tokens
+  vsCopy.var('input:name').set( tokens.join(' ') );
+
+  return peliasQuery.view.phrase( vsCopy );
+};
diff --git a/query/view/ngrams_last_token_only.js b/query/view/ngrams_last_token_only.js
@@ -8,29 +8,24 @@ var peliasQuery = require('pelias-query'),
   eg. if the input was "100 foo str", then 'input:name' would only be 'str'
   note: it is assumed that the rest of the input is matched using another view.
 
-  there is an additional flag 'input:name:isComplete' used to disable this view
-  selectively, see that section for more info.
-
   code notes: this view makes a copy of the $vs object in order to change their
   values without mutating the original values, which may be expected in their
   unaltered form by other views.
 **/
 
 module.exports = function( vs ){
 
-  // Totally disable this view when bool value 'input:name:isComplete' is true.
-  // This is the case when the user has typed a comma, so we can assume
-  // that the 'name' part of the query is now complete.
-  if( vs.var('input:name:isComplete').get() ){ return null; }
+  // get a copy of the *tokens_incomplete* tokens produced from the input:name
+  var tokens = vs.var('input:name:tokens_incomplete').get();
+
+  // no valid tokens to use, fail now, don't render this view.
+  if( !tokens || tokens.length < 1 ){ return null; }
 
   // make a copy Vars so we don't mutate the original
   var vsCopy = new peliasQuery.Vars( vs.export() );
 
-  // get the input 'name' variable
-  var name = vs.var('input:name').get();
-
   // set the 'name' variable in the copy to only the last token
-  vsCopy.var('input:name').set( name.substr( name.lastIndexOf(' ')+1 ) );
+  vsCopy.var('input:name').set( tokens.join(' ') );
 
   // return the view rendered using the copy
   return ngrams_strict( vsCopy );

diff --git a/query/view/phrase_first_tokens_only.js b/query/view/phrase_first_tokens_only.js
@@ -7,37 +7,24 @@ var peliasQuery = require('pelias-query');
   eg. if the input was "100 foo str", then 'input:name' would only be '100 foo'
   note: it is assumed that the rest of the input is matched using another view.
 
-  there is an additional flag 'input:name:isComplete' used to disable this view
-  selectively, see that section for more info.
-
   code notes: this view makes a copy of the $vs object in order to change their
   values without mutating the original values, which may be expected in their
   unaltered form by other views.
 **/
 
 module.exports = function( vs ){
 
-  // Don't mutate the name variable when 'input:name:isComplete' is true.
-  // This is the case when the user has typed a comma, so we can assume
-  // that the 'name' part of the query is now complete.
-  if( vs.var('input:name:isComplete').get() ){
-    // return the view rendered using the original vars
-    return peliasQuery.view.phrase( vs );
-  }
+  // get a copy of the *complete* tokens produced from the input:name
+  var tokens = vs.var('input:name:tokens_complete').get();
+
+  // no valid tokens to use, fail now, don't render this view.
+  if( !tokens || tokens.length < 1 ){ return null; }
 
   // make a copy Vars so we don't mutate the original
   var vsCopy = new peliasQuery.Vars( vs.export() );
 
-  // get the input 'name' variable and split in to tokens
-  var name = vs.var('input:name').get(),
-      tokens = name.split(' ');
-
-  // single token only, abort (we don't want the *last* token)
-  // return null here will completely disable the view.
-  if( tokens.length < 2 ){ return null; }
-
   // set the 'name' variable in the copy to all but the last token
-  vsCopy.var('input:name').set( name.substr( 0, name.lastIndexOf(' ') ) );
+  vsCopy.var('input:name').set( tokens.join(' ') );
 
   // return the view rendered using the copy
   return peliasQuery.view.phrase( vsCopy );

diff --git a/query/view/pop_subquery.js b/query/view/pop_subquery.js
@@ -0,0 +1,17 @@
+
+var peliasQuery = require('pelias-query'),
+    check = require('check-types');
+
+/**
+  Population / Popularity subquery
+**/
+
+module.exports = function( vs ){
+
+  var view = peliasQuery.view.ngrams( vs );
+
+  view.match['name.default'].analyzer = vs.var('phrase:analyzer');
+  delete view.match['name.default'].boost;
+
+  return view;
+};