Merge branch 'osm-search:master' into no-superuser-flag
robbe-haesendonck authored Dec 4, 2023
2 parents afe90a9 + 8a2c606 commit 3efd1eb
Showing 14 changed files with 310 additions and 143 deletions.
94 changes: 55 additions & 39 deletions nominatim/api/search/db_search_builder.py
@@ -7,7 +7,7 @@
 """
 Conversion from token assignment to an abstract DB search.
 """
-from typing import Optional, List, Tuple, Iterator
+from typing import Optional, List, Tuple, Iterator, Dict
 import heapq
 
 from nominatim.api.types import SearchDetails, DataLayer
@@ -89,29 +89,34 @@ def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
         if sdata is None:
             return
 
-        categories = self.get_search_categories(assignment)
+        near_items = self.get_near_items(assignment)
+        if near_items is not None and not near_items:
+            return # impossible combination of near items and category parameter
 
         if assignment.name is None:
-            if categories and not sdata.postcodes:
-                sdata.qualifiers = categories
-                categories = None
+            if near_items and not sdata.postcodes:
+                sdata.qualifiers = near_items
+                near_items = None
                 builder = self.build_poi_search(sdata)
             elif assignment.housenumber:
                 hnr_tokens = self.query.get_tokens(assignment.housenumber,
                                                    TokenType.HOUSENUMBER)
                 builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
             else:
                 builder = self.build_special_search(sdata, assignment.address,
-                                                    bool(categories))
+                                                    bool(near_items))
         else:
             builder = self.build_name_search(sdata, assignment.name, assignment.address,
-                                             bool(categories))
+                                             bool(near_items))
 
-        if categories:
-            penalty = min(categories.penalties)
-            categories.penalties = [p - penalty for p in categories.penalties]
+        if near_items:
+            penalty = min(near_items.penalties)
+            near_items.penalties = [p - penalty for p in near_items.penalties]
             for search in builder:
-                yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
+                search_penalty = search.penalty
+                search.penalty = 0.0
+                yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
+                                     near_items, search)
         else:
             for search in builder:
                 search.penalty += assignment.penalty
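The rebalancing in this hunk is subtle: the cheapest near-item penalty is normalised to zero, and the inner search's own penalty is hoisted onto the wrapping NearSearch, so competing NearSearch objects are ordered by their full combined cost. A minimal sketch of that bookkeeping, using simplified stand-in classes rather than the real nominatim types:

```python
from dataclasses import dataclass
from typing import List

@dataclass
class Search:                 # stand-in for dbs.AbstractSearch
    penalty: float

@dataclass
class NearSearch:             # stand-in for dbs.NearSearch
    penalty: float
    inner: Search

def wrap_near(near_penalties: List[float], assignment_penalty: float,
              searches: List[Search]) -> List[NearSearch]:
    base = min(near_penalties)
    # Per-item penalties are shifted down so the best near item is free;
    # the shift itself moves into the wrapper's base penalty.
    near_penalties[:] = [p - base for p in near_penalties]
    wrapped = []
    for search in searches:
        # Hoist the inner penalty onto the wrapper so that competing
        # NearSearch objects compare by their combined cost.
        inner_penalty, search.penalty = search.penalty, 0.0
        wrapped.append(NearSearch(base + assignment_penalty + inner_penalty, search))
    return wrapped
```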
@@ -158,11 +163,15 @@ def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
            housenumber is the main name token.
         """
         sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
+        expected_count = sum(t.count for t in hnrs)
 
         partials = [t for trange in address
                     for t in self.query.get_partials_list(trange)]
 
-        if len(partials) != 1 or partials[0].count < 10000:
+        if expected_count < 8000:
+            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
+                                                 [t.token for t in partials], 'restrict'))
+        elif len(partials) != 1 or partials[0].count < 10000:
             sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
                                                  [t.token for t in partials], 'lookup_all'))
         else:
@@ -173,7 +182,7 @@ def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
                                                  'lookup_any'))
 
         sdata.housenumbers = dbf.WeightedStrings([], [])
-        yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
+        yield dbs.PlaceSearch(0.05, sdata, expected_count)
 
 
     def build_name_search(self, sdata: dbf.SearchData,
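The new branch ordering encodes a cost model: if the housenumber tokens alone are rare enough, fetch those rows and merely restrict by address; only fall back to an address-index intersection when they are frequent. A sketch of just the decision, with the thresholds taken from the diff and a hypothetical helper name:

```python
from typing import List

def address_lookup_strategy(expected_count: int, partial_counts: List[int]) -> str:
    if expected_count < 8000:
        # Housenumber tokens alone are selective enough: fetch those rows
        # and filter by the address terms afterwards.
        return 'restrict'
    if len(partial_counts) != 1 or partial_counts[0] < 10000:
        # Intersect with the address index ...
        return 'lookup_all'
    # ... unless the single address partial is itself too frequent;
    # then use the (rarer) full-name address tokens instead.
    return 'lookup_any'
```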
Expand Down Expand Up @@ -214,16 +223,17 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange])\

# Partial term to frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
fulls_count = sum(t.count for t in name_fulls)
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if not partials_indexed:
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
yield penalty, fulls_count / (2**len(addr_partials)),\
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
'restrict' if fulls_count < 10000 else 'lookup_all')
if name_fulls:
fulls_count = sum(t.count for t in name_fulls)
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if not partials_indexed:
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
yield penalty, fulls_count / (2**len(addr_partials)),\
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
'restrict' if fulls_count < 10000 else 'lookup_all')

# To catch remaining results, lookup by name and address
# We only do this if there is a reasonable number of results expected.
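Two things change here: a guard so nothing is yielded when the query has no full-name tokens at all, and the pre-existing estimate of expected results, in which every additional address partial roughly halves the candidate set. The estimate in isolation:

```python
def estimated_results(fulls_count: int, num_addr_partials: int) -> float:
    # Each address partial that must also match roughly halves the
    # expected number of rows for the full-name lookup.
    return fulls_count / (2 ** num_addr_partials)

# e.g. 40000 occurrences of the name and 3 address partials:
assert estimated_results(40000, 3) == 5000.0
```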
@@ -321,8 +331,15 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
                                  self.query.get_tokens(assignment.postcode,
                                                        TokenType.POSTCODE))
         if assignment.qualifier:
-            sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
-                                                       TokenType.QUALIFIER))
+            tokens = self.query.get_tokens(assignment.qualifier, TokenType.QUALIFIER)
+            if self.details.categories:
+                tokens = [t for t in tokens if t.get_category() in self.details.categories]
+                if not tokens:
+                    return None
+            sdata.set_qualifiers(tokens)
+        elif self.details.categories:
+            sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
+                                                      [0.0] * len(self.details.categories))
 
         if assignment.address:
             sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
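The new code makes an explicit query qualifier and the categories request parameter cooperate: qualifier tokens are filtered down to the requested categories, an empty intersection aborts the search, and a parameter without a qualifier becomes a zero-penalty qualifier list. A sketch of the intersection rule, with illustrative names:

```python
from typing import List, Optional, Set, Tuple

Category = Tuple[str, str]          # e.g. ('amenity', 'restaurant')

def effective_qualifiers(query_cats: List[Category],
                         requested: Set[Category]) -> Optional[List[Category]]:
    if not requested:
        return query_cats           # no parameter restriction
    kept = [c for c in query_cats if c in requested]
    return kept or None             # None: impossible combination, no search

assert effective_qualifiers([('amenity', 'cafe')], {('amenity', 'bank')}) is None
```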
@@ -332,23 +349,22 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
         return sdata
 
 
-    def get_search_categories(self,
-                              assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
-        """ Collect tokens for category search or use the categories
+    def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
+        """ Collect tokens for near items search or use the categories
             requested per parameter.
             Returns None if no category search is requested.
         """
-        if assignment.category:
-            tokens = [t for t in self.query.get_tokens(assignment.category,
-                                                       TokenType.CATEGORY)
-                      if not self.details.categories
-                         or t.get_category() in self.details.categories]
-            return dbf.WeightedCategories([t.get_category() for t in tokens],
-                                          [t.penalty for t in tokens])
-
-        if self.details.categories:
-            return dbf.WeightedCategories(self.details.categories,
-                                          [0.0] * len(self.details.categories))
+        if assignment.near_item:
+            tokens: Dict[Tuple[str, str], float] = {}
+            for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
+                cat = t.get_category()
+                # The category of a near search will be that of near_item.
+                # Thus, if search is restricted to a category parameter,
+                # the two sets must intersect.
+                if (not self.details.categories or cat in self.details.categories)\
+                   and t.penalty < tokens.get(cat, 1000.0):
+                    tokens[cat] = t.penalty
+            return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
 
         return None
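get_near_items now deduplicates by category: several spellings of a near item can map to the same (class, type) pair, and only the cheapest penalty per pair is kept, with 1000.0 serving as infinity. The same pattern reappears in set_qualifiers in db_search_fields.py below. A standalone sketch:

```python
from typing import Dict, List, Tuple

Category = Tuple[str, str]

def cheapest_per_category(items: List[Tuple[Category, float]]) -> Dict[Category, float]:
    best: Dict[Category, float] = {}
    for cat, penalty in items:
        if penalty < best.get(cat, 1000.0):    # 1000.0 acts as +infinity
            best[cat] = penalty
    return best

# 'cafe' and 'coffee shop' both resolve to ('amenity', 'cafe'): cheaper wins.
assert cheapest_per_category([(('amenity', 'cafe'), 0.2),
                              (('amenity', 'cafe'), 0.0)]) == {('amenity', 'cafe'): 0.0}
```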
15 changes: 11 additions & 4 deletions nominatim/api/search/db_search_fields.py
@@ -7,7 +7,7 @@
 """
 Data structures for more complex fields in abstract search descriptions.
 """
-from typing import List, Tuple, Iterator, cast
+from typing import List, Tuple, Iterator, cast, Dict
 import dataclasses
 
 import sqlalchemy as sa
@@ -195,10 +195,17 @@ def set_qualifiers(self, tokens: List[Token]) -> None:
         """ Set the qualifier field from the given tokens.
         """
         if tokens:
-            min_penalty = min(t.penalty for t in tokens)
+            categories: Dict[Tuple[str, str], float] = {}
+            min_penalty = 1000.0
+            for t in tokens:
+                if t.penalty < min_penalty:
+                    min_penalty = t.penalty
+                cat = t.get_category()
+                if t.penalty < categories.get(cat, 1000.0):
+                    categories[cat] = t.penalty
             self.penalty += min_penalty
-            self.qualifiers = WeightedCategories([t.get_category() for t in tokens],
-                                                 [t.penalty - min_penalty for t in tokens])
+            self.qualifiers = WeightedCategories(list(categories.keys()),
+                                                 list(categories.values()))
 
 
     def set_ranking(self, rankings: List[FieldRanking]) -> None:
82 changes: 52 additions & 30 deletions nominatim/api/search/db_searches.py
@@ -66,7 +66,7 @@ def _select_placex(t: SaFromClause) -> SaSelect:
                      t.c.class_, t.c.type,
                      t.c.address, t.c.extratags,
                      t.c.housenumber, t.c.postcode, t.c.country_code,
-                     t.c.importance, t.c.wikipedia,
+                     t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                      t.c.linked_place_id, t.c.admin_level,
                      t.c.centroid,
@@ -158,7 +158,8 @@ async def _get_placex_housenumbers(conn: SearchConnection,
                                    place_ids: List[int],
                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
     t = conn.t.placex
-    sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
+    sql = _select_placex(t).add_columns(t.c.importance)\
+                           .where(t.c.place_id.in_(place_ids))
 
     if details.geometry_output:
         sql = _add_geometry_columns(sql, t.c.geometry, details)
@@ -255,9 +256,20 @@ async def lookup(self, conn: SearchConnection,
 
         base.sort(key=lambda r: (r.accuracy, r.rank_search))
         max_accuracy = base[0].accuracy + 0.5
+        if base[0].rank_address == 0:
+            min_rank = 0
+            max_rank = 0
+        elif base[0].rank_address < 26:
+            min_rank = 1
+            max_rank = min(25, base[0].rank_address + 4)
+        else:
+            min_rank = 26
+            max_rank = 30
         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
                                      and r.accuracy <= max_accuracy
-                                     and r.bbox and r.bbox.area < 20)
+                                     and r.bbox and r.bbox.area < 20
+                                     and r.rank_address >= min_rank
+                                     and r.rank_address <= max_rank)
 
         if base:
             baseids = [b.place_id for b in base[:5] if b.place_id]
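The new rank window keeps only results of a comparable kind to the best base match: rank-less objects with rank-less objects, area-level results within a few ranks of the best area, and street/house-level results together. The derivation on its own:

```python
from typing import Tuple

def rank_window(best_rank_address: int) -> Tuple[int, int]:
    if best_rank_address == 0:          # rank-less objects
        return (0, 0)
    if best_rank_address < 26:          # area-level results
        return (1, min(25, best_rank_address + 4))
    return (26, 30)                     # street- and house-level results

assert rank_window(16) == (1, 20)
assert rank_window(27) == (26, 30)
```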
@@ -279,28 +291,37 @@ async def lookup_category(self, results: nres.SearchResults,
         """
         table = await conn.get_class_table(*category)
 
-        t = conn.t.placex
         tgeom = conn.t.placex.alias('pgeom')
 
-        sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
-                               .where(t.c.class_ == category[0])\
-                               .where(t.c.type == category[1])
-
         if table is None:
             # No classtype table available, do a simplified lookup in placex.
-            sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
-                     .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
+            table = conn.t.placex.alias('inner')
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
+                    .where(table.c.class_ == category[0])\
+                    .where(table.c.type == category[1])
         else:
             # Use classtype table. We can afford to use a larger
             # radius for the lookup.
-            sql = sql.join(table, t.c.place_id == table.c.place_id)\
-                     .join(tgeom,
-                           table.c.centroid.ST_CoveredBy(
-                               sa.case((sa.and_(tgeom.c.rank_address < 9,
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom,
+                          table.c.centroid.ST_CoveredBy(
+                              sa.case((sa.and_(tgeom.c.rank_address > 9,
                                                tgeom.c.geometry.is_area()),
-                                        tgeom.c.geometry),
-                                       else_ = tgeom.c.centroid.ST_Expand(0.05))))\
-                     .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                                       tgeom.c.geometry),
+                                      else_ = tgeom.c.centroid.ST_Expand(0.05))))
+
+        inner = sql.where(tgeom.c.place_id.in_(ids))\
+                   .group_by(table.c.place_id).subquery()
+
+        t = conn.t.placex
+        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
+                               .join(inner, inner.c.place_id == t.c.place_id)\
+                               .order_by(inner.c.dist)
 
         sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
         if details.countries:
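The rewritten lookup_category splits the query in two: an inner aggregate that computes each candidate's minimum distance to any anchor place, and an outer select that joins the full placex rows back in, ordered by that distance, with the negated distance reused as the importance score. A self-contained sketch of the query shape, using a toy 1-D schema rather than Nominatim's PostGIS tables:

```python
import sqlalchemy as sa

meta = sa.MetaData()
places = sa.Table('places', meta,
                  sa.Column('place_id', sa.Integer, primary_key=True),
                  sa.Column('name', sa.Text),
                  sa.Column('x', sa.Float))

anchors = places.alias('anchors')   # the places to search around
cand = places.alias('cand')         # candidate results

# Step 1: aggregate the minimum distance of each candidate to any anchor
# (abs() stands in for PostGIS ST_Distance in this toy schema).
inner = (sa.select(cand.c.place_id,
                   sa.func.min(sa.func.abs(cand.c.x - anchors.c.x)).label('dist'))
           .select_from(cand.join(anchors, sa.true()))
           .group_by(cand.c.place_id)
           .subquery())

# Step 2: join the full rows back in, closest first; the negated distance
# doubles as an importance score for later ranking.
sql = (sa.select(places, (-inner.c.dist).label('importance'))
         .select_from(places.join(inner, inner.c.place_id == places.c.place_id))
         .order_by(inner.c.dist))
```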
Expand Down Expand Up @@ -342,6 +363,8 @@ async def lookup(self, conn: SearchConnection,
# simply search in placex table
def _base_query() -> SaSelect:
return _select_placex(t) \
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))\
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
Expand Down Expand Up @@ -370,6 +393,7 @@ def _base_query() -> SaSelect:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.join(table, t.c.place_id == table.c.place_id)\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
Expand Down Expand Up @@ -415,6 +439,7 @@ async def lookup(self, conn: SearchConnection,

ccodes = self.countries.values
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)

@@ -591,15 +616,7 @@ async def lookup(self, conn: SearchConnection,
         tsearch = conn.t.search_name
 
         sql: SaLambdaSelect = sa.lambda_stmt(lambda:
-                  sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
-                            t.c.class_, t.c.type,
-                            t.c.address, t.c.extratags, t.c.admin_level,
-                            t.c.housenumber, t.c.postcode, t.c.country_code,
-                            t.c.wikipedia,
-                            t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
-                            t.c.centroid,
-                            t.c.geometry.ST_Expand(0).label('bbox'))
-                  .where(t.c.place_id == tsearch.c.place_id))
+                  _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
 
 
         if details.geometry_output:
Expand Down Expand Up @@ -749,9 +766,6 @@ async def lookup(self, conn: SearchConnection,
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if not details.excluded or not result.place_id in details.excluded:
results.append(result)

if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
@@ -771,6 +785,14 @@ async def lookup(self, conn: SearchConnection,
                         sub.accuracy += 0.6
                         results.append(sub)
 
-                result.accuracy += 1.0 # penalty for missing housenumber
+                # Only add the street as a result, if it meets all other
+                # filter conditions.
+                if (not details.excluded or result.place_id not in details.excluded)\
+                   and (not self.qualifiers or result.category in self.qualifiers.values)\
+                   and result.rank_address >= details.min_rank:
+                    result.accuracy += 1.0 # penalty for missing housenumber
+                    results.append(result)
+            else:
+                results.append(result)
 
         return results
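The point of this hunk: when a housenumber search falls back to the parent street, the street itself must now pass the exclusion, qualifier and rank filters before it is added, and it still carries the 1.0 accuracy penalty for the missing housenumber. A sketch of just the filter, with stand-in names:

```python
from typing import Set, Tuple

def street_fallback_ok(place_id: int, category: Tuple[str, str], rank_address: int,
                       excluded: Set[int], qualifier_cats: Set[Tuple[str, str]],
                       min_rank: int) -> bool:
    # Mirrors the three conditions in the diff above.
    return (not excluded or place_id not in excluded) \
       and (not qualifier_cats or category in qualifier_cats) \
       and rank_address >= min_rank
```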
9 changes: 6 additions & 3 deletions nominatim/api/search/geocoder.py
@@ -79,7 +79,7 @@ async def execute_searches(self, query: QueryStruct,
 
         end_time = dt.datetime.now() + self.timeout
 
-        min_ranking = 1000.0
+        min_ranking = searches[0].penalty + 2.0
         prev_penalty = 0.0
         for i, search in enumerate(searches):
             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
@@ -94,7 +94,7 @@ async def execute_searches(self, query: QueryStruct,
                     prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
                 else:
                     results[rhash] = result
-                min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
+                min_ranking = min(min_ranking, result.accuracy * 1.2)
             log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
             prev_penalty = search.penalty
             if dt.datetime.now() >= end_time:
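Both geocoder hunks tune the same early-termination loop: searches arrive ordered by penalty, the cut-off now starts from the cheapest search's penalty instead of infinity, and each incoming result tightens it further. A simplified sketch of the loop, with stand-in classes rather than the real ones:

```python
from typing import Callable, Iterable, List

class Search:                       # simplified stand-in
    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

class Result:                       # simplified stand-in
    def __init__(self, accuracy: float) -> None:
        self.accuracy = accuracy

def run_searches(searches: List[Search],
                 run: Callable[[Search], Iterable[Result]]) -> List[Result]:
    results: List[Result] = []
    # Start pruning from the cheapest search rather than from infinity.
    min_ranking = searches[0].penalty + 2.0
    prev_penalty = 0.0
    for i, search in enumerate(searches):
        if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
            break                   # later searches cannot rank better
        for result in run(search):
            results.append(result)
            # Tighten the cut-off as good results come in.
            min_ranking = min(min_ranking, result.accuracy * 1.2)
        prev_penalty = search.penalty
    return results
```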
Expand Down Expand Up @@ -134,7 +134,10 @@ def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
return

for result in results:
if not result.display_name:
# Negative importance indicates ordering by distance, which is
# more important than word matching.
if not result.display_name\
or (result.importance is not None and result.importance < 0):
continue
distance = 0.0
norm = self.query_analyzer.normalize_text(result.display_name)
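This ties back to the db_searches changes: near searches store the negated distance as importance, so a negative importance marks a distance-ordered result that word-matching reranking must leave alone. The convention in a nutshell:

```python
from typing import Optional

def should_rerank(display_name: str, importance: Optional[float]) -> bool:
    # Negative importance marks a distance-ordered (near search) result.
    return bool(display_name) and not (importance is not None and importance < 0)

assert should_rerank('Main Street', 0.4) is True
assert should_rerank('Main Street', -0.002) is False
```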