Skip to content

Commit

Permalink
Merge pull request #3257 from lonvia/slow-queries
Browse files: browse the repository at this point in the history
Performance tweaks for search
  • Loading branch information
lonvia authored Nov 23, 2023
2 parents 158df6b + 4e4d29f commit 2d54de0
Show file tree
Hide file tree
Showing 5 changed files with 31 additions and 9 deletions.
7 changes: 7 additions & 0 deletions nominatim/api/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,10 @@ def __init__(self) -> None:
self.buffer = io.StringIO()


def _timestamp(self) -> None:
    """ Write the current date and time to the log. The value comes
        from 'datetime.now()' (naive local time) and is emitted in
        square brackets on a line of its own.
    """
    now = dt.datetime.now()
    self._write(f'[{now}]\n')


def get_buffer(self) -> str:
    """ Return everything that has been written into the internal
        string buffer so far as a single string.
    """
    content = self.buffer.getvalue()
    return content

Expand All @@ -247,6 +251,7 @@ def function(self, func: str, **kwargs: Any) -> None:


def section(self, heading: str) -> None:
    """ Start a new section in the log: emit a timestamp line first and
        then the heading, prefixed with '# ' and surrounded by blank
        lines.
    """
    self._timestamp()
    title_block = f"\n# {heading}\n\n"
    self._write(title_block)


Expand Down Expand Up @@ -283,6 +288,7 @@ def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:


def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
Expand All @@ -298,6 +304,7 @@ def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:

def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
        params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
    """ Log a timestamp followed by the given SQL statement.

        The statement is turned into text by 'format_sql()', wrapped at
        78 characters and written with every line prefixed by '| '.
    """
    self._timestamp()
    rendered = self.format_sql(conn, statement, params)
    wrapped_lines = textwrap.wrap(rendered, width=78)
    # The first line gets its prefix from the f-string below, all
    # following lines get theirs from the join separator.
    body = '\n| '.join(wrapped_lines)
    self._write(f"| {body}\n\n")

Expand Down
2 changes: 1 addition & 1 deletion nominatim/api/search/db_search_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,7 +208,7 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials) / (2**(len(name_partials) - 1))

if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed:
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
return

Expand Down
21 changes: 14 additions & 7 deletions nominatim/api/search/db_searches.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,13 @@
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements

def no_index(expr: SaColumn) -> SaColumn:
    """ Return the expression wrapped as 'coalesce(NULL, expr)'.

        The wrapper is there so that the query planner will refrain
        from using the expression for an index lookup.
    """
    wrapped = sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
    return wrapped


def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
Expand Down Expand Up @@ -107,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
orexpr.append(table.c.rank_address.between(1, 30))
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(table.c.rank_address.between(1, 29))
orexpr.append(sa.and_(table.c.rank_address == 30,
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
sa.or_(table.c.housenumber != None,
table.c.address.has_key('addr:housename'))))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(table.c.rank_address == 30,
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))

if layers & DataLayer.MANMADE:
Expand All @@ -124,15 +131,15 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
include.append('railway')
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))

if len(orexpr) == 1:
return orexpr[0]
Expand Down Expand Up @@ -295,7 +302,7 @@ async def lookup_category(self, results: nres.SearchResults,
else_ = tgeom.c.centroid.ST_Expand(0.05))))\
.order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))

sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
Expand Down
6 changes: 6 additions & 0 deletions nominatim/api/search/icu_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,10 +101,16 @@ def from_db_row(row: SaRow) -> 'ICUToken':
penalty = 0.0
if row.type == 'w':
penalty = 0.3
elif row.type == 'W':
if len(row.word_token) == 1 and row.word_token == row.word:
penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
elif row.type == 'C':
if len(row.word_token) == 1:
penalty = 0.3

if row.info is None:
lookup_word = row.word
Expand Down
4 changes: 3 additions & 1 deletion nominatim/api/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -538,7 +538,9 @@ def is_impossible(self) -> bool:
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
or self.layers is not None and not self.layers)
or (self.layers is not None and not self.layers)
or (self.max_rank <= 4 and
self.layers is not None and not self.layers & DataLayer.ADDRESS))


def layer_enabled(self, layer: DataLayer) -> bool:
Expand Down

0 comments on commit 2d54de0

Please sign in to comment.