From a87fe8d8bf2f4dd784fdb94031bc978b3f28b799 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Nov 2023 17:01:41 +0100 Subject: [PATCH 1/6] exclude country-level searches with non-address layers --- nominatim/api/types.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/nominatim/api/types.py b/nominatim/api/types.py index 3ca023e7b..5767fe160 100644 --- a/nominatim/api/types.py +++ b/nominatim/api/types.py @@ -538,7 +538,9 @@ def is_impossible(self) -> bool: or (self.bounded_viewbox and self.viewbox is not None and self.near is not None and self.viewbox.contains(self.near)) - or self.layers is not None and not self.layers) + or (self.layers is not None and not self.layers) + or (self.max_rank <= 4 and + self.layers is not None and not self.layers & DataLayer.ADDRESS)) def layer_enabled(self, layer: DataLayer) -> bool: From 155f26060d0bd87389a22e357db11fd85df1f9de Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Nov 2023 17:33:17 +0100 Subject: [PATCH 2/6] avoid index on rank_address in near search --- nominatim/api/search/db_searches.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 97c4292e5..62e9a4502 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -24,6 +24,13 @@ #pylint: disable=singleton-comparison,not-callable #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements +def no_index(expr: SaColumn) -> SaColumn: + """ Wrap the given expression, so that the query planner will + refrain from using the expression for index lookup. + """ + return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable + + def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]: """ Create a dictionary from search parameters that can be used as bind parameter for SQL execute. @@ -295,7 +302,7 @@ async def lookup_category(self, results: nres.SearchResults, else_ = tgeom.c.centroid.ST_Expand(0.05))))\ .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid)) - sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM)) + sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM)) if details.countries: sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM)) if details.excluded: From e7dc24c026cb8059493b4a04df8debe9809b311b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Nov 2023 17:38:32 +0100 Subject: [PATCH 3/6] add timestamps to text logging --- nominatim/api/logging.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/nominatim/api/logging.py b/nominatim/api/logging.py index 5b6d0e4db..37ae7f5f0 100644 --- a/nominatim/api/logging.py +++ b/nominatim/api/logging.py @@ -235,6 +235,10 @@ def __init__(self) -> None: self.buffer = io.StringIO() + def _timestamp(self) -> None: + self._write(f'[{dt.datetime.now()}]\n') + + def get_buffer(self) -> str: return self.buffer.getvalue() @@ -247,6 +251,7 @@ def function(self, func: str, **kwargs: Any) -> None: def section(self, heading: str) -> None: + self._timestamp() self._write(f"\n# {heading}\n\n") @@ -283,6 +288,7 @@ def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None: def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None: + self._timestamp() self._write(f'{heading}:\n') total = 0 for rank, res in results: @@ -298,6 +304,7 @@ def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None: def sql(self, conn: AsyncConnection, statement: 'sa.Executable', params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None: + self._timestamp() sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78)) self._write(f"| {sqlstr}\n\n") From ac5ef6470161994850f0088b37f1c441b76bd99e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Nov 2023 20:54:04 +0100 Subject: [PATCH 4/6] avoid index use when filtering by layer --- nominatim/api/search/db_searches.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/nominatim/api/search/db_searches.py b/nominatim/api/search/db_searches.py index 62e9a4502..41434f062 100644 --- a/nominatim/api/search/db_searches.py +++ b/nominatim/api/search/db_searches.py @@ -114,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause, def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: orexpr: List[SaExpression] = [] if layers & DataLayer.ADDRESS and layers & DataLayer.POI: - orexpr.append(table.c.rank_address.between(1, 30)) + orexpr.append(no_index(table.c.rank_address).between(1, 30)) elif layers & DataLayer.ADDRESS: - orexpr.append(table.c.rank_address.between(1, 29)) - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(no_index(table.c.rank_address).between(1, 29)) + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, sa.or_(table.c.housenumber != None, table.c.address.has_key('addr:housename')))) elif layers & DataLayer.POI: - orexpr.append(sa.and_(table.c.rank_address == 30, + orexpr.append(sa.and_(no_index(table.c.rank_address) == 30, table.c.class_.not_in(('place', 'building')))) if layers & DataLayer.MANMADE: @@ -131,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if not layers & DataLayer.NATURAL: exclude.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) else: include = [] if layers & DataLayer.RAILWAY: @@ -139,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn: if layers & DataLayer.NATURAL: include.extend(('natural', 'water', 'waterway')) orexpr.append(sa.and_(table.c.class_.in_(tuple(include)), - table.c.rank_address == 0)) + no_index(table.c.rank_address) == 0)) if len(orexpr) == 1: return orexpr[0] From 195c13ee8ad7635818f518a3100e7c158747328f Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Nov 2023 23:57:23 +0100 Subject: [PATCH 5/6] more preference for name-only queries in search --- nominatim/api/search/db_search_builder.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nominatim/api/search/db_search_builder.py b/nominatim/api/search/db_search_builder.py index 66e7efaf7..905b5c621 100644 --- a/nominatim/api/search/db_search_builder.py +++ b/nominatim/api/search/db_search_builder.py @@ -208,7 +208,7 @@ def yield_lookups(self, name: TokenRange, address: List[TokenRange])\ and all(t.is_indexed for t in addr_partials) exp_count = min(t.count for t in name_partials) / (2**(len(name_partials) - 1)) - if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed: + if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed: yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens) return From 4e4d29f653d4929f49536255314ec19264166ec6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 23 Nov 2023 10:51:58 +0100 Subject: [PATCH 6/6] increase penalty for one-letter words --- nominatim/api/search/icu_tokenizer.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index b68e8d10e..196fde2a8 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -101,10 +101,16 @@ def from_db_row(row: SaRow) -> 'ICUToken': penalty = 0.0 if row.type == 'w': penalty = 0.3 + elif row.type == 'W': + if len(row.word_token) == 1 and row.word_token == row.word: + penalty = 0.2 if row.word.isdigit() else 0.3 elif row.type == 'H': penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit()) if all(not c.isdigit() for c in row.word_token): penalty += 0.2 * (len(row.word_token) - 1) + elif row.type == 'C': + if len(row.word_token) == 1: + penalty = 0.3 if row.info is None: lookup_word = row.word