Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix use of category parameter for search() function #3262

Merged
merged 4 commits into from
Nov 28, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 28 additions & 21 deletions nominatim/api/search/db_search_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,29 +89,31 @@ def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
if sdata is None:
return

categories = self.get_search_categories(assignment)
near_items = self.get_near_items(assignment)
if near_items is not None and not near_items:
return # impossible combination of near items and category parameter

if assignment.name is None:
if categories and not sdata.postcodes:
sdata.qualifiers = categories
categories = None
if near_items and not sdata.postcodes:
sdata.qualifiers = near_items
near_items = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER)
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
bool(categories))
bool(near_items))
else:
builder = self.build_name_search(sdata, assignment.name, assignment.address,
bool(categories))
bool(near_items))

if categories:
penalty = min(categories.penalties)
categories.penalties = [p - penalty for p in categories.penalties]
if near_items:
penalty = min(near_items.penalties)
near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder:
yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
yield dbs.NearSearch(penalty + assignment.penalty, near_items, search)
else:
for search in builder:
search.penalty += assignment.penalty
Expand Down Expand Up @@ -321,8 +323,15 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchDat
self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE))
if assignment.qualifier:
sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
TokenType.QUALIFIER))
tokens = self.query.get_tokens(assignment.qualifier, TokenType.QUALIFIER)
if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories]
if not tokens:
return None
sdata.set_qualifiers(tokens)
elif self.details.categories:
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))

if assignment.address:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
Expand All @@ -332,25 +341,23 @@ def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchDat
return sdata


def get_search_categories(self,
assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for category search or use the categories
def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for near items search or use the categories
requested per parameter.
Returns None if no category search is requested.
"""
if assignment.category:
if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.category, TokenType.CATEGORY):
for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
cat = t.get_category()
# The category of a near search will be that of near_item.
# Thus, if search is restricted to a category parameter,
# the two sets must intersect.
if (not self.details.categories or cat in self.details.categories)\
and t.penalty < tokens.get(cat, 1000.0):
tokens[cat] = t.penalty
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))

if self.details.categories:
return dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))

return None


Expand Down
13 changes: 9 additions & 4 deletions nominatim/api/search/db_searches.py
Original file line number Diff line number Diff line change
Expand Up @@ -766,9 +766,6 @@ async def lookup(self, conn: SearchConnection,
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if not details.excluded or not result.place_id in details.excluded:
results.append(result)

if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
Expand All @@ -788,6 +785,14 @@ async def lookup(self, conn: SearchConnection,
sub.accuracy += 0.6
results.append(sub)

result.accuracy += 1.0 # penalty for missing housenumber
# Only add the street as a result, if it meets all other
# filter conditions.
if (not details.excluded or result.place_id not in details.excluded)\
and (not self.qualifiers or result.category in self.qualifiers.values)\
and result.rank_address >= details.min_rank:
result.accuracy += 1.0 # penalty for missing housenumber
results.append(result)
else:
results.append(result)

return results
4 changes: 2 additions & 2 deletions nominatim/api/search/icu_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,13 +184,13 @@ async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
if row.type == 'S':
if row.info['op'] in ('in', 'near'):
if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

Expand Down
8 changes: 4 additions & 4 deletions nominatim/api/search/legacy_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,15 +107,15 @@ async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
if ttype == qmod.TokenType.CATEGORY:
if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)

Expand Down Expand Up @@ -195,7 +195,7 @@ def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:]
else:
ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):
Expand Down
4 changes: 2 additions & 2 deletions nominatim/api/search/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class TokenType(enum.Enum):
""" Country name or reference. """
QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
CATEGORY = enum.auto()
NEAR_ITEM = enum.auto()
""" Special term used as searchable object (e.g. supermarket in ...). """


Expand Down Expand Up @@ -78,7 +78,7 @@ def compatible_with(self, ttype: TokenType,
return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL)\
or (is_full_phrase and ttype == TokenType.CATEGORY)\
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
Expand Down
20 changes: 10 additions & 10 deletions nominatim/api/search/token_assignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None
category: Optional[qmod.TokenRange] = None
near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None


Expand All @@ -64,8 +64,8 @@ def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange
elif token.ttype == qmod.TokenType.CATEGORY:
out.category = token.trange
elif token.ttype == qmod.TokenType.NEAR_ITEM:
out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange
return out
Expand Down Expand Up @@ -109,7 +109,7 @@ def is_final(self) -> bool:
"""
# Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)


def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
Expand Down Expand Up @@ -165,22 +165,22 @@ def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1

if ttype == qmod.TokenType.CATEGORY:
if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction

if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1:
if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.CATEGORY
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1
return None
if self.direction == -1:
return -1

tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0:
return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
Expand Down Expand Up @@ -253,7 +253,7 @@ def recheck_sequence(self) -> bool:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
self.penalty += 1.0

return True
Expand Down Expand Up @@ -368,7 +368,7 @@ def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:

# Postcode or country-only search
if not base.address:
if not base.housenumber and (base.postcode or base.country or base.category):
if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
Expand Down
10 changes: 5 additions & 5 deletions test/python/api/search/test_api_search_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,11 +106,11 @@ def test_query_struct_amenity_single_word():
q.add_node(query.BreakType.END, query.PhraseType.NONE)

q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(0, 1), query.TokenType.CATEGORY, mktoken(2))
q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))

assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0


Expand All @@ -121,14 +121,14 @@ def test_query_struct_amenity_two_words():

for trange in [(0, 1), (1, 2)]:
q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(*trange), query.TokenType.CATEGORY, mktoken(2))
q.add_token(query.TokenRange(*trange), query.TokenType.NEAR_ITEM, mktoken(2))
q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))

assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 0
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1

assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.CATEGORY)) == 0
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.NEAR_ITEM)) == 0
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1

Loading