-
-
Notifications
You must be signed in to change notification settings - Fork 714
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #3122 from miku0/sanitizer-final
Adds sanitizer for Japanese addresses to correspond to block address
- Loading branch information
Showing
6 changed files
with
247 additions
and
11 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
# SPDX-License-Identifier: GPL-3.0-or-later | ||
# | ||
# This file is part of Nominatim. (https://nominatim.org) | ||
# | ||
# Copyright (C) 2022 by the Nominatim developer community. | ||
# For a full list of authors see the git log. | ||
""" | ||
This sanitizer maps OSM data to Japanese block addresses. | ||
It combines block_number and housenumber into a single housenumber, | ||
and quarter and neighbourhood into a single place. | ||
""" | ||
|
||
|
||
from typing import Callable | ||
from typing import List, Optional | ||
|
||
from nominatim.tokenizer.sanitizers.base import ProcessInfo | ||
from nominatim.tokenizer.sanitizers.config import SanitizerConfig | ||
from nominatim.data.place_name import PlaceName | ||
|
||
def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
    """ Create the sanitizer handler.

        The configuration argument is ignored; the returned callable is
        always `tag_japanese`.
    """
    return tag_japanese
|
||
def reconbine_housenumber(
    new_address: List[PlaceName],
    tmp_housenumber: Optional[str],
    tmp_blocknumber: Optional[str]
) -> List[PlaceName]:
    """ Append a single 'housenumber' entry built from block number and
        house number.

        When both values are set, they are joined as
        '<blocknumber>-<housenumber>'. When only one is set, that value is
        used as is. When neither is set, nothing is appended.

        The list passed in as `new_address` is modified in place and also
        returned.
    """
    if tmp_blocknumber and tmp_housenumber:
        combined = f'{tmp_blocknumber}-{tmp_housenumber}'
    else:
        # At most one of the two is set here; pick whichever it is.
        combined = tmp_blocknumber or tmp_housenumber

    if combined:
        new_address.append(PlaceName(kind='housenumber', name=combined, suffix=''))

    return new_address
|
||
def reconbine_place(
    new_address: List[PlaceName],
    tmp_neighbourhood: Optional[str],
    tmp_quarter: Optional[str]
) -> List[PlaceName]:
    """ Append a single 'place' entry built from quarter and neighbourhood.

        When both values are set, the quarter is directly followed by the
        neighbourhood without a separator. When only one is set, that value
        is used as is. When neither is set, nothing is appended.

        The list passed in as `new_address` is modified in place and also
        returned.
    """
    if tmp_neighbourhood and tmp_quarter:
        combined = f'{tmp_quarter}{tmp_neighbourhood}'
    else:
        # At most one of the two is set here; pick whichever it is.
        combined = tmp_neighbourhood or tmp_quarter

    if combined:
        new_address.append(PlaceName(kind='place', name=combined, suffix=''))

    return new_address
def tag_japanese(obj: ProcessInfo) -> None:
    """ Rewrite the address parts of a Japanese place.

        Places whose country code is not 'jp' are left untouched.

        Address items of kind 'housenumber', 'block_number',
        'neighbourhood' and 'quarter' are taken out of the address. If a
        kind appears more than once, the last name seen is used. The
        collected values are then re-added as one combined 'housenumber'
        and one combined 'place' item, appended after all other address
        items, which keep their original order.
    """
    if obj.place.country_code != 'jp':
        return

    house: Optional[str] = None
    block: Optional[str] = None
    neighbourhood: Optional[str] = None
    quarter: Optional[str] = None

    kept: List[PlaceName] = []
    for entry in obj.address:
        kind = entry.kind
        if kind == 'housenumber':
            house = entry.name
            continue
        if kind == 'block_number':
            block = entry.name
            continue
        if kind == 'neighbourhood':
            neighbourhood = entry.name
            continue
        if kind == 'quarter':
            quarter = entry.name
            continue
        kept.append(entry)

    kept = reconbine_housenumber(kept, house, block)
    kept = reconbine_place(kept, neighbourhood, quarter)

    obj.address = [entry for entry in kept if entry.name is not None]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
@DB | ||
Feature: Searches in Japan | ||
Test specifically for searches of Japanese addresses and in Japanese language. | ||
@fail-legacy | ||
Scenario: A block house-number is parented to the neighbourhood | ||
Given the grid with origin JP | ||
| 1 | | | | 2 | | ||
| | 3 | | | | | ||
| | | 9 | | | | ||
| | | | 6 | | | ||
And the places | ||
| osm | class | type | name | geometry | | ||
| W1 | highway | residential | 雉子橋通り | 1,2 | | ||
And the places | ||
| osm | class | type | housenr | addr+block_number | addr+neighbourhood | geometry | | ||
| N3 | amenity | restaurant | 2 | 6 | 2丁目 | 3 | | ||
And the places | ||
| osm | class | type | name | geometry | | ||
| N9 | place | neighbourhood | 2丁目 | 9 | | ||
And the places | ||
| osm | class | type | name | geometry | | ||
| N6 | place | quarter | 加瀬 | 6 | | ||
When importing | ||
Then placex contains | ||
| object | parent_place_id | | ||
| N3 | N9 | | ||
When sending search query "2丁目 6-2" | ||
Then results contain | ||
| osm | | ||
| N3 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
from nominatim.data.place_info import PlaceInfo | ||
from nominatim.data.place_name import PlaceName | ||
from nominatim.tokenizer.place_sanitizer import PlaceSanitizer | ||
from typing import Mapping, Optional, List | ||
import pytest | ||
|
||
class TestTagJapanese:
    """ Tests for the 'tag-japanese' sanitizer step.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        self.config = def_config

    def run_sanitizer_on(self, _type, **kwargs):
        """ Run the sanitizer on a place in Japan whose address tags are
            given by the keyword arguments.

            The first positional argument is accepted for the existing
            call sites but is not used.

            Returns the resulting address as a sorted list of
            (name, kind) tuples.
        """
        place = PlaceInfo({
            'address': kwargs,
            'country_code': 'jp'
        })
        sanitizer_args = {'step': 'tag-japanese'}
        _, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
        tmp_list = [(p.name, p.kind) for p in address]
        return sorted(tmp_list)

    def test_non_japanese_place_is_untouched(self):
        # The sanitizer returns early for any country code other than 'jp'.
        place = PlaceInfo({
            'address': {'block_number': '6'},
            'country_code': 'de'
        })
        sanitizer_args = {'step': 'tag-japanese'}
        _, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
        assert [(p.name, p.kind) for p in address] == [('6', 'block_number')]

    def test_on_address(self):
        res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
        assert res == [('bar', 'ref'), ('baz', 'ref_abc'), ('foo', 'name')]

    def test_housenumber(self):
        res = self.run_sanitizer_on('address', housenumber='2')
        assert res == [('2', 'housenumber')]

    def test_blocknumber(self):
        res = self.run_sanitizer_on('address', block_number='6')
        assert res == [('6', 'housenumber')]

    def test_neighbourhood(self):
        res = self.run_sanitizer_on('address', neighbourhood='8')
        assert res == [('8', 'place')]

    def test_quarter(self):
        res = self.run_sanitizer_on('address', quarter='kase')
        assert res == [('kase', 'place')]

    def test_housenumber_blocknumber(self):
        res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
        assert res == [('6-2', 'housenumber')]

    def test_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
        assert res == [('kase8', 'place')]

    def test_blocknumber_housenumber_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    quarter='kase')
        assert res == [('6-2', 'housenumber'), ('kase', 'place')]

    def test_blocknumber_housenumber_neighbourhood(self):
        # Renamed: this case passes no quarter, only a neighbourhood.
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    neighbourhood='8')
        assert res == [('6-2', 'housenumber'), ('8', 'place')]

    def test_blocknumber_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', block_number='6', quarter='kase',
                                    neighbourhood='8')
        assert res == [('6', 'housenumber'), ('kase8', 'place')]

    def test_blocknumber_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
        assert res == [('6', 'housenumber'), ('kase', 'place')]

    def test_blocknumber_neighbourhood(self):
        res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
        assert res == [('6', 'housenumber'), ('8', 'place')]

    def test_housenumber_quarter_neighbourhood(self):
        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase',
                                    neighbourhood='8')
        assert res == [('2', 'housenumber'), ('kase8', 'place')]

    def test_housenumber_quarter(self):
        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
        assert res == [('2', 'housenumber'), ('kase', 'place')]

    def test_housenumber_blocknumber_neighbourhood_quarter(self):
        res = self.run_sanitizer_on('address', block_number='6', housenumber='2',
                                    quarter='kase', neighbourhood='8')
        assert res == [('6-2', 'housenumber'), ('kase8', 'place')]