Merge pull request #3397 from lonvia/improve-handling-unlisted-places

Improve handling of addr:place without a place node
This commit is contained in:
Sarah Hoffmann
2024-05-02 13:46:18 +02:00
committed by GitHub
4 changed files with 40 additions and 51 deletions

View File

@@ -481,24 +481,20 @@ BEGIN
name_vector := array_merge(name_vector, hnr_vector); name_vector := array_merge(name_vector, hnr_vector);
END IF; END IF;
IF is_place_addr THEN
addr_place_ids := token_addr_place_search_tokens(token_info);
IF not addr_place_ids <@ parent_name_vector THEN
-- make sure addr:place terms are always searchable
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
-- If there is a housenumber, also add the place name as a name,
-- so we can search it by the usual housenumber+place algorithms.
IF hnr_vector is not null THEN
name_vector := array_merge(name_vector, addr_place_ids);
END IF;
END IF;
END IF;
-- Cheating here by not recomputing all terms but simply using the ones -- Cheating here by not recomputing all terms but simply using the ones
-- from the parent object. -- from the parent object.
nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector); nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector); nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
-- make sure addr:place terms are always searchable
IF is_place_addr THEN
addr_place_ids := token_addr_place_search_tokens(token_info);
IF hnr_vector is not null AND not addr_place_ids <@ parent_name_vector
THEN
name_vector := array_merge(name_vector, hnr_vector);
END IF;
nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
END IF;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;

View File

@@ -712,10 +712,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
token_info.add_street(self._retrieve_full_tokens(item.name)) token_info.add_street(self._retrieve_full_tokens(item.name))
elif item.kind == 'place': elif item.kind == 'place':
if not item.suffix: if not item.suffix:
token_info.add_place(self._compute_partial_tokens(item.name)) token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
elif not item.kind.startswith('_') and not item.suffix and \ elif not item.kind.startswith('_') and not item.suffix and \
item.kind not in ('country', 'full', 'inclusion'): item.kind not in ('country', 'full', 'inclusion'):
token_info.add_address_term(item.kind, self._compute_partial_tokens(item.name)) token_info.add_address_term(item.kind,
itertools.chain(*self._compute_name_tokens([item])))
def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]: def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
@@ -756,36 +757,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
return result return result
def _compute_partial_tokens(self, name: str) -> List[int]:
""" Normalize the given term, split it into partial words and return
the token list for them.
"""
assert self.conn is not None
norm_name = self._search_normalized(name)
tokens = []
need_lookup = []
for partial in norm_name.split():
token = self._cache.partials.get(partial)
if token:
tokens.append(token)
else:
need_lookup.append(partial)
if need_lookup:
with self.conn.cursor() as cur:
cur.execute("""SELECT word, getorcreate_partial_word(word)
FROM unnest(%s) word""",
(need_lookup, ))
for partial, token in cur:
assert token is not None
tokens.append(token)
self._cache.partials[partial] = token
return tokens
def _retrieve_full_tokens(self, name: str) -> List[int]: def _retrieve_full_tokens(self, name: str) -> List[int]:
""" Get the full name token for the given name, if it exists. """ Get the full name token for the given name, if it exists.
The name is only retrieved for the standard analyser. The name is only retrieved for the standard analyser.
@@ -957,8 +928,9 @@ class _TokenInfo:
def add_address_term(self, key: str, partials: Iterable[int]) -> None: def add_address_term(self, key: str, partials: Iterable[int]) -> None:
""" Add additional address terms. """ Add additional address terms.
""" """
if partials: array = self._mk_array(partials)
self.address_tokens[key] = self._mk_array(partials) if len(array) > 2:
self.address_tokens[key] = array
def set_postcode(self, postcode: Optional[str]) -> None: def set_postcode(self, postcode: Optional[str]) -> None:
""" Set the postcode to the given one. """ Set the postcode to the given one.

View File

@@ -542,3 +542,24 @@ Feature: Address computation
| object | address | | object | address |
| W1 | R2 | | W1 | R2 |
Scenario: Full name is preferred for unlisted addr:place tags
Given the grid
| | 1 | 2 | |
| 8 | | | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | place | city | Away | (8,1,2,9,8) |
And the places
| osm | class | type | name | addr+city | geometry |
| W1 | highway | residential | Royal Terrace | Gardens | 8,9 |
And the places
| osm | class | type | housenr | addr+place | geometry | extra+foo |
| N1 | place | house | 1 | Royal Terrace Gardens | 1 | bar |
And the places
| osm | class | type | housenr | addr+street | geometry |
| N2 | place | house | 2 | Royal Terrace | 2 |
When importing
When sending search query "1, Royal Terrace Gardens"
Then results contain
| ID | osm |
| 0 | N1 |

View File

@@ -554,7 +554,7 @@ class TestPlaceAddress:
def test_process_place_place(self): def test_process_place_place(self):
info = self.process_address(place='Honu Lulu') info = self.process_address(place='Honu Lulu')
assert eval(info['place']) == self.name_token_set('HONU', 'LULU') assert eval(info['place']) == self.name_token_set('HONU', 'LULU', '#HONU LULU')
def test_process_place_place_extra(self): def test_process_place_place_extra(self):
@@ -574,8 +574,8 @@ class TestPlaceAddress:
suburb='Zwickau', street='Hauptstr', suburb='Zwickau', street='Hauptstr',
full='right behind the church') full='right behind the church')
city = self.name_token_set('ZWICKAU') city = self.name_token_set('ZWICKAU', '#ZWICKAU')
state = self.name_token_set('SACHSEN') state = self.name_token_set('SACHSEN', '#SACHSEN')
result = {k: eval(v) for k,v in info['addr'].items()} result = {k: eval(v) for k,v in info['addr'].items()}
@@ -587,7 +587,7 @@ class TestPlaceAddress:
result = {k: eval(v) for k,v in info['addr'].items()} result = {k: eval(v) for k,v in info['addr'].items()}
assert result == {'city': self.name_token_set('Bruxelles')} assert result == {'city': self.name_token_set('Bruxelles', '#Bruxelles')}
def test_process_place_address_terms_empty(self): def test_process_place_address_terms_empty(self):