Merge pull request #3397 from lonvia/improve-handling-unlisted-places

Improve handling addr:place without a place node
2024-05-02 13:46:18 +02:00
parent 9410263485 7fb3ef4633
commit a2cf6db61b
4 changed files with 40 additions and 51 deletions
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -481,24 +481,20 @@ BEGIN
    name_vector := array_merge(name_vector, hnr_vector);
  END IF;
  IF is_place_addr THEN
    addr_place_ids := token_addr_place_search_tokens(token_info);
    IF not addr_place_ids <@ parent_name_vector THEN
      -- make sure addr:place terms are always searchable
      nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
      -- If there is a housenumber, also add the place name as a name,
      -- so we can search it by the usual housenumber+place algorithms.
      IF hnr_vector is not null THEN
        name_vector := array_merge(name_vector, addr_place_ids);
      END IF;
    END IF;
  END IF;
  -- Cheating here by not recomputing all terms but simply using the ones
  -- from the parent object.
  nameaddress_vector := array_merge(nameaddress_vector, parent_name_vector);
  nameaddress_vector := array_merge(nameaddress_vector, parent_address_vector);
  -- make sure addr:place terms are always searchable
  IF is_place_addr THEN
    addr_place_ids := token_addr_place_search_tokens(token_info);
    IF hnr_vector is not null AND not addr_place_ids <@ parent_name_vector
    THEN
      name_vector := array_merge(name_vector, hnr_vector);
    END IF;
    nameaddress_vector := array_merge(nameaddress_vector, addr_place_ids);
  END IF;
 END;
 $$
 LANGUAGE plpgsql;
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -712,10 +712,11 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                token_info.add_street(self._retrieve_full_tokens(item.name))
            elif item.kind == 'place':
                if not item.suffix:
-                    token_info.add_place(self._compute_partial_tokens(item.name))
+                    token_info.add_place(itertools.chain(*self._compute_name_tokens([item])))
            elif not item.kind.startswith('_') and not item.suffix and \
                 item.kind not in ('country', 'full', 'inclusion'):
-                token_info.add_address_term(item.kind, self._compute_partial_tokens(item.name))
+                token_info.add_address_term(item.kind,
                                            itertools.chain(*self._compute_name_tokens([item])))
    def _compute_housenumber_token(self, hnr: PlaceName) -> Tuple[Optional[int], Optional[str]]:
@@ -756,36 +757,6 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        return result
    def _compute_partial_tokens(self, name: str) -> List[int]:
        """ Normalize the given term, split it into partial words and return
            then token list for them.
        """
        assert self.conn is not None
        norm_name = self._search_normalized(name)
        tokens = []
        need_lookup = []
        for partial in norm_name.split():
            token = self._cache.partials.get(partial)
            if token:
                tokens.append(token)
            else:
                need_lookup.append(partial)
        if need_lookup:
            with self.conn.cursor() as cur:
                cur.execute("""SELECT word, getorcreate_partial_word(word)
                               FROM unnest(%s) word""",
                            (need_lookup, ))
                for partial, token in cur:
                    assert token is not None
                    tokens.append(token)
                    self._cache.partials[partial] = token
        return tokens
    def _retrieve_full_tokens(self, name: str) -> List[int]:
        """ Get the full name token for the given name, if it exists.
            The name is only retrieved for the standard analyser.
@@ -957,8 +928,9 @@ class _TokenInfo:
    def add_address_term(self, key: str, partials: Iterable[int]) -> None:
        """ Add additional address terms.
        """
-        if partials:
+        array = self._mk_array(partials)
-            self.address_tokens[key] = self._mk_array(partials)
+        if len(array) > 2:
            self.address_tokens[key] = array
    def set_postcode(self, postcode: Optional[str]) -> None:
        """ Set the postcode to the given one.
--- a/test/bdd/db/import/addressing.feature
+++ b/test/bdd/db/import/addressing.feature
@@ -542,3 +542,24 @@ Feature: Address computation
            | object | address |
            | W1     | R2      |
    Scenario: Full name is preferred for unlisted addr:place tags
        Given the grid
            |   | 1 | 2 |   |
            | 8 |   |   | 9 |
        And the places
            | osm | class | type | name    | geometry |
            | W10 | place | city | Away    | (8,1,2,9,8) |
        And the places
            | osm | class   | type        | name          | addr+city | geometry |
            | W1  | highway | residential | Royal Terrace | Gardens   | 8,9      |
        And the places
            | osm | class | type  | housenr | addr+place            | geometry | extra+foo |
            | N1  | place | house | 1       | Royal Terrace Gardens | 1        | bar |
        And the places
            | osm | class | type  | housenr | addr+street   | geometry |
            | N2  | place | house | 2       | Royal Terrace | 2        |
        When importing
        When sending search query "1, Royal Terrace Gardens"
        Then results contain
            | ID | osm |
            | 0  | N1  |
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -554,7 +554,7 @@ class TestPlaceAddress:
    def test_process_place_place(self):
        info = self.process_address(place='Honu Lulu')
-        assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
+        assert eval(info['place']) == self.name_token_set('HONU', 'LULU', '#HONU LULU')
    def test_process_place_place_extra(self):
@@ -574,8 +574,8 @@ class TestPlaceAddress:
                                    suburb='Zwickau', street='Hauptstr',
                                    full='right behind the church')
-        city = self.name_token_set('ZWICKAU')
+        city = self.name_token_set('ZWICKAU', '#ZWICKAU')
-        state = self.name_token_set('SACHSEN')
+        state = self.name_token_set('SACHSEN', '#SACHSEN')
        result = {k: eval(v) for k,v in info['addr'].items()}
@@ -587,7 +587,7 @@ class TestPlaceAddress:
        result = {k: eval(v) for k,v in info['addr'].items()}
-        assert result == {'city': self.name_token_set('Bruxelles')}
+        assert result == {'city': self.name_token_set('Bruxelles', '#Bruxelles')}
    def test_process_place_address_terms_empty(self):