Export more data for tokenizer name preparation

Adds class, type, country code and address rank to the exported place information and removes the rather odd hack for countries (the `country_feature` field). Whether a place represents a country boundary can now be computed by the tokenizer.
2026-03-09 11:34:07 +00:00 · 2021-09-29 11:54:14 +02:00
parent 231250f2eb
commit be65c8303f
7 changed files with 85 additions and 45 deletions
--- a/nominatim/indexer/place_info.py
+++ b/nominatim/indexer/place_info.py
@@ -38,7 +38,31 @@ class PlaceInfo:


    @property
-    def country_feature(self):
-        """ Return the country code if the place is a valid country boundary.
+    def country_code(self):
+        """ The country code of the country the place is in. Guaranteed
+            to be a two-letter lower-case string or None, if no country
+            could be found.
        """
-        return self._info.get('country_feature')
+        return self._info.get('country_code')
+
+
+    @property
+    def rank_address(self):
+        """ The computed rank address before rank correction.
+        """
+        return self._info.get('rank_address')
+
+
+    def is_a(self, key, value):
+        """ Check if the place's primary tag corresponds to the given
+            key and value.
+        """
+        return self._info.get('class') == key and self._info.get('type') == value
+
+
+    def is_country(self):
+        """ Check if the place is a valid country boundary.
+        """
+        return self.rank_address == 4 \
+               and self.is_a('boundary', 'administrative') \
+               and self.country_code is not None
--- a/nominatim/indexer/runners.py
+++ b/nominatim/indexer/runners.py
@@ -39,7 +39,7 @@ class AbstractPlacexRunner:

    @staticmethod
    def get_place_details(worker, ids):
-        worker.perform("""SELECT place_id, (placex_prepare_update(placex)).*
+        worker.perform("""SELECT place_id, (placex_indexing_prepare(placex)).*
                          FROM placex WHERE place_id IN %s""",
                       (tuple((p[0] for p in ids)), ))

--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -397,9 +397,8 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):

            token_info.add_names(fulls, partials)

-            country_feature = place.country_feature
-            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
-                self.add_country_names(country_feature.lower(), names)
+            if place.is_country():
+                self.add_country_names(place.country_code, names)

        address = place.address
        if address:
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -410,9 +410,8 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
        if names:
            token_info.add_names(self.conn, names)

-            country_feature = place.country_feature
-            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
-                self.add_country_names(country_feature.lower(), names)
+            if place.is_country():
+                self.add_country_names(place.country_code, names)

        address = place.address
        if address: