Export more data for the tokenizer's name preparation

Adds class, type, country and rank to the exported information and removes the rather odd hack for countries (the `country_feature` attribute, which each tokenizer had to check against a two-letter pattern). Whether a place represents a country boundary can now be computed by the tokenizer.
2026-03-09 11:34:07 +00:00 · 2021-09-29 11:54:14 +02:00
parent 231250f2eb
commit be65c8303f
7 changed files with 85 additions and 45 deletions
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -397,9 +397,8 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):

            token_info.add_names(fulls, partials)

-            country_feature = place.country_feature
-            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
-                self.add_country_names(country_feature.lower(), names)
+            if place.is_country():
+                self.add_country_names(place.country_code, names)

        address = place.address
        if address:
--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -410,9 +410,8 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
        if names:
            token_info.add_names(self.conn, names)

-            country_feature = place.country_feature
-            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
-                self.add_country_names(country_feature.lower(), names)
+            if place.is_country():
+                self.add_country_names(place.country_code, names)

        address = place.address
        if address: