add wrapper class for place data passed to tokenizer

This is mostly for convenience and documentation purposes.
2026-03-07 02:24:08 +00:00 · 2021-09-29 10:37:54 +02:00
parent d44a428b74
commit 231250f2eb
9 changed files with 84 additions and 34 deletions
--- a/nominatim/indexer/place_info.py
+++ b/nominatim/indexer/place_info.py
@@ -0,0 +1,44 @@
+"""
+Wrapper around place information the indexer gets from the database and hands to
+the tokenizer.
+"""
+
+import psycopg2.extras
+
+class PlaceInfo:
+    """ Data class containing all information the tokenizer gets about a
+        place it should process the names for.
+    """
+
+    def __init__(self, info):
+        self._info = info
+
+
+    def analyze(self, analyzer):
+        """ Process this place with the given tokenizer and return the
+            result in psycopg2-compatible Json.
+        """
+        return psycopg2.extras.Json(analyzer.process_place(self))
+
+
+    @property
+    def name(self):
+        """ A dictionary with the names of the place or None if the place
+            has no names.
+        """
+        return self._info.get('name')
+
+
+    @property
+    def address(self):
+        """ A dictionary with the address elements of the place
+            or None if no address information is available.
+        """
+        return self._info.get('address')
+
+
+    @property
+    def country_feature(self):
+        """ Return the country code if the place is a valid country boundary.
+        """
+        return self._info.get('country_feature')
--- a/nominatim/indexer/runners.py
+++ b/nominatim/indexer/runners.py
@@ -4,14 +4,16 @@ tasks.
 """
 import functools

-import psycopg2.extras
 from psycopg2 import sql as pysql

+from nominatim.indexer.place_info import PlaceInfo
+
 # pylint: disable=C0111

 def _mk_valuelist(template, num):
    return pysql.SQL(',').join([pysql.SQL(template)] * num)

+
 class AbstractPlacexRunner:
    """ Returns SQL commands for indexing of the placex table.
    """
@@ -47,7 +49,7 @@ class AbstractPlacexRunner:
        for place in places:
            for field in ('place_id', 'name', 'address', 'linked_place_id'):
                values.append(place[field])
-            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+            values.append(PlaceInfo(place).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), values)

@@ -141,7 +143,7 @@ class InterpolationRunner:
        values = []
        for place in places:
            values.extend((place[x] for x in ('place_id', 'address')))
-            values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
+            values.append(PlaceInfo(place).analyze(self.analyzer))

        worker.perform(self._index_sql(len(places)), values)

--- a/nominatim/tokenizer/base.py
+++ b/nominatim/tokenizer/base.py
@@ -6,6 +6,7 @@ from abc import ABC, abstractmethod
 from typing import List, Tuple, Dict, Any

 from nominatim.config import Configuration
+from nominatim.indexer.place_info import PlaceInfo

 # pylint: disable=unnecessary-pass

@@ -105,20 +106,13 @@ class AbstractAnalyzer(ABC):


    @abstractmethod
-    def process_place(self, place: Dict) -> Any:
+    def process_place(self, place: PlaceInfo) -> Any:
        """ Extract tokens for the given place and compute the
            information to be handed to the PL/pgSQL processor for building
            the search index.

            Arguments:
-                place: Dictionary with the information about the place. Currently
-                       the following fields may be present:
-
-                       - *name* is a dictionary of names for the place together
-                         with the designation of the name.
-                       - *address* is a dictionary of address terms.
-                       - *country_feature* is set to a country code when the
-                         place describes a country.
+                place: Place information retrieved from the database.

            Returns:
                A JSON-serialisable structure that will be handed into
--- a/nominatim/tokenizer/icu_tokenizer.py
+++ b/nominatim/tokenizer/icu_tokenizer.py
@@ -390,18 +390,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
        """
        token_info = _TokenInfo(self._cache)

-        names = place.get('name')
+        names = place.name

        if names:
            fulls, partials = self._compute_name_tokens(names)

            token_info.add_names(fulls, partials)

-            country_feature = place.get('country_feature')
+            country_feature = place.country_feature
            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
                self.add_country_names(country_feature.lower(), names)

-        address = place.get('address')
+        address = place.address
        if address:
            self._process_place_address(token_info, address)

--- a/nominatim/tokenizer/legacy_tokenizer.py
+++ b/nominatim/tokenizer/legacy_tokenizer.py
@@ -405,16 +405,16 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
        """
        token_info = _TokenInfo(self._cache)

-        names = place.get('name')
+        names = place.name

        if names:
            token_info.add_names(self.conn, names)

-            country_feature = place.get('country_feature')
+            country_feature = place.country_feature
            if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
                self.add_country_names(country_feature.lower(), names)

-        address = place.get('address')
+        address = place.address
        if address:
            self._process_place_address(token_info, address)

--- a/nominatim/tools/tiger_data.py
+++ b/nominatim/tools/tiger_data.py
@@ -7,12 +7,11 @@ import logging
 import os
 import tarfile

-import psycopg2.extras
-
 from nominatim.db.connection import connect
 from nominatim.db.async_connection import WorkerPool
 from nominatim.db.sql_preprocessor import SQLPreprocessor
 from nominatim.errors import UsageError
+from nominatim.indexer.place_info import PlaceInfo

 LOG = logging.getLogger()

@@ -58,7 +57,7 @@ def handle_threaded_sql_statements(pool, fd, analyzer):
            address = dict(street=row['street'], postcode=row['postcode'])
            args = ('SRID=4326;' + row['geometry'],
                    int(row['from']), int(row['to']), row['interpolation'],
-                    psycopg2.extras.Json(analyzer.process_place(dict(address=address))),
+                    PlaceInfo({'address': address}).analyze(analyzer),
                    analyzer.normalize_postcode(row['postcode']))
        except ValueError:
            continue