Nominatim/nominatim/tokenizer/legacy_tokenizer.py

"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
from nominatim.db.connection import connect
from nominatim.db import properties

DBCFG_NORMALIZATION = "tokenizer_normalization"

def create(dsn, data_dir):
    """ Create a new instance of the tokenizer provided by this module.
    """
    return LegacyTokenizer(dsn, data_dir)

class LegacyTokenizer:
    """ The legacy tokenizer uses a special PostgreSQL module to normalize
        names and queries. The tokenizer thus implements normalization through
        calls to the database.
    """

    def __init__(self, dsn, data_dir):
        self.dsn = dsn
        self.data_dir = data_dir
        self.normalization = None


    def init_new_db(self, config):
        """ Set up a new tokenizer for the database.

            This copies all necessary data in the project directory to make
            sure the tokenizer remains stable even over updates.
        """
        self.normalization = config.TERM_NORMALIZATION

        # Stable configuration is saved in the database.
        with connect(self.dsn) as conn:
            properties.set_property(conn, DBCFG_NORMALIZATION,
                                    self.normalization)


    def init_from_project(self):
        """ Initialise the tokenizer from the project directory.
        """
        with connect(self.dsn) as conn:
            self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)