mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-10 12:04:06 +00:00
split code into submodules
This commit is contained in:
102
src/nominatim_db/tokenizer/factory.py
Normal file
102
src/nominatim_db/tokenizer/factory.py
Normal file
@@ -0,0 +1,102 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2024 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Functions for creating a tokenizer or initialising the right one for an
|
||||
existing database.
|
||||
|
||||
A tokenizer is something that is bound to the lifetime of a database. It
|
||||
can be chosen and configured before the initial import but then needs to
|
||||
be used consistently when querying and updating the database.
|
||||
|
||||
This module provides the functions to create and configure a new tokenizer
|
||||
as well as instantiating the appropriate tokenizer for updating an existing
|
||||
database.
|
||||
|
||||
A tokenizer usually also includes PHP code for querying. The appropriate PHP
|
||||
normalizer module is installed, when the tokenizer is created.
|
||||
"""
|
||||
from typing import Optional
|
||||
import logging
|
||||
import importlib
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim_core.errors import UsageError
|
||||
from nominatim_core.db import properties
|
||||
from nominatim_core.db.connection import connect
|
||||
from nominatim_core.config import Configuration
|
||||
from ..tokenizer.base import AbstractTokenizer, TokenizerModule
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def _import_tokenizer(name: str) -> TokenizerModule:
|
||||
""" Load the tokenizer.py module from project directory.
|
||||
"""
|
||||
src_file = Path(__file__).parent / (name + '_tokenizer.py')
|
||||
if not src_file.is_file():
|
||||
LOG.fatal("No tokenizer named '%s' available. "
|
||||
"Check the setting of NOMINATIM_TOKENIZER.", name)
|
||||
raise UsageError('Tokenizer not found')
|
||||
|
||||
return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
|
||||
|
||||
|
||||
def create_tokenizer(config: Configuration, init_db: bool = True,
|
||||
module_name: Optional[str] = None) -> AbstractTokenizer:
|
||||
""" Create a new tokenizer as defined by the given configuration.
|
||||
|
||||
The tokenizer data and code is copied into the 'tokenizer' directory
|
||||
of the project directory and the tokenizer loaded from its new location.
|
||||
"""
|
||||
if module_name is None:
|
||||
module_name = config.TOKENIZER
|
||||
|
||||
# Create the directory for the tokenizer data
|
||||
assert config.project_dir is not None
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.exists():
|
||||
basedir.mkdir()
|
||||
elif not basedir.is_dir():
|
||||
LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
|
||||
raise UsageError("Tokenizer setup failed.")
|
||||
|
||||
# Import and initialize the tokenizer.
|
||||
tokenizer_module = _import_tokenizer(module_name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_new_db(config, init_db=init_db)
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
properties.set_property(conn, 'tokenizer', module_name)
|
||||
|
||||
return tokenizer
|
||||
|
||||
|
||||
def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer:
|
||||
""" Instantiate a tokenizer for an existing database.
|
||||
|
||||
The function looks up the appropriate tokenizer in the database
|
||||
and initialises it.
|
||||
"""
|
||||
assert config.project_dir is not None
|
||||
basedir = config.project_dir / 'tokenizer'
|
||||
if not basedir.is_dir():
|
||||
# Directory will be repopulated by tokenizer below.
|
||||
basedir.mkdir()
|
||||
|
||||
with connect(config.get_libpq_dsn()) as conn:
|
||||
name = properties.get_property(conn, 'tokenizer')
|
||||
|
||||
if name is None:
|
||||
LOG.fatal("Tokenizer was not set up properly. Database property missing.")
|
||||
raise UsageError('Cannot initialize tokenizer.')
|
||||
|
||||
tokenizer_module = _import_tokenizer(name)
|
||||
|
||||
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
|
||||
tokenizer.init_from_project(config)
|
||||
|
||||
return tokenizer
|
||||
Reference in New Issue
Block a user