introduce tokenizer modules

This adds the boilerplate for selecting configurable tokenizers.
A tokenizer is chosen at import time and then installs itself, so
that it stays fixed for that database import even when the software
itself is later updated.

The legacy tokenizer implements Nominatim's traditional algorithms.
This commit is contained in:
Sarah Hoffmann
2021-04-21 09:57:17 +02:00
parent 5c7b9ef909
commit af968d4903
10 changed files with 289 additions and 0 deletions

View File

@@ -56,6 +56,7 @@ class SetupAll:
from ..tools import refresh
from ..indexer.indexer import Indexer
from ..tools import postcodes
from ..tokenizer import factory as tokenizer_factory
if args.osm_file and not Path(args.osm_file).is_file():
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
@@ -112,6 +113,10 @@ class SetupAll:
args.data_dir,
args.threads or psutil.cpu_count() or 1)
LOG.warning("Setting up tokenizer")
tokenizer = tokenizer_factory.create_tokenizer(args.config)
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Calculate postcodes')
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)