mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
introduce name analyzer
The name analyzer is the actual workhorse of the tokenizer. It is instantiated on a per-thread basis and provides all functions for analysing names and queries.
This commit is contained in:
@@ -124,8 +124,9 @@ class Indexer:
|
||||
LOG.warning("Starting indexing boundaries using %s threads",
|
||||
self.num_threads)
|
||||
|
||||
for rank in range(max(minrank, 4), min(maxrank, 26)):
|
||||
self._index(runners.BoundaryRunner(rank))
|
||||
with self.tokenizer.name_analyzer() as analyzer:
|
||||
for rank in range(max(minrank, 4), min(maxrank, 26)):
|
||||
self._index(runners.BoundaryRunner(rank, analyzer))
|
||||
|
||||
def index_by_rank(self, minrank, maxrank):
|
||||
""" Index all entries of placex in the given rank range (inclusive)
|
||||
@@ -138,15 +139,16 @@ class Indexer:
|
||||
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
|
||||
minrank, maxrank, self.num_threads)
|
||||
|
||||
for rank in range(max(1, minrank), maxrank):
|
||||
self._index(runners.RankRunner(rank))
|
||||
with self.tokenizer.name_analyzer() as analyzer:
|
||||
for rank in range(max(1, minrank), maxrank):
|
||||
self._index(runners.RankRunner(rank, analyzer))
|
||||
|
||||
if maxrank == 30:
|
||||
self._index(runners.RankRunner(0))
|
||||
self._index(runners.InterpolationRunner(), 20)
|
||||
self._index(runners.RankRunner(30), 20)
|
||||
else:
|
||||
self._index(runners.RankRunner(maxrank))
|
||||
if maxrank == 30:
|
||||
self._index(runners.RankRunner(0, analyzer))
|
||||
self._index(runners.InterpolationRunner(), 20)
|
||||
self._index(runners.RankRunner(30, analyzer), 20)
|
||||
else:
|
||||
self._index(runners.RankRunner(maxrank, analyzer))
|
||||
|
||||
|
||||
def index_postcodes(self):
|
||||
|
||||
@@ -2,6 +2,10 @@
|
||||
Mix-ins that provide the actual commands for the indexer for various indexing
|
||||
tasks.
|
||||
"""
|
||||
import functools
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
# pylint: disable=C0111
|
||||
|
||||
class AbstractPlacexRunner:
|
||||
@@ -9,28 +13,26 @@ class AbstractPlacexRunner:
|
||||
"""
|
||||
SELECT_SQL = 'SELECT place_id, (placex_prepare_update(placex)).* FROM placex'
|
||||
|
||||
def __init__(self, rank):
|
||||
def __init__(self, rank, analyzer):
|
||||
self.rank = rank
|
||||
self._sql_terms = 0
|
||||
self._cached_index_sql = None
|
||||
self.analyzer = analyzer
|
||||
|
||||
def _index_sql(self, num_places):
|
||||
if num_places != self._sql_terms:
|
||||
self._cached_index_sql = \
|
||||
""" UPDATE placex
|
||||
SET indexed_status = 0, address = v.addr
|
||||
FROM (VALUES {}) as v(id, addr)
|
||||
WHERE place_id = v.id
|
||||
""".format(','.join(["(%s, %s::hstore)"] * num_places))
|
||||
self._sql_terms = num_places
|
||||
|
||||
return self._cached_index_sql
|
||||
@staticmethod
|
||||
@functools.lru_cache(maxsize=1)
|
||||
def _index_sql(num_places):
|
||||
return """ UPDATE placex
|
||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||
FROM (VALUES {}) as v(id, addr, ti)
|
||||
WHERE place_id = v.id
|
||||
""".format(','.join(["(%s, %s::hstore, %s::json)"] * num_places))
|
||||
|
||||
|
||||
def index_places(self, worker, places):
|
||||
values = []
|
||||
for place in places:
|
||||
values.extend((place[x] for x in ('place_id', 'address')))
|
||||
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||
|
||||
worker.perform(self._index_sql(len(places)), values)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user