mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-07 02:24:08 +00:00
add wrapper class for place data passed to tokenizer
This is mostly for convenience and documentation purposes.
This commit is contained in:
44
nominatim/indexer/place_info.py
Normal file
44
nominatim/indexer/place_info.py
Normal file
@@ -0,0 +1,44 @@
|
||||
"""
|
||||
Wrapper around place information the indexer gets from the database and hands to
|
||||
the tokenizer.
|
||||
"""
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
class PlaceInfo:
    """ Data class containing all information the tokenizer gets about a
        place it should process the names for.

        The wrapped object is only ever read through its `get()` method,
        so any mapping-like object works (presumably a database row or a
        plain dict; verify against the callers).
    """

    def __init__(self, info):
        """ Wrap the given place data.

            Arguments:
                info: Mapping-like object with the raw place data. The keys
                      read by this class are 'name', 'address' and
                      'country_feature'. All of them are optional.
        """
        self._info = info


    def analyze(self, analyzer):
        """ Process this place with the given tokenizer and return the
            result in psycopg2-compatible Json.

            Arguments:
                analyzer: Object with a `process_place()` method. The method
                          is called with this PlaceInfo as its only argument.

            Returns:
                The return value of `analyzer.process_place()` wrapped in
                `psycopg2.extras.Json`.
        """
        return psycopg2.extras.Json(analyzer.process_place(self))


    @property
    def name(self):
        """ A dictionary with the names of the place or None if the place
            has no names.
        """
        return self._info.get('name')


    @property
    def address(self):
        """ A dictionary with the address elements of the place
            or None if no address information is available.
        """
        return self._info.get('address')


    @property
    def country_feature(self):
        """ Return the country code if the place is a valid country boundary.

            Yields None when the wrapped data has no 'country_feature' entry.
        """
        return self._info.get('country_feature')
|
||||
@@ -4,14 +4,16 @@ tasks.
|
||||
"""
|
||||
import functools
|
||||
|
||||
import psycopg2.extras
|
||||
from psycopg2 import sql as pysql
|
||||
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
# pylint: disable=C0111
|
||||
|
||||
def _mk_valuelist(template, num):
    """ Return a composed SQL object that repeats the SQL snippet
        `template` `num` times, with the copies separated by commas.
    """
    return pysql.SQL(',').join([pysql.SQL(template)] * num)
|
||||
|
||||
|
||||
class AbstractPlacexRunner:
|
||||
""" Returns SQL commands for indexing of the placex table.
|
||||
"""
|
||||
@@ -47,7 +49,7 @@ class AbstractPlacexRunner:
|
||||
for place in places:
|
||||
for field in ('place_id', 'name', 'address', 'linked_place_id'):
|
||||
values.append(place[field])
|
||||
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||
values.append(PlaceInfo(place).analyze(self.analyzer))
|
||||
|
||||
worker.perform(self._index_sql(len(places)), values)
|
||||
|
||||
@@ -141,7 +143,7 @@ class InterpolationRunner:
|
||||
values = []
|
||||
for place in places:
|
||||
values.extend((place[x] for x in ('place_id', 'address')))
|
||||
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||
values.append(PlaceInfo(place).analyze(self.analyzer))
|
||||
|
||||
worker.perform(self._index_sql(len(places)), values)
|
||||
|
||||
|
||||
@@ -6,6 +6,7 @@ from abc import ABC, abstractmethod
|
||||
from typing import List, Tuple, Dict, Any
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
# pylint: disable=unnecessary-pass
|
||||
|
||||
@@ -105,20 +106,13 @@ class AbstractAnalyzer(ABC):
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def process_place(self, place: Dict) -> Any:
|
||||
def process_place(self, place: PlaceInfo) -> Any:
|
||||
""" Extract tokens for the given place and compute the
|
||||
information to be handed to the PL/pgSQL processor for building
|
||||
the search index.
|
||||
|
||||
Arguments:
|
||||
place: Dictionary with the information about the place. Currently
|
||||
the following fields may be present:
|
||||
|
||||
- *name* is a dictionary of names for the place together
|
||||
with the designation of the name.
|
||||
- *address* is a dictionary of address terms.
|
||||
- *country_feature* is set to a country code when the
|
||||
place describes a country.
|
||||
place: Place information retrieved from the database.
|
||||
|
||||
Returns:
|
||||
A JSON-serialisable structure that will be handed into
|
||||
|
||||
@@ -390,18 +390,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
||||
"""
|
||||
token_info = _TokenInfo(self._cache)
|
||||
|
||||
names = place.get('name')
|
||||
names = place.name
|
||||
|
||||
if names:
|
||||
fulls, partials = self._compute_name_tokens(names)
|
||||
|
||||
token_info.add_names(fulls, partials)
|
||||
|
||||
country_feature = place.get('country_feature')
|
||||
country_feature = place.country_feature
|
||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
||||
self.add_country_names(country_feature.lower(), names)
|
||||
|
||||
address = place.get('address')
|
||||
address = place.address
|
||||
if address:
|
||||
self._process_place_address(token_info, address)
|
||||
|
||||
|
||||
@@ -405,16 +405,16 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
|
||||
"""
|
||||
token_info = _TokenInfo(self._cache)
|
||||
|
||||
names = place.get('name')
|
||||
names = place.name
|
||||
|
||||
if names:
|
||||
token_info.add_names(self.conn, names)
|
||||
|
||||
country_feature = place.get('country_feature')
|
||||
country_feature = place.country_feature
|
||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
||||
self.add_country_names(country_feature.lower(), names)
|
||||
|
||||
address = place.get('address')
|
||||
address = place.address
|
||||
if address:
|
||||
self._process_place_address(token_info, address)
|
||||
|
||||
|
||||
@@ -7,12 +7,11 @@ import logging
|
||||
import os
|
||||
import tarfile
|
||||
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.db.async_connection import WorkerPool
|
||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.indexer.place_info import PlaceInfo
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -58,7 +57,7 @@ def handle_threaded_sql_statements(pool, fd, analyzer):
|
||||
address = dict(street=row['street'], postcode=row['postcode'])
|
||||
args = ('SRID=4326;' + row['geometry'],
|
||||
int(row['from']), int(row['to']), row['interpolation'],
|
||||
psycopg2.extras.Json(analyzer.process_place(dict(address=address))),
|
||||
PlaceInfo({'address': address}).analyze(analyzer),
|
||||
analyzer.normalize_postcode(row['postcode']))
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
Reference in New Issue
Block a user