icu: move housenumber token computation out of TokenInfo
This was the last function to use the cache. There is now a cleaner separation of responsibilities.
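As an illustration of the new split (a minimal, self-contained sketch only; the class names, the dict-based cache and the fake token ids are stand-ins, not Nominatim code): the analyzer now owns normalisation, the token lookup and its cache, while _TokenInfo simply records whatever tokens it is handed.

    class TokenInfoSketch:
        """Collects token data; it no longer needs a DB connection or a cache."""
        def __init__(self):
            self.data = {}

        def add_housenumbers(self, tokens, hnrs):
            # Only stores what the analyzer has already resolved.
            self.data['hnr_tokens'] = sorted(tokens)
            self.data['hnr'] = ';'.join(sorted(hnrs))


    class AnalyzerSketch:
        """Owns normalisation, token lookup and caching for housenumbers."""
        def __init__(self):
            self._hnr_cache = {}   # stand-in for the analyzer-level token cache

        def compute_housenumber_token(self, name):
            norm = name.strip().lower()   # stand-in for _search_normalized()
            if not norm:
                return None, None
            # stand-in for the cached "SELECT getorcreate_hnr_id(%s)" lookup
            token = self._hnr_cache.setdefault(norm, len(self._hnr_cache) + 1)
            return token, norm


    analyzer = AnalyzerSketch()
    info = TokenInfoSketch()
    hnr_tokens, hnrs = set(), set()
    for name in ('12 A', '12 a', ''):
        token, hnr = analyzer.compute_housenumber_token(name)
        if token is not None:
            hnr_tokens.add(token)
            hnrs.add(hnr)
    info.add_housenumbers(hnr_tokens, hnrs)
    print(info.data)   # {'hnr_tokens': [1], 'hnr': '12 a'}

In the actual commit below, the dict lookup is replaced by the cached getorcreate_hnr_id() database call inside the new _compute_housenumber_token() method.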
@@ -282,13 +282,6 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
         return postcode.strip().upper()
 
 
-    def _make_standard_hnr(self, hnr):
-        """ Create a normalised version of a housenumber.
-
-            This function takes minor shortcuts on transliteration.
-        """
-        return self._search_normalized(hnr)
-
     def update_postcodes_from_db(self):
         """ Update postcode tokens in the word table from the location_postcode
             table.
@@ -456,7 +449,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
             Returns a JSON-serializable structure that will be handed into
             the database via the token_info field.
         """
-        token_info = _TokenInfo(self._cache)
+        token_info = _TokenInfo()
 
         names, address = self.sanitizer.process_names(place)
 
@@ -475,6 +468,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
 
 
     def _process_place_address(self, token_info, address):
+        hnr_tokens = set()
         hnrs = set()
         addr_terms = []
         streets = []
@@ -482,9 +476,10 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
             if item.kind == 'postcode':
                 self._add_postcode(item.name)
             elif item.kind == 'housenumber':
-                norm_name = self._make_standard_hnr(item.name)
-                if norm_name:
-                    hnrs.add(norm_name)
+                token, hnr = self._compute_housenumber_token(item)
+                if token is not None:
+                    hnr_tokens.add(token)
+                    hnrs.add(hnr)
             elif item.kind == 'street':
                 streets.extend(self._retrieve_full_tokens(item.name))
             elif item.kind == 'place':
@@ -495,7 +490,7 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                 addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
 
         if hnrs:
-            token_info.add_housenumbers(self.conn, hnrs)
+            token_info.add_housenumbers(hnr_tokens, hnrs)
 
         if addr_terms:
            token_info.add_address_terms(addr_terms)
@@ -504,6 +499,24 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
             token_info.add_street(streets)
 
 
+    def _compute_housenumber_token(self, hnr):
+        """ Normalize the housenumber and return the word token and the
+            canonical form.
+        """
+        norm_name = self._search_normalized(hnr.name)
+        if not norm_name:
+            return None, None
+
+        token = self._cache.housenumbers.get(norm_name)
+        if token is None:
+            with self.conn.cursor() as cur:
+                cur.execute("SELECT getorcreate_hnr_id(%s)", (norm_name, ))
+                token = cur.fetchone()[0]
+                self._cache.housenumbers[norm_name] = token
+
+        return token, norm_name
+
+
     def _compute_partial_tokens(self, name):
         """ Normalize the given term, split it into partial words and return
             then token list for them.
@@ -612,8 +625,7 @@ class _TokenInfo:
 class _TokenInfo:
     """ Collect token information to be sent back to the database.
     """
-    def __init__(self, cache):
-        self._cache = cache
+    def __init__(self):
         self.data = {}
 
     @staticmethod
@@ -627,11 +639,11 @@ class _TokenInfo:
         self.data['names'] = self._mk_array(itertools.chain(fulls, partials))
 
 
-    def add_housenumbers(self, conn, hnrs):
+    def add_housenumbers(self, tokens, hnrs):
        """ Extract housenumber information from a list of normalised
            housenumbers.
        """
-        self.data['hnr_tokens'] = self._mk_array(self._cache.get_hnr_tokens(conn, hnrs))
+        self.data['hnr_tokens'] = self._mk_array(tokens)
         self.data['hnr'] = ';'.join(hnrs)
 
 
@@ -670,29 +682,3 @@ class _TokenCache:
         self.fulls = {}
         self.postcodes = set()
         self.housenumbers = {}
-
-
-    def get_hnr_tokens(self, conn, terms):
-        """ Get token ids for a list of housenumbers, looking them up in the
-            database if necessary. `terms` is an iterable of normalized
-            housenumbers.
-        """
-        tokens = []
-        askdb = []
-
-        for term in terms:
-            token = self.housenumbers.get(term)
-            if token is None:
-                askdb.append(term)
-            else:
-                tokens.append(token)
-
-        if askdb:
-            with conn.cursor() as cur:
-                cur.execute("SELECT nr, getorcreate_hnr_id(nr) FROM unnest(%s) as nr",
-                            (askdb, ))
-                for term, tid in cur:
-                    self.housenumbers[term] = tid
-                    tokens.append(tid)
-
-        return tokens