reorganise process_place function

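Move the address processing out of process_place() into its own
method, _process_place_address(), in both LegacyICUNameAnalyzer and
LegacyNameAnalyzer, as it is rather extensive.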
This commit is contained in:
Sarah Hoffmann
2021-07-12 11:53:25 +02:00
parent fff0012249
commit 47adb2a3fc
2 changed files with 51 additions and 48 deletions

View File

@@ -411,33 +411,36 @@ class LegacyICUNameAnalyzer:
self.add_country_names(country_feature.lower(), names) self.add_country_names(country_feature.lower(), names)
address = place.get('address') address = place.get('address')
if address: if address:
hnrs = [] self._process_place_address(token_info, address)
addr_terms = []
for key, value in address.items():
if key == 'postcode':
self._add_postcode(value)
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(value)
elif key == 'street':
token_info.add_street(*self._compute_name_tokens({'name': value}))
elif key == 'place':
token_info.add_place(*self._compute_name_tokens({'name': value}))
elif not key.startswith('_') and \
key not in ('country', 'full'):
addr_terms.append((key, *self._compute_name_tokens({'name': value})))
if hnrs:
hnrs = self._split_housenumbers(hnrs)
token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs])
if addr_terms:
token_info.add_address_terms(addr_terms)
return token_info.data return token_info.data
def _process_place_address(self, token_info, address):
    """ Walk over the entries of the address dictionary and hand each
        one to the matching handler: postcodes go to _add_postcode(),
        street and place names are tokenized and added to token_info,
        house numbers and the remaining address terms are collected
        first and added in one go after the loop.

        Keys starting with an underscore and the keys 'country' and
        'full' are ignored.
    """
    housenumbers = []
    terms = []

    for kind, text in address.items():
        if kind == 'postcode':
            self._add_postcode(text)
        elif kind in ('housenumber', 'streetnumber', 'conscriptionnumber'):
            housenumbers.append(text)
        elif kind == 'street':
            token_info.add_street(*self._compute_name_tokens({'name': text}))
        elif kind == 'place':
            token_info.add_place(*self._compute_name_tokens({'name': text}))
        elif not (kind.startswith('_') or kind in ('country', 'full')):
            terms.append((kind, *self._compute_name_tokens({'name': text})))

    if housenumbers:
        # Split the collected values into single house numbers first,
        # then standardize each of them.
        single_numbers = self._split_housenumbers(housenumbers)
        token_info.add_housenumbers(
            self.conn, [self._make_standard_hnr(n) for n in single_numbers])

    if terms:
        token_info.add_address_terms(terms)
def _compute_name_tokens(self, names): def _compute_name_tokens(self, names):
""" Computes the full name and partial name tokens for the given """ Computes the full name and partial name tokens for the given
dictionary of names. dictionary of names.

View File

@@ -424,37 +424,37 @@ class LegacyNameAnalyzer:
self.add_country_names(country_feature.lower(), names) self.add_country_names(country_feature.lower(), names)
address = place.get('address') address = place.get('address')
if address: if address:
hnrs = [] self._process_place_address(token_info, address)
addr_terms = []
for key, value in address.items():
if key == 'postcode':
self._add_postcode(value)
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(value)
elif key == 'street':
token_info.add_street(self.conn, value)
elif key == 'place':
token_info.add_place(self.conn, value)
elif not key.startswith('_') and \
key not in ('country', 'full'):
addr_terms.append((key, value))
if hnrs:
token_info.add_housenumbers(self.conn, hnrs)
if addr_terms:
token_info.add_address_terms(self.conn, addr_terms)
return token_info.data return token_info.data
def _add_postcode(self, postcode): def _process_place_address(self, token_info, address):
""" Make sure the normalized postcode is present in the word table. hnrs = []
""" addr_terms = []
if re.search(r'[:,;]', postcode) is None:
self._cache.add_postcode(self.conn, self.normalize_postcode(postcode)) for key, value in address.items():
if key == 'postcode':
# Make sure the normalized postcode is present in the word table.
if re.search(r'[:,;]', value) is None:
self._cache.add_postcode(self.conn,
self.normalize_postcode(value))
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
hnrs.append(value)
elif key == 'street':
token_info.add_street(self.conn, value)
elif key == 'place':
token_info.add_place(self.conn, value)
elif not key.startswith('_') and key not in ('country', 'full'):
addr_terms.append((key, value))
if hnrs:
token_info.add_housenumbers(self.conn, hnrs)
if addr_terms:
token_info.add_address_terms(self.conn, addr_terms)
class _TokenInfo: class _TokenInfo: