move filling of postcode table to python

The Python code now takes care of reading postcodes from placex,
enhancing them with potentially existing external postcodes and
updating location_postcodes accordingly. The initial setup and
updates use exactly the same function.

External postcode handling has been generalized. External postcodes
for any country are now accepted. The format of the external postcode
file has changed. We now expect CSV, potentially gzipped. The
postcodes are no longer saved in the database.
This commit is contained in:
Sarah Hoffmann
2021-05-12 19:57:48 +02:00
parent cae0cf3546
commit a4aba23a83
8 changed files with 264 additions and 166 deletions

View File

@@ -305,13 +305,51 @@ class LegacyNameAnalyzer:
return self.normalizer.transliterate(phrase)
def add_postcodes_from_db(self):
""" Add postcodes from the location_postcode table to the word table.
@staticmethod
def normalize_postcode(postcode):
""" Convert the postcode to a standardized form.
This function must yield exactly the same result as the SQL function
'token_normalized_postcode()'.
"""
return postcode.strip().upper()
def update_postcodes_from_db(self):
""" Update postcode tokens in the word table from the location_postcode
table.
"""
with self.conn.cursor() as cur:
cur.execute("""SELECT count(create_postcode_id(pc))
FROM (SELECT distinct(postcode) as pc
FROM location_postcode) x""")
# This finds us the rows in location_postcode and word that are
# missing in the other table.
cur.execute("""SELECT * FROM
(SELECT pc, word FROM
(SELECT distinct(postcode) as pc FROM location_postcode) p
FULL JOIN
(SELECT word FROM word
WHERE class ='place' and type = 'postcode') w
ON pc = word) x
WHERE pc is null or word is null""")
to_delete = []
to_add = []
for postcode, word in cur:
if postcode is None:
to_delete.append(word)
else:
to_add.append(postcode)
if to_delete:
cur.execute("""DELETE FROM WORD
WHERE class ='place' and type = 'postcode'
and word = any(%s)
""", (to_delete, ))
if to_add:
cur.execute("""SELECT count(create_postcode_id(pc))
FROM unnest(%s) as pc
""", (to_add, ))
def update_special_phrases(self, phrases):
@@ -421,7 +459,8 @@ class LegacyNameAnalyzer:
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
if re.search(r'[:,;]', postcode) is None:
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
self._cache.postcodes.get(self.normalize_postcode(postcode),
_create_postcode_from_db)
class _TokenInfo: