forked from hans/Nominatim
move filling of postcode table to python
The Python code now takes care of reading postcodes from placex, enhancing them with potentially existing external postcodes and updating location_postcodes accordingly. The initial setup and updates use exactly the same function. External postcode handling has been generalized. External postcodes for any country are now accepted. The format of the external postcode file has changed. We now expect CSV, potentially gzipped. The postcodes are no longer saved in the database.
This commit is contained in:
@@ -11,3 +11,5 @@ ignored-modules=icu
|
|||||||
# 'with' statements.
|
# 'with' statements.
|
||||||
ignored-classes=NominatimArgs,closing
|
ignored-classes=NominatimArgs,closing
|
||||||
disable=too-few-public-methods,duplicate-code
|
disable=too-few-public-methods,duplicate-code
|
||||||
|
|
||||||
|
good-names=i,x,y
|
||||||
|
|||||||
@@ -1,58 +0,0 @@
|
|||||||
-- Create a temporary table with postcodes from placex.
|
|
||||||
|
|
||||||
CREATE TEMP TABLE tmp_new_postcode_locations AS
|
|
||||||
SELECT country_code,
|
|
||||||
upper(trim (both ' ' from address->'postcode')) as pc,
|
|
||||||
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
|
|
||||||
FROM placex
|
|
||||||
WHERE address ? 'postcode'
|
|
||||||
AND address->'postcode' NOT SIMILAR TO '%(,|;|:)%'
|
|
||||||
AND geometry IS NOT null
|
|
||||||
GROUP BY country_code, pc;
|
|
||||||
|
|
||||||
CREATE INDEX idx_tmp_new_postcode_locations
|
|
||||||
ON tmp_new_postcode_locations (pc, country_code);
|
|
||||||
|
|
||||||
-- add extra US postcodes
|
|
||||||
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
|
|
||||||
SELECT 'us', postcode, ST_SetSRID(ST_Point(x,y),4326)
|
|
||||||
FROM us_postcode u
|
|
||||||
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
|
|
||||||
WHERE new.country_code = 'us' AND new.pc = u.postcode);
|
|
||||||
-- add extra UK postcodes
|
|
||||||
INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid)
|
|
||||||
SELECT 'gb', postcode, geometry FROM gb_postcode g
|
|
||||||
WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new
|
|
||||||
WHERE new.country_code = 'gb' and new.pc = g.postcode);
|
|
||||||
|
|
||||||
-- Remove all postcodes that are no longer valid
|
|
||||||
DELETE FROM location_postcode old
|
|
||||||
WHERE NOT EXISTS(SELECT 0 FROM tmp_new_postcode_locations new
|
|
||||||
WHERE old.postcode = new.pc
|
|
||||||
AND old.country_code = new.country_code);
|
|
||||||
|
|
||||||
-- Update geometries where necessary
|
|
||||||
UPDATE location_postcode old SET geometry = new.centroid, indexed_status = 1
|
|
||||||
FROM tmp_new_postcode_locations new
|
|
||||||
WHERE old.postcode = new.pc AND old.country_code = new.country_code
|
|
||||||
AND ST_AsText(old.geometry) != ST_AsText(new.centroid);
|
|
||||||
|
|
||||||
-- Remove all postcodes that already exist from the temporary table
|
|
||||||
DELETE FROM tmp_new_postcode_locations new
|
|
||||||
WHERE EXISTS(SELECT 0 FROM location_postcode old
|
|
||||||
WHERE old.postcode = new.pc AND old.country_code = new.country_code);
|
|
||||||
|
|
||||||
-- Add newly added postcode
|
|
||||||
INSERT INTO location_postcode
|
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
|
||||||
SELECT nextval('seq_place'), 1, country_code, pc, centroid
|
|
||||||
FROM tmp_new_postcode_locations new;
|
|
||||||
|
|
||||||
-- Remove unused word entries
|
|
||||||
DELETE FROM word
|
|
||||||
WHERE class = 'place' AND type = 'postcode'
|
|
||||||
AND NOT EXISTS (SELECT 0 FROM location_postcode p
|
|
||||||
WHERE p.postcode = word.word);
|
|
||||||
|
|
||||||
-- Finally index the newly inserted postcodes
|
|
||||||
UPDATE location_postcode SET indexed_status = 0 WHERE indexed_status > 0;
|
|
||||||
@@ -45,12 +45,15 @@ class UpdateRefresh:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run(args):
|
def run(args):
|
||||||
from ..tools import refresh
|
from ..tools import refresh, postcodes
|
||||||
from ..tokenizer import factory as tokenizer_factory
|
from ..tokenizer import factory as tokenizer_factory
|
||||||
|
|
||||||
|
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||||
|
|
||||||
if args.postcodes:
|
if args.postcodes:
|
||||||
LOG.warning("Update postcodes centroid")
|
LOG.warning("Update postcodes centroid")
|
||||||
refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir)
|
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||||
|
args.project_dir, tokenizer)
|
||||||
|
|
||||||
if args.word_counts:
|
if args.word_counts:
|
||||||
LOG.warning('Recompute frequency of full-word search terms')
|
LOG.warning('Recompute frequency of full-word search terms')
|
||||||
@@ -67,7 +70,6 @@ class UpdateRefresh:
|
|||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
refresh.create_functions(conn, args.config,
|
refresh.create_functions(conn, args.config,
|
||||||
args.diffs, args.enable_debug_statements)
|
args.diffs, args.enable_debug_statements)
|
||||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
|
||||||
tokenizer.update_sql_functions(args.config)
|
tokenizer.update_sql_functions(args.config)
|
||||||
|
|
||||||
if args.wiki_data:
|
if args.wiki_data:
|
||||||
|
|||||||
@@ -116,8 +116,8 @@ class SetupAll:
|
|||||||
|
|
||||||
if args.continue_at is None or args.continue_at == 'load-data':
|
if args.continue_at is None or args.continue_at == 'load-data':
|
||||||
LOG.warning('Calculate postcodes')
|
LOG.warning('Calculate postcodes')
|
||||||
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
|
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||||
tokenizer)
|
args.project_dir, tokenizer)
|
||||||
|
|
||||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||||
if args.continue_at is not None and args.continue_at != 'load-data':
|
if args.continue_at is not None and args.continue_at != 'load-data':
|
||||||
|
|||||||
@@ -263,6 +263,16 @@ class LegacyICUNameAnalyzer:
|
|||||||
"""
|
"""
|
||||||
return self.normalizer.transliterate(phrase)
|
return self.normalizer.transliterate(phrase)
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def normalize_postcode(postcode):
|
||||||
|
""" Convert the postcode to a standardized form.
|
||||||
|
|
||||||
|
This function must yield exactly the same result as the SQL function
|
||||||
|
'token_normalized_postcode()'.
|
||||||
|
"""
|
||||||
|
return postcode.strip().upper()
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(maxsize=1024)
|
@functools.lru_cache(maxsize=1024)
|
||||||
def make_standard_word(self, name):
|
def make_standard_word(self, name):
|
||||||
""" Create the normalised version of the input.
|
""" Create the normalised version of the input.
|
||||||
@@ -285,25 +295,44 @@ class LegacyICUNameAnalyzer:
|
|||||||
|
|
||||||
return self.transliterator.transliterate(hnr)
|
return self.transliterator.transliterate(hnr)
|
||||||
|
|
||||||
def add_postcodes_from_db(self):
|
def update_postcodes_from_db(self):
|
||||||
""" Add postcodes from the location_postcode table to the word table.
|
""" Update postcode tokens in the word table from the location_postcode
|
||||||
|
table.
|
||||||
"""
|
"""
|
||||||
|
to_delete = []
|
||||||
copystr = io.StringIO()
|
copystr = io.StringIO()
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
cur.execute("SELECT distinct(postcode) FROM location_postcode")
|
# This finds us the rows in location_postcode and word that are
|
||||||
for (postcode, ) in cur:
|
# missing in the other table.
|
||||||
copystr.write(postcode)
|
cur.execute("""SELECT * FROM
|
||||||
copystr.write('\t ')
|
(SELECT pc, word FROM
|
||||||
copystr.write(self.transliterator.transliterate(postcode))
|
(SELECT distinct(postcode) as pc FROM location_postcode) p
|
||||||
copystr.write('\tplace\tpostcode\t0\n')
|
FULL JOIN
|
||||||
|
(SELECT word FROM word
|
||||||
|
WHERE class ='place' and type = 'postcode') w
|
||||||
|
ON pc = word) x
|
||||||
|
WHERE pc is null or word is null""")
|
||||||
|
|
||||||
copystr.seek(0)
|
for postcode, word in cur:
|
||||||
cur.copy_from(copystr, 'word',
|
if postcode is None:
|
||||||
columns=['word', 'word_token', 'class', 'type',
|
to_delete.append(word)
|
||||||
'search_name_count'])
|
else:
|
||||||
# Don't really need an ID for postcodes....
|
copystr.write(postcode)
|
||||||
# cur.execute("""UPDATE word SET word_id = nextval('seq_word')
|
copystr.write('\t ')
|
||||||
# WHERE word_id is null and type = 'postcode'""")
|
copystr.write(self.transliterator.transliterate(postcode))
|
||||||
|
copystr.write('\tplace\tpostcode\t0\n')
|
||||||
|
|
||||||
|
if to_delete:
|
||||||
|
cur.execute("""DELETE FROM WORD
|
||||||
|
WHERE class ='place' and type = 'postcode'
|
||||||
|
and word = any(%s)
|
||||||
|
""", (to_delete, ))
|
||||||
|
|
||||||
|
if copystr.getvalue():
|
||||||
|
copystr.seek(0)
|
||||||
|
cur.copy_from(copystr, 'word',
|
||||||
|
columns=['word', 'word_token', 'class', 'type',
|
||||||
|
'search_name_count'])
|
||||||
|
|
||||||
|
|
||||||
def update_special_phrases(self, phrases):
|
def update_special_phrases(self, phrases):
|
||||||
@@ -435,22 +464,25 @@ class LegacyICUNameAnalyzer:
|
|||||||
def _add_postcode(self, postcode):
|
def _add_postcode(self, postcode):
|
||||||
""" Make sure the normalized postcode is present in the word table.
|
""" Make sure the normalized postcode is present in the word table.
|
||||||
"""
|
"""
|
||||||
if re.search(r'[:,;]', postcode) is None and not postcode in self._cache.postcodes:
|
if re.search(r'[:,;]', postcode) is None:
|
||||||
term = self.make_standard_word(postcode)
|
postcode = self.normalize_postcode(postcode)
|
||||||
if not term:
|
|
||||||
return
|
|
||||||
|
|
||||||
with self.conn.cursor() as cur:
|
if postcode not in self._cache.postcodes:
|
||||||
# no word_id needed for postcodes
|
term = self.make_standard_word(postcode)
|
||||||
cur.execute("""INSERT INTO word (word, word_token, class, type,
|
if not term:
|
||||||
search_name_count)
|
return
|
||||||
(SELECT pc, %s, 'place', 'postcode', 0
|
|
||||||
FROM (VALUES (%s)) as v(pc)
|
with self.conn.cursor() as cur:
|
||||||
WHERE NOT EXISTS
|
# no word_id needed for postcodes
|
||||||
(SELECT * FROM word
|
cur.execute("""INSERT INTO word (word, word_token, class, type,
|
||||||
WHERE word = pc and class='place' and type='postcode'))
|
search_name_count)
|
||||||
""", (' ' + term, postcode))
|
(SELECT pc, %s, 'place', 'postcode', 0
|
||||||
self._cache.postcodes.add(postcode)
|
FROM (VALUES (%s)) as v(pc)
|
||||||
|
WHERE NOT EXISTS
|
||||||
|
(SELECT * FROM word
|
||||||
|
WHERE word = pc and class='place' and type='postcode'))
|
||||||
|
""", (' ' + term, postcode))
|
||||||
|
self._cache.postcodes.add(postcode)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def _split_housenumbers(hnrs):
|
def _split_housenumbers(hnrs):
|
||||||
|
|||||||
@@ -305,13 +305,51 @@ class LegacyNameAnalyzer:
|
|||||||
return self.normalizer.transliterate(phrase)
|
return self.normalizer.transliterate(phrase)
|
||||||
|
|
||||||
|
|
||||||
def add_postcodes_from_db(self):
|
@staticmethod
|
||||||
""" Add postcodes from the location_postcode table to the word table.
|
def normalize_postcode(postcode):
|
||||||
|
""" Convert the postcode to a standardized form.
|
||||||
|
|
||||||
|
This function must yield exactly the same result as the SQL function
|
||||||
|
'token_normalized_postcode()'.
|
||||||
|
"""
|
||||||
|
return postcode.strip().upper()
|
||||||
|
|
||||||
|
|
||||||
|
def update_postcodes_from_db(self):
|
||||||
|
""" Update postcode tokens in the word table from the location_postcode
|
||||||
|
table.
|
||||||
"""
|
"""
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
cur.execute("""SELECT count(create_postcode_id(pc))
|
# This finds us the rows in location_postcode and word that are
|
||||||
FROM (SELECT distinct(postcode) as pc
|
# missing in the other table.
|
||||||
FROM location_postcode) x""")
|
cur.execute("""SELECT * FROM
|
||||||
|
(SELECT pc, word FROM
|
||||||
|
(SELECT distinct(postcode) as pc FROM location_postcode) p
|
||||||
|
FULL JOIN
|
||||||
|
(SELECT word FROM word
|
||||||
|
WHERE class ='place' and type = 'postcode') w
|
||||||
|
ON pc = word) x
|
||||||
|
WHERE pc is null or word is null""")
|
||||||
|
|
||||||
|
to_delete = []
|
||||||
|
to_add = []
|
||||||
|
|
||||||
|
for postcode, word in cur:
|
||||||
|
if postcode is None:
|
||||||
|
to_delete.append(word)
|
||||||
|
else:
|
||||||
|
to_add.append(postcode)
|
||||||
|
|
||||||
|
if to_delete:
|
||||||
|
cur.execute("""DELETE FROM WORD
|
||||||
|
WHERE class ='place' and type = 'postcode'
|
||||||
|
and word = any(%s)
|
||||||
|
""", (to_delete, ))
|
||||||
|
if to_add:
|
||||||
|
cur.execute("""SELECT count(create_postcode_id(pc))
|
||||||
|
FROM unnest(%s) as pc
|
||||||
|
""", (to_add, ))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def update_special_phrases(self, phrases):
|
def update_special_phrases(self, phrases):
|
||||||
@@ -421,7 +459,8 @@ class LegacyNameAnalyzer:
|
|||||||
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
||||||
|
|
||||||
if re.search(r'[:,;]', postcode) is None:
|
if re.search(r'[:,;]', postcode) is None:
|
||||||
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
|
self._cache.postcodes.get(self.normalize_postcode(postcode),
|
||||||
|
_create_postcode_from_db)
|
||||||
|
|
||||||
|
|
||||||
class _TokenInfo:
|
class _TokenInfo:
|
||||||
|
|||||||
@@ -2,80 +2,167 @@
|
|||||||
Functions for importing, updating and otherwise maintaining the table
|
Functions for importing, updating and otherwise maintaining the table
|
||||||
of artificial postcode centroids.
|
of artificial postcode centroids.
|
||||||
"""
|
"""
|
||||||
|
import csv
|
||||||
|
import gzip
|
||||||
|
import logging
|
||||||
|
|
||||||
|
from psycopg2.extras import execute_values
|
||||||
|
|
||||||
from nominatim.db.utils import execute_file
|
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
def import_postcodes(dsn, project_dir, tokenizer):
|
LOG = logging.getLogger()
|
||||||
""" Set up the initial list of postcodes.
|
|
||||||
|
class _CountryPostcodesCollector:
|
||||||
|
""" Collector for postcodes of a single country.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with connect(dsn) as conn:
|
def __init__(self, country):
|
||||||
conn.drop_table('gb_postcode')
|
self.country = country
|
||||||
conn.drop_table('us_postcode')
|
self.collected = dict()
|
||||||
|
|
||||||
|
|
||||||
|
def add(self, postcode, x, y):
|
||||||
|
""" Add the given postcode to the collection cache. If the postcode
|
||||||
|
already existed, it is overwritten with the new centroid.
|
||||||
|
"""
|
||||||
|
self.collected[postcode] = (x, y)
|
||||||
|
|
||||||
|
|
||||||
|
def commit(self, conn, analyzer, project_dir):
|
||||||
|
""" Update postcodes for the country from the postcodes selected so far
|
||||||
|
as well as any externally supplied postcodes.
|
||||||
|
"""
|
||||||
|
self._update_from_external(analyzer, project_dir)
|
||||||
|
to_add, to_delete, to_update = self._compute_changes(conn)
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""CREATE TABLE gb_postcode (
|
if to_add:
|
||||||
id integer,
|
execute_values(cur,
|
||||||
postcode character varying(9),
|
"""INSERT INTO location_postcodes
|
||||||
geometry GEOMETRY(Point, 4326))""")
|
(place_id, indexed_status, countrycode,
|
||||||
|
postcode, geometry) VALUES %s""",
|
||||||
|
to_add,
|
||||||
|
template="""(nextval('seq_place'), 1, '{}',
|
||||||
|
%s, 'SRID=4326;POINT(%s %s)')
|
||||||
|
""".format(self.country))
|
||||||
|
if to_delete:
|
||||||
|
cur.execute("""DELETE FROM location_postcodes
|
||||||
|
WHERE country_code = %s and postcode = any(%s)
|
||||||
|
""", (self.country, to_delete))
|
||||||
|
if to_update:
|
||||||
|
execute_values(cur,
|
||||||
|
"""UPDATE location_postcodes
|
||||||
|
SET indexed_status = 2,
|
||||||
|
geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
|
||||||
|
FROM (VALUES %s) AS v (pc, x, y)
|
||||||
|
WHERE country_code = '{}' and postcode = pc
|
||||||
|
""".format(self.country),
|
||||||
|
to_update)
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_changes(self, conn):
|
||||||
|
""" Compute which postcodes from the collected postcodes have to be
|
||||||
|
added or modified and which from the location_postcodes table
|
||||||
|
have to be deleted.
|
||||||
|
"""
|
||||||
|
to_update = []
|
||||||
|
to_delete = []
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""CREATE TABLE us_postcode (
|
cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
|
||||||
postcode text,
|
FROM location_postcodes
|
||||||
x double precision,
|
WHERE country_code = %s""",
|
||||||
y double precision)""")
|
(self.country, ))
|
||||||
conn.commit()
|
for postcode, x, y in cur:
|
||||||
|
oldx, oldy = self.collected.pop(postcode, (None, None))
|
||||||
|
if oldx is not None:
|
||||||
|
dist = (x - oldx)**2 + (y - oldy)**2
|
||||||
|
if dist > 0.000001:
|
||||||
|
to_update.append(postcode, x, y)
|
||||||
|
else:
|
||||||
|
to_delete.append(postcode)
|
||||||
|
|
||||||
gb_postcodes = project_dir / 'gb_postcode_data.sql.gz'
|
to_add = [(k, v[0], v[1]) for k, v in self.collected.items()]
|
||||||
if gb_postcodes.is_file():
|
self.collected = []
|
||||||
execute_file(dsn, gb_postcodes)
|
|
||||||
|
|
||||||
us_postcodes = project_dir / 'us_postcode_data.sql.gz'
|
return to_add, to_delete, to_update
|
||||||
if us_postcodes.is_file():
|
|
||||||
execute_file(dsn, us_postcodes)
|
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute("TRUNCATE location_postcode")
|
|
||||||
cur.execute("""
|
|
||||||
INSERT INTO location_postcode
|
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
|
||||||
SELECT nextval('seq_place'), 1, country_code,
|
|
||||||
token_normalized_postcode(address->'postcode') as pc,
|
|
||||||
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
|
|
||||||
FROM placex
|
|
||||||
WHERE address ? 'postcode'
|
|
||||||
and token_normalized_postcode(address->'postcode') is not null
|
|
||||||
AND geometry IS NOT null
|
|
||||||
GROUP BY country_code, pc
|
|
||||||
""")
|
|
||||||
|
|
||||||
cur.execute("""
|
def _update_from_external(self, analyzer, project_dir):
|
||||||
INSERT INTO location_postcode
|
""" Look for an external postcode file for the active country in
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
the project directory and add missing postcodes when found.
|
||||||
SELECT nextval('seq_place'), 1, 'us',
|
"""
|
||||||
token_normalized_postcode(postcode),
|
csvfile = self._open_external(project_dir)
|
||||||
ST_SetSRID(ST_Point(x,y),4326)
|
if csvfile is None:
|
||||||
FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
return
|
||||||
(SELECT postcode FROM location_postcode
|
|
||||||
WHERE country_code = 'us')
|
|
||||||
""")
|
|
||||||
|
|
||||||
cur.execute("""
|
try:
|
||||||
INSERT INTO location_postcode
|
reader = csv.DictReader(csvfile)
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
for row in reader:
|
||||||
SELECT nextval('seq_place'), 1, 'gb',
|
if 'postcode' not in row or 'lat' not in row or 'lon' not in row:
|
||||||
token_normalized_postcode(postcode), geometry
|
LOG.warning("Bad format for external postcode file for country '%s'."
|
||||||
FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
" Ignored.", self.country)
|
||||||
(SELECT postcode FROM location_postcode
|
return
|
||||||
WHERE country_code = 'gb')
|
postcode = analyzer.normalize_postcode(row['postcode'])
|
||||||
""")
|
if postcode not in self.collected:
|
||||||
|
try:
|
||||||
|
self.collected[postcode] = (float(row['lon'], float(row['lat'])))
|
||||||
|
except ValueError:
|
||||||
|
LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
|
||||||
|
row['lat'], row['lon'], self.country)
|
||||||
|
|
||||||
cur.execute("""
|
finally:
|
||||||
DELETE FROM word WHERE class='place' and type='postcode'
|
csvfile.close()
|
||||||
and word NOT IN (SELECT postcode FROM location_postcode)
|
|
||||||
""")
|
|
||||||
conn.commit()
|
|
||||||
|
|
||||||
with tokenizer.name_analyzer() as analyzer:
|
|
||||||
analyzer.add_postcodes_from_db()
|
def _open_external(self, project_dir):
|
||||||
|
fname = project_dir / '{}_postcodes.csv'.format(self.country)
|
||||||
|
|
||||||
|
if fname.is_file():
|
||||||
|
LOG.info("Using external postcode file '%s'.", fname)
|
||||||
|
return open(fname, 'r')
|
||||||
|
|
||||||
|
fname = project_dir / '{}_postcodes.csv.gz'.format(self.country)
|
||||||
|
|
||||||
|
if fname.is_file():
|
||||||
|
LOG.info("Using external postcode file '%s'.", fname)
|
||||||
|
return gzip.open(fname, 'rt')
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def update_postcodes(dsn, project_dir, tokenizer):
|
||||||
|
""" Update the table of artificial postcodes.
|
||||||
|
|
||||||
|
Computes artificial postcode centroids from the placex table,
|
||||||
|
potentially enhances it with external data and then updates the
|
||||||
|
postcodes in the table 'location_postcode'.
|
||||||
|
"""
|
||||||
|
with tokenizer.name_analyzer() as analyzer:
|
||||||
|
with connect(dsn) as conn:
|
||||||
|
with conn.cursor("placex_postcodes") as cur:
|
||||||
|
cur.execute("""SELECT country_code, pc, ST_X(centroid), ST_Y(centroid)
|
||||||
|
FROM (
|
||||||
|
SELECT country_code,
|
||||||
|
token_normalized_postcode(address->'postcode') as pc,
|
||||||
|
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
|
||||||
|
FROM placex
|
||||||
|
WHERE address ? 'postcode' and geometry IS NOT null
|
||||||
|
GROUP BY country_code, pc) xx
|
||||||
|
WHERE pc is not null
|
||||||
|
ORDER BY country_code, pc""")
|
||||||
|
|
||||||
|
collector = None
|
||||||
|
|
||||||
|
for country, postcode, x, y in cur:
|
||||||
|
if collector is None or country != collector.country:
|
||||||
|
if collector is not None:
|
||||||
|
collector.commit(conn, analyzer, project_dir)
|
||||||
|
collector = _CountryPostcodesCollector(country)
|
||||||
|
collector.add(postcode, x, y)
|
||||||
|
|
||||||
|
if collector is not None:
|
||||||
|
collector.commit(conn, analyzer, project_dir)
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
analyzer.add_postcodes_from_db()
|
||||||
|
|||||||
@@ -13,12 +13,6 @@ from nominatim.version import NOMINATIM_VERSION
|
|||||||
|
|
||||||
LOG = logging.getLogger()
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
def update_postcodes(dsn, sql_dir):
|
|
||||||
""" Recalculate postcode centroids and add, remove and update entries in the
|
|
||||||
location_postcode table. `conn` is an opne connection to the database.
|
|
||||||
"""
|
|
||||||
execute_file(dsn, sql_dir / 'update-postcodes.sql')
|
|
||||||
|
|
||||||
|
|
||||||
def recompute_word_counts(dsn, sql_dir):
|
def recompute_word_counts(dsn, sql_dir):
|
||||||
""" Compute the frequency of full-word search terms.
|
""" Compute the frequency of full-word search terms.
|
||||||
|
|||||||
Reference in New Issue
Block a user