avoid updates on initial filling of postcode table

This commit is contained in:
Sarah Hoffmann
2026-02-20 18:53:48 +01:00
parent af9458a601
commit 2507d5a298

View File

@@ -78,7 +78,7 @@ class _PostcodeCollector:
self.collected[normalized] += (x, y) self.collected[normalized] += (x, y)
def commit(self, conn: Connection, analyzer: AbstractAnalyzer, def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
project_dir: Optional[Path]) -> None: project_dir: Optional[Path], is_initial: bool) -> None:
""" Update postcodes for the country from the postcodes selected so far. """ Update postcodes for the country from the postcodes selected so far.
When 'project_dir' is set, then any postcode files found in this When 'project_dir' is set, then any postcode files found in this
@@ -87,11 +87,14 @@ class _PostcodeCollector:
if project_dir is not None: if project_dir is not None:
self._update_from_external(analyzer, project_dir) self._update_from_external(analyzer, project_dir)
with conn.cursor() as cur: if is_initial:
cur.execute("""SELECT postcode FROM location_postcodes to_delete = []
WHERE country_code = %s AND osm_id is null""", else:
(self.country, )) with conn.cursor() as cur:
to_delete = [row[0] for row in cur if row[0] not in self.collected] cur.execute("""SELECT postcode FROM location_postcodes
WHERE country_code = %s AND osm_id is null""",
(self.country, ))
to_delete = [row[0] for row in cur if row[0] not in self.collected]
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid()))) to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
for k, v in self.collected.items()] for k, v in self.collected.items()]
@@ -102,22 +105,32 @@ class _PostcodeCollector:
with conn.cursor() as cur: with conn.cursor() as cur:
if to_add: if to_add:
cur.executemany(pysql.SQL( columns = ['country_code',
"""INSERT INTO location_postcodes 'rank_search',
(country_code, rank_search, postcode, centroid, geometry) 'postcode',
VALUES ({}, {}, %(pc)s, 'centroid',
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), 'geometry']
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})) values = [pysql.Literal(self.country),
""").format(pysql.Literal(self.country), pysql.Literal(_extent_to_rank(self.extent)),
pysql.Literal(_extent_to_rank(self.extent)), pysql.Placeholder('pc'),
pysql.Literal(self.extent)), pysql.SQL('ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326)'),
to_add) pysql.SQL("""expand_by_meters(
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})""")
.format(pysql.Literal(self.extent))]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany(pysql.SQL("INSERT INTO location_postcodes ({}) VALUES ({})")
.format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values)),
to_add)
if to_delete: if to_delete:
cur.execute("""DELETE FROM location_postcodes cur.execute("""DELETE FROM location_postcodes
WHERE country_code = %s and postcode = any(%s) WHERE country_code = %s and postcode = any(%s)
AND osm_id is null AND osm_id is null
""", (self.country, to_delete)) """, (self.country, to_delete))
cur.execute("ANALYSE location_postcodes")
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None: def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Look for an external postcode file for the active country in """ Look for an external postcode file for the active country in
@@ -176,45 +189,72 @@ def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractT
SET country_code = get_country_code(centroid) SET country_code = get_country_code(centroid)
WHERE country_code is null WHERE country_code is null
""") """)
is_initial = _is_postcode_table_empty(conn)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
DISABLE TRIGGER location_postcodes_before_insert""")
# Now update first postcode areas # Now update first postcode areas
_update_postcode_areas(conn, analyzer, matcher) _update_postcode_areas(conn, analyzer, matcher, is_initial)
# Then fill with estimated postcode centroids from other info # Then fill with estimated postcode centroids from other info
_update_guessed_postcode(conn, analyzer, matcher, project_dir) _update_guessed_postcode(conn, analyzer, matcher, project_dir, is_initial)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
ENABLE TRIGGER location_postcodes_before_insert""")
conn.commit() conn.commit()
analyzer.update_postcodes_from_db() analyzer.update_postcodes_from_db()
def _is_postcode_table_empty(conn: Connection) -> bool:
""" Check if there are any entries in the location_postcodes table yet.
"""
with conn.cursor() as cur:
cur.execute("SELECT place_id FROM location_postcodes LIMIT 1")
return cur.fetchone() is None
def _insert_postcode_areas(conn: Connection, country_code: str, def _insert_postcode_areas(conn: Connection, country_code: str,
extent: int, pcs: list[dict[str, str]]) -> None: extent: int, pcs: list[dict[str, str]],
is_initial: bool) -> None:
if pcs: if pcs:
with conn.cursor() as cur: with conn.cursor() as cur:
columns = ['osm_id', 'country_code',
'rank_search', 'postcode',
'centroid', 'geometry']
values = [pysql.Identifier('osm_id'), pysql.Identifier('country_code'),
pysql.Literal(_extent_to_rank(extent)), pysql.Placeholder('out'),
pysql.Identifier('centroid'), pysql.Identifier('geometry')]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany( cur.executemany(
pysql.SQL( pysql.SQL(
""" INSERT INTO location_postcodes """ INSERT INTO location_postcodes ({})
(osm_id, country_code, rank_search, postcode, centroid, geometry) SELECT {} FROM place_postcode
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
FROM place_postcode
WHERE osm_type = 'R' WHERE osm_type = 'R'
and country_code = {} and postcode = %(in)s and country_code = {} and postcode = %(in)s
and geometry is not null and geometry is not null
""").format(pysql.Literal(_extent_to_rank(extent)), """).format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values),
pysql.Literal(country_code)), pysql.Literal(country_code)),
pcs) pcs)
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer, def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter) -> None: matcher: PostcodeFormatter, is_initial: bool) -> None:
""" Update the postcode areas made from postcode boundaries. """ Update the postcode areas made from postcode boundaries.
""" """
# first delete all areas that have gone # first delete all areas that have gone
conn.execute(""" DELETE FROM location_postcodes pc if not is_initial:
WHERE pc.osm_id is not null conn.execute(""" DELETE FROM location_postcodes pc
AND NOT EXISTS( WHERE pc.osm_id is not null
SELECT * FROM place_postcode pp AND NOT EXISTS(
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id SELECT * FROM place_postcode pp
and geometry is not null) WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
""") and geometry is not null)
""")
# now insert all in country batches, triggers will ensure proper updates # now insert all in country batches, triggers will ensure proper updates
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(""" SELECT country_code, postcode FROM place_postcode cur.execute(""" SELECT country_code, postcode FROM place_postcode
@@ -230,7 +270,8 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
fmt = matcher.get_matcher(country_code) fmt = matcher.get_matcher(country_code)
elif country_code != cc: elif country_code != cc:
_insert_postcode_areas(conn, country_code, _insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs) matcher.get_postcode_extent(country_code), pcs,
is_initial)
country_code = cc country_code = cc
fmt = matcher.get_matcher(country_code) fmt = matcher.get_matcher(country_code)
pcs = [] pcs = []
@@ -241,21 +282,26 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
if country_code is not None and pcs: if country_code is not None and pcs:
_insert_postcode_areas(conn, country_code, _insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs) matcher.get_postcode_extent(country_code), pcs,
is_initial)
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer, def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None: matcher: PostcodeFormatter, project_dir: Optional[Path],
is_initial: bool) -> None:
""" Computes artificial postcode centroids from the placex table, """ Computes artificial postcode centroids from the placex table,
potentially enhances it with external data and then updates the potentially enhances it with external data and then updates the
postcodes in the table 'location_postcodes'. postcodes in the table 'location_postcodes'.
""" """
# First get the list of countries that currently have postcodes. # First get the list of countries that currently have postcodes.
# (Doing this before starting to insert, so it is fast on import.) # (Doing this before starting to insert, so it is fast on import.)
with conn.cursor() as cur: if is_initial:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes todo_countries: set[str] = set()
WHERE osm_id is null""") else:
todo_countries = {row[0] for row in cur} with conn.cursor() as cur:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
WHERE osm_id is null""")
todo_countries = {row[0] for row in cur}
# Next, get the list of postcodes that are already covered by areas. # Next, get the list of postcodes that are already covered by areas.
area_pcs = defaultdict(set) area_pcs = defaultdict(set)
@@ -275,6 +321,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
FROM place_postcode WHERE geometry is not null) FROM place_postcode WHERE geometry is not null)
""") """)
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)") cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
# Recompute the list of valid postcodes from placex. # Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur: with conn.cursor(name="placex_postcodes") as cur:
cur.execute(""" cur.execute("""
@@ -296,7 +343,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
for country, postcode, x, y in cur: for country, postcode, x, y in cur:
if collector is None or country != collector.country: if collector is None or country != collector.country:
if collector is not None: if collector is not None:
collector.commit(conn, analyzer, project_dir) collector.commit(conn, analyzer, project_dir, is_initial)
collector = _PostcodeCollector(country, matcher.get_matcher(country), collector = _PostcodeCollector(country, matcher.get_matcher(country),
matcher.get_postcode_extent(country), matcher.get_postcode_extent(country),
exclude=area_pcs[country]) exclude=area_pcs[country])
@@ -304,14 +351,14 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
collector.add(postcode, x, y) collector.add(postcode, x, y)
if collector is not None: if collector is not None:
collector.commit(conn, analyzer, project_dir) collector.commit(conn, analyzer, project_dir, is_initial)
# Now handle any countries that are only in the postcode table. # Now handle any countries that are only in the postcode table.
for country in todo_countries: for country in todo_countries:
fmt = matcher.get_matcher(country) fmt = matcher.get_matcher(country)
ext = matcher.get_postcode_extent(country) ext = matcher.get_postcode_extent(country)
_PostcodeCollector(country, fmt, ext, _PostcodeCollector(country, fmt, ext,
exclude=area_pcs[country]).commit(conn, analyzer, project_dir) exclude=area_pcs[country]).commit(conn, analyzer, project_dir, False)
conn.execute("DROP TABLE IF EXISTS _global_postcode_area") conn.execute("DROP TABLE IF EXISTS _global_postcode_area")