Merge pull request #3996 from lonvia/improved-postcode-import

Avoid updates on initial filling of postcode table
This commit is contained in:
Sarah Hoffmann
2026-02-22 13:12:49 +01:00
committed by GitHub
3 changed files with 103 additions and 45 deletions

View File

@@ -120,6 +120,7 @@ class NominatimArgs:
data_object: Sequence[Tuple[str, int]]
data_area: Sequence[Tuple[str, int]]
ro_access: bool
postcode_force_reimport: bool
# Arguments to 'replication'
init: bool

View File

@@ -84,6 +84,10 @@ class UpdateRefresh:
help='Do not enable code for propagating updates')
group.add_argument('--enable-debug-statements', action='store_true',
help='Enable debug warning statements in functions')
group = parser.add_argument_group('Arguments for postcode refresh')
group.add_argument('--force-reimport', action='store_true',
dest='postcode_force_reimport',
help='Recompute the postcodes from scratch instead of updating')
def run(self, args: NominatimArgs) -> int:
from ..tools import refresh, postcodes
@@ -96,7 +100,8 @@ class UpdateRefresh:
LOG.warning("Update postcodes centroid")
tokenizer = self._get_tokenizer(args.config)
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
args.project_dir, tokenizer,
force_reimport=args.postcode_force_reimport)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1)
asyncio.run(indexer.index_postcodes())

View File

@@ -78,7 +78,7 @@ class _PostcodeCollector:
self.collected[normalized] += (x, y)
def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
project_dir: Optional[Path]) -> None:
project_dir: Optional[Path], is_initial: bool) -> None:
""" Update postcodes for the country from the postcodes selected so far.
When 'project_dir' is set, then any postcode files found in this
@@ -87,11 +87,14 @@ class _PostcodeCollector:
if project_dir is not None:
self._update_from_external(analyzer, project_dir)
with conn.cursor() as cur:
cur.execute("""SELECT postcode FROM location_postcodes
WHERE country_code = %s AND osm_id is null""",
(self.country, ))
to_delete = [row[0] for row in cur if row[0] not in self.collected]
if is_initial:
to_delete = []
else:
with conn.cursor() as cur:
cur.execute("""SELECT postcode FROM location_postcodes
WHERE country_code = %s AND osm_id is null""",
(self.country, ))
to_delete = [row[0] for row in cur if row[0] not in self.collected]
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
for k, v in self.collected.items()]
@@ -102,22 +105,32 @@ class _PostcodeCollector:
with conn.cursor() as cur:
if to_add:
cur.executemany(pysql.SQL(
"""INSERT INTO location_postcodes
(country_code, rank_search, postcode, centroid, geometry)
VALUES ({}, {}, %(pc)s,
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326),
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {}))
""").format(pysql.Literal(self.country),
pysql.Literal(_extent_to_rank(self.extent)),
pysql.Literal(self.extent)),
to_add)
columns = ['country_code',
'rank_search',
'postcode',
'centroid',
'geometry']
values = [pysql.Literal(self.country),
pysql.Literal(_extent_to_rank(self.extent)),
pysql.Placeholder('pc'),
pysql.SQL('ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326)'),
pysql.SQL("""expand_by_meters(
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})""")
.format(pysql.Literal(self.extent))]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany(pysql.SQL("INSERT INTO location_postcodes ({}) VALUES ({})")
.format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values)),
to_add)
if to_delete:
cur.execute("""DELETE FROM location_postcodes
WHERE country_code = %s and postcode = any(%s)
AND osm_id is null
""", (self.country, to_delete))
cur.execute("ANALYSE location_postcodes")
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Look for an external postcode file for the active country in
@@ -164,7 +177,8 @@ class _PostcodeCollector:
return None
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
def update_postcodes(dsn: str, project_dir: Optional[Path],
tokenizer: AbstractTokenizer, force_reimport: bool = False) -> None:
""" Update the table of postcodes from the input tables
placex and place_postcode.
"""
@@ -176,45 +190,76 @@ def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractT
SET country_code = get_country_code(centroid)
WHERE country_code is null
""")
if force_reimport:
conn.execute("TRUNCATE location_postcodes")
is_initial = True
else:
is_initial = _is_postcode_table_empty(conn)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
DISABLE TRIGGER location_postcodes_before_insert""")
# Now update first postcode areas
_update_postcode_areas(conn, analyzer, matcher)
_update_postcode_areas(conn, analyzer, matcher, is_initial)
# Then fill with estimated postcode centroids from other info
_update_guessed_postcode(conn, analyzer, matcher, project_dir)
_update_guessed_postcode(conn, analyzer, matcher, project_dir, is_initial)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
ENABLE TRIGGER location_postcodes_before_insert""")
conn.commit()
analyzer.update_postcodes_from_db()
def _is_postcode_table_empty(conn: Connection) -> bool:
    """ Return True when the location_postcodes table contains no rows yet.

        Probes for at most a single place_id, so the check stays cheap
        even on a fully populated table.
    """
    cur = conn.cursor()
    try:
        cur.execute("SELECT place_id FROM location_postcodes LIMIT 1")
        first_row = cur.fetchone()
    finally:
        cur.close()
    return first_row is None
def _insert_postcode_areas(conn: Connection, country_code: str,
extent: int, pcs: list[dict[str, str]]) -> None:
extent: int, pcs: list[dict[str, str]],
is_initial: bool) -> None:
if pcs:
with conn.cursor() as cur:
columns = ['osm_id', 'country_code',
'rank_search', 'postcode',
'centroid', 'geometry']
values = [pysql.Identifier('osm_id'), pysql.Identifier('country_code'),
pysql.Literal(_extent_to_rank(extent)), pysql.Placeholder('out'),
pysql.Identifier('centroid'), pysql.Identifier('geometry')]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany(
pysql.SQL(
""" INSERT INTO location_postcodes
(osm_id, country_code, rank_search, postcode, centroid, geometry)
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
FROM place_postcode
""" INSERT INTO location_postcodes ({})
SELECT {} FROM place_postcode
WHERE osm_type = 'R'
and country_code = {} and postcode = %(in)s
and geometry is not null
""").format(pysql.Literal(_extent_to_rank(extent)),
""").format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values),
pysql.Literal(country_code)),
pcs)
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter) -> None:
matcher: PostcodeFormatter, is_initial: bool) -> None:
""" Update the postcode areas made from postcode boundaries.
"""
# first delete all areas that have gone
conn.execute(""" DELETE FROM location_postcodes pc
WHERE pc.osm_id is not null
AND NOT EXISTS(
SELECT * FROM place_postcode pp
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
and geometry is not null)
""")
if not is_initial:
conn.execute(""" DELETE FROM location_postcodes pc
WHERE pc.osm_id is not null
AND NOT EXISTS(
SELECT * FROM place_postcode pp
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
and geometry is not null)
""")
# now insert all in country batches, triggers will ensure proper updates
with conn.cursor() as cur:
cur.execute(""" SELECT country_code, postcode FROM place_postcode
@@ -230,7 +275,8 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
fmt = matcher.get_matcher(country_code)
elif country_code != cc:
_insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs)
matcher.get_postcode_extent(country_code), pcs,
is_initial)
country_code = cc
fmt = matcher.get_matcher(country_code)
pcs = []
@@ -241,21 +287,26 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
if country_code is not None and pcs:
_insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs)
matcher.get_postcode_extent(country_code), pcs,
is_initial)
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None:
matcher: PostcodeFormatter, project_dir: Optional[Path],
is_initial: bool) -> None:
""" Computes artificial postcode centroids from the placex table,
potentially enhances it with external data and then updates the
postcodes in the table 'location_postcodes'.
"""
# First get the list of countries that currently have postcodes.
# (Doing this before starting to insert, so it is fast on import.)
with conn.cursor() as cur:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
WHERE osm_id is null""")
todo_countries = {row[0] for row in cur}
if is_initial:
todo_countries: set[str] = set()
else:
with conn.cursor() as cur:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
WHERE osm_id is null""")
todo_countries = {row[0] for row in cur}
# Next, get the list of postcodes that are already covered by areas.
area_pcs = defaultdict(set)
@@ -275,6 +326,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
FROM place_postcode WHERE geometry is not null)
""")
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
# Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur:
cur.execute("""
@@ -296,7 +348,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
for country, postcode, x, y in cur:
if collector is None or country != collector.country:
if collector is not None:
collector.commit(conn, analyzer, project_dir)
collector.commit(conn, analyzer, project_dir, is_initial)
collector = _PostcodeCollector(country, matcher.get_matcher(country),
matcher.get_postcode_extent(country),
exclude=area_pcs[country])
@@ -304,14 +356,14 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
collector.add(postcode, x, y)
if collector is not None:
collector.commit(conn, analyzer, project_dir)
collector.commit(conn, analyzer, project_dir, is_initial)
# Now handle any countries that are only in the postcode table.
for country in todo_countries:
fmt = matcher.get_matcher(country)
ext = matcher.get_postcode_extent(country)
_PostcodeCollector(country, fmt, ext,
exclude=area_pcs[country]).commit(conn, analyzer, project_dir)
exclude=area_pcs[country]).commit(conn, analyzer, project_dir, False)
conn.execute("DROP TABLE IF EXISTS _global_postcode_area")