mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 02:58:13 +00:00
Merge pull request #3996 from lonvia/improved-postcode-import
Avoid updates on initial filling of postcode table
This commit is contained in:
@@ -120,6 +120,7 @@ class NominatimArgs:
|
||||
data_object: Sequence[Tuple[str, int]]
|
||||
data_area: Sequence[Tuple[str, int]]
|
||||
ro_access: bool
|
||||
postcode_force_reimport: bool
|
||||
|
||||
# Arguments to 'replication'
|
||||
init: bool
|
||||
|
||||
@@ -84,6 +84,10 @@ class UpdateRefresh:
|
||||
help='Do not enable code for propagating updates')
|
||||
group.add_argument('--enable-debug-statements', action='store_true',
|
||||
help='Enable debug warning statements in functions')
|
||||
group = parser.add_argument_group('Arguments for postcode refresh')
|
||||
group.add_argument('--force-reimport', action='store_true',
|
||||
dest='postcode_force_reimport',
|
||||
help='Recompute the postcodes from scratch instead of updating')
|
||||
|
||||
def run(self, args: NominatimArgs) -> int:
|
||||
from ..tools import refresh, postcodes
|
||||
@@ -96,7 +100,8 @@ class UpdateRefresh:
|
||||
LOG.warning("Update postcodes centroid")
|
||||
tokenizer = self._get_tokenizer(args.config)
|
||||
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||
args.project_dir, tokenizer)
|
||||
args.project_dir, tokenizer,
|
||||
force_reimport=args.postcode_force_reimport)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||
args.threads or 1)
|
||||
asyncio.run(indexer.index_postcodes())
|
||||
|
||||
@@ -78,7 +78,7 @@ class _PostcodeCollector:
|
||||
self.collected[normalized] += (x, y)
|
||||
|
||||
def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
|
||||
project_dir: Optional[Path]) -> None:
|
||||
project_dir: Optional[Path], is_initial: bool) -> None:
|
||||
""" Update postcodes for the country from the postcodes selected so far.
|
||||
|
||||
When 'project_dir' is set, then any postcode files found in this
|
||||
@@ -87,11 +87,14 @@ class _PostcodeCollector:
|
||||
if project_dir is not None:
|
||||
self._update_from_external(analyzer, project_dir)
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT postcode FROM location_postcodes
|
||||
WHERE country_code = %s AND osm_id is null""",
|
||||
(self.country, ))
|
||||
to_delete = [row[0] for row in cur if row[0] not in self.collected]
|
||||
if is_initial:
|
||||
to_delete = []
|
||||
else:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT postcode FROM location_postcodes
|
||||
WHERE country_code = %s AND osm_id is null""",
|
||||
(self.country, ))
|
||||
to_delete = [row[0] for row in cur if row[0] not in self.collected]
|
||||
|
||||
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
|
||||
for k, v in self.collected.items()]
|
||||
@@ -102,22 +105,32 @@ class _PostcodeCollector:
|
||||
|
||||
with conn.cursor() as cur:
|
||||
if to_add:
|
||||
cur.executemany(pysql.SQL(
|
||||
"""INSERT INTO location_postcodes
|
||||
(country_code, rank_search, postcode, centroid, geometry)
|
||||
VALUES ({}, {}, %(pc)s,
|
||||
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326),
|
||||
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {}))
|
||||
""").format(pysql.Literal(self.country),
|
||||
pysql.Literal(_extent_to_rank(self.extent)),
|
||||
pysql.Literal(self.extent)),
|
||||
to_add)
|
||||
columns = ['country_code',
|
||||
'rank_search',
|
||||
'postcode',
|
||||
'centroid',
|
||||
'geometry']
|
||||
values = [pysql.Literal(self.country),
|
||||
pysql.Literal(_extent_to_rank(self.extent)),
|
||||
pysql.Placeholder('pc'),
|
||||
pysql.SQL('ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326)'),
|
||||
pysql.SQL("""expand_by_meters(
|
||||
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})""")
|
||||
.format(pysql.Literal(self.extent))]
|
||||
if is_initial:
|
||||
columns.extend(('place_id', 'indexed_status'))
|
||||
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
|
||||
|
||||
cur.executemany(pysql.SQL("INSERT INTO location_postcodes ({}) VALUES ({})")
|
||||
.format(pysql.SQL(',')
|
||||
.join(pysql.Identifier(c) for c in columns),
|
||||
pysql.SQL(',').join(values)),
|
||||
to_add)
|
||||
if to_delete:
|
||||
cur.execute("""DELETE FROM location_postcodes
|
||||
WHERE country_code = %s and postcode = any(%s)
|
||||
AND osm_id is null
|
||||
""", (self.country, to_delete))
|
||||
cur.execute("ANALYSE location_postcodes")
|
||||
|
||||
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
||||
""" Look for an external postcode file for the active country in
|
||||
@@ -164,7 +177,8 @@ class _PostcodeCollector:
|
||||
return None
|
||||
|
||||
|
||||
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
|
||||
def update_postcodes(dsn: str, project_dir: Optional[Path],
|
||||
tokenizer: AbstractTokenizer, force_reimport: bool = False) -> None:
|
||||
""" Update the table of postcodes from the input tables
|
||||
placex and place_postcode.
|
||||
"""
|
||||
@@ -176,45 +190,76 @@ def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractT
|
||||
SET country_code = get_country_code(centroid)
|
||||
WHERE country_code is null
|
||||
""")
|
||||
if force_reimport:
|
||||
conn.execute("TRUNCATE location_postcodes")
|
||||
is_initial = True
|
||||
else:
|
||||
is_initial = _is_postcode_table_empty(conn)
|
||||
if is_initial:
|
||||
conn.execute("""ALTER TABLE location_postcodes
|
||||
DISABLE TRIGGER location_postcodes_before_insert""")
|
||||
# Now update first postcode areas
|
||||
_update_postcode_areas(conn, analyzer, matcher)
|
||||
_update_postcode_areas(conn, analyzer, matcher, is_initial)
|
||||
# Then fill with estimated postcode centroids from other info
|
||||
_update_guessed_postcode(conn, analyzer, matcher, project_dir)
|
||||
_update_guessed_postcode(conn, analyzer, matcher, project_dir, is_initial)
|
||||
if is_initial:
|
||||
conn.execute("""ALTER TABLE location_postcodes
|
||||
ENABLE TRIGGER location_postcodes_before_insert""")
|
||||
conn.commit()
|
||||
|
||||
analyzer.update_postcodes_from_db()
|
||||
|
||||
|
||||
def _is_postcode_table_empty(conn: Connection) -> bool:
    """ Report whether the location_postcodes table has no rows at all.

        At most one row is requested from the table; the function
        returns True when the query yields nothing and False as soon as
        a single row comes back.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT place_id FROM location_postcodes LIMIT 1")
        first_row = cur.fetchone()

    # fetchone() yields None when the result set is empty.
    return first_row is None
|
||||
|
||||
|
||||
def _insert_postcode_areas(conn: Connection, country_code: str,
|
||||
extent: int, pcs: list[dict[str, str]]) -> None:
|
||||
extent: int, pcs: list[dict[str, str]],
|
||||
is_initial: bool) -> None:
|
||||
if pcs:
|
||||
with conn.cursor() as cur:
|
||||
columns = ['osm_id', 'country_code',
|
||||
'rank_search', 'postcode',
|
||||
'centroid', 'geometry']
|
||||
values = [pysql.Identifier('osm_id'), pysql.Identifier('country_code'),
|
||||
pysql.Literal(_extent_to_rank(extent)), pysql.Placeholder('out'),
|
||||
pysql.Identifier('centroid'), pysql.Identifier('geometry')]
|
||||
if is_initial:
|
||||
columns.extend(('place_id', 'indexed_status'))
|
||||
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
|
||||
|
||||
cur.executemany(
|
||||
pysql.SQL(
|
||||
""" INSERT INTO location_postcodes
|
||||
(osm_id, country_code, rank_search, postcode, centroid, geometry)
|
||||
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
|
||||
FROM place_postcode
|
||||
""" INSERT INTO location_postcodes ({})
|
||||
SELECT {} FROM place_postcode
|
||||
WHERE osm_type = 'R'
|
||||
and country_code = {} and postcode = %(in)s
|
||||
and geometry is not null
|
||||
""").format(pysql.Literal(_extent_to_rank(extent)),
|
||||
""").format(pysql.SQL(',')
|
||||
.join(pysql.Identifier(c) for c in columns),
|
||||
pysql.SQL(',').join(values),
|
||||
pysql.Literal(country_code)),
|
||||
pcs)
|
||||
|
||||
|
||||
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
matcher: PostcodeFormatter) -> None:
|
||||
matcher: PostcodeFormatter, is_initial: bool) -> None:
|
||||
""" Update the postcode areas made from postcode boundaries.
|
||||
"""
|
||||
# first delete all areas that have gone
|
||||
conn.execute(""" DELETE FROM location_postcodes pc
|
||||
WHERE pc.osm_id is not null
|
||||
AND NOT EXISTS(
|
||||
SELECT * FROM place_postcode pp
|
||||
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
|
||||
and geometry is not null)
|
||||
""")
|
||||
if not is_initial:
|
||||
conn.execute(""" DELETE FROM location_postcodes pc
|
||||
WHERE pc.osm_id is not null
|
||||
AND NOT EXISTS(
|
||||
SELECT * FROM place_postcode pp
|
||||
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
|
||||
and geometry is not null)
|
||||
""")
|
||||
# now insert all in country batches, triggers will ensure proper updates
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(""" SELECT country_code, postcode FROM place_postcode
|
||||
@@ -230,7 +275,8 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
fmt = matcher.get_matcher(country_code)
|
||||
elif country_code != cc:
|
||||
_insert_postcode_areas(conn, country_code,
|
||||
matcher.get_postcode_extent(country_code), pcs)
|
||||
matcher.get_postcode_extent(country_code), pcs,
|
||||
is_initial)
|
||||
country_code = cc
|
||||
fmt = matcher.get_matcher(country_code)
|
||||
pcs = []
|
||||
@@ -241,21 +287,26 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
|
||||
if country_code is not None and pcs:
|
||||
_insert_postcode_areas(conn, country_code,
|
||||
matcher.get_postcode_extent(country_code), pcs)
|
||||
matcher.get_postcode_extent(country_code), pcs,
|
||||
is_initial)
|
||||
|
||||
|
||||
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None:
|
||||
matcher: PostcodeFormatter, project_dir: Optional[Path],
|
||||
is_initial: bool) -> None:
|
||||
""" Computes artificial postcode centroids from the placex table,
|
||||
potentially enhances it with external data and then updates the
|
||||
postcodes in the table 'location_postcodes'.
|
||||
"""
|
||||
# First get the list of countries that currently have postcodes.
|
||||
# (Doing this before starting to insert, so it is fast on import.)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
|
||||
WHERE osm_id is null""")
|
||||
todo_countries = {row[0] for row in cur}
|
||||
if is_initial:
|
||||
todo_countries: set[str] = set()
|
||||
else:
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
|
||||
WHERE osm_id is null""")
|
||||
todo_countries = {row[0] for row in cur}
|
||||
|
||||
# Next, get the list of postcodes that are already covered by areas.
|
||||
area_pcs = defaultdict(set)
|
||||
@@ -275,6 +326,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
FROM place_postcode WHERE geometry is not null)
|
||||
""")
|
||||
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
|
||||
|
||||
# Recompute the list of valid postcodes from placex.
|
||||
with conn.cursor(name="placex_postcodes") as cur:
|
||||
cur.execute("""
|
||||
@@ -296,7 +348,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
for country, postcode, x, y in cur:
|
||||
if collector is None or country != collector.country:
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
collector.commit(conn, analyzer, project_dir, is_initial)
|
||||
collector = _PostcodeCollector(country, matcher.get_matcher(country),
|
||||
matcher.get_postcode_extent(country),
|
||||
exclude=area_pcs[country])
|
||||
@@ -304,14 +356,14 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
collector.add(postcode, x, y)
|
||||
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
collector.commit(conn, analyzer, project_dir, is_initial)
|
||||
|
||||
# Now handle any countries that are only in the postcode table.
|
||||
for country in todo_countries:
|
||||
fmt = matcher.get_matcher(country)
|
||||
ext = matcher.get_postcode_extent(country)
|
||||
_PostcodeCollector(country, fmt, ext,
|
||||
exclude=area_pcs[country]).commit(conn, analyzer, project_dir)
|
||||
exclude=area_pcs[country]).commit(conn, analyzer, project_dir, False)
|
||||
|
||||
conn.execute("DROP TABLE IF EXISTS _global_postcode_area")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user