forked from hans/Nominatim
rework postcode computation
Now adds areas to location_postcodes, ignores postcode points inside areas and supports customizable extents.
This commit is contained in:
@@ -2,10 +2,10 @@
|
|||||||
--
|
--
|
||||||
-- This file is part of Nominatim. (https://nominatim.org)
|
-- This file is part of Nominatim. (https://nominatim.org)
|
||||||
--
|
--
|
||||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
-- Copyright (C) 2025 by the Nominatim developer community.
|
||||||
-- For a full list of authors see the git log.
|
-- For a full list of authors see the git log.
|
||||||
|
|
||||||
-- Trigger functions for location_postcode table.
|
-- Trigger functions for location_postcodes table.
|
||||||
|
|
||||||
|
|
||||||
-- Trigger for updates of location_postcode
|
-- Trigger for updates of location_postcode
|
||||||
@@ -13,7 +13,7 @@
|
|||||||
-- Computes the parent object the postcode most likely refers to.
|
-- Computes the parent object the postcode most likely refers to.
|
||||||
-- This will be the place that determines the address displayed when
|
-- This will be the place that determines the address displayed when
|
||||||
-- searching for this postcode.
|
-- searching for this postcode.
|
||||||
CREATE OR REPLACE FUNCTION postcode_update()
|
CREATE OR REPLACE FUNCTION postcodes_update()
|
||||||
RETURNS TRIGGER
|
RETURNS TRIGGER
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
@@ -28,13 +28,10 @@ BEGIN
|
|||||||
|
|
||||||
partition := get_partition(NEW.country_code);
|
partition := get_partition(NEW.country_code);
|
||||||
|
|
||||||
SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode)
|
|
||||||
INTO NEW.rank_search, NEW.rank_address;
|
|
||||||
|
|
||||||
NEW.parent_place_id = 0;
|
NEW.parent_place_id = 0;
|
||||||
FOR location IN
|
FOR location IN
|
||||||
SELECT place_id
|
SELECT place_id
|
||||||
FROM getNearFeatures(partition, NEW.geometry, NEW.geometry, NEW.rank_search)
|
FROM getNearFeatures(partition, NEW.centroid, NEW.centroid, NEW.rank_search)
|
||||||
WHERE NOT isguess ORDER BY rank_address DESC, distance asc LIMIT 1
|
WHERE NOT isguess ORDER BY rank_address DESC, distance asc LIMIT 1
|
||||||
LOOP
|
LOOP
|
||||||
NEW.parent_place_id = location.place_id;
|
NEW.parent_place_id = location.place_id;
|
||||||
@@ -45,3 +42,89 @@ END;
|
|||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION postcodes_delete()
|
||||||
|
RETURNS TRIGGER
|
||||||
|
AS $$
|
||||||
|
BEGIN
|
||||||
|
{% if not disable_diff_updates %}
|
||||||
|
UPDATE placex p SET indexed_status = 2
|
||||||
|
WHERE p.postcode = OLD.postcode AND ST_Intersects(OLD.geometry, p.geometry)
|
||||||
|
AND indexed_status = 0;
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
RETURN OLD;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION postcodes_insert()
|
||||||
|
RETURNS TRIGGER
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
existing RECORD;
|
||||||
|
BEGIN
|
||||||
|
IF NEW.osm_id is not NULL THEN
|
||||||
|
-- postcode area, remove existing from same OSM object
|
||||||
|
SELECT * INTO existing FROM location_postcodes p
|
||||||
|
WHERE p.osm_id = NEW.osm_id;
|
||||||
|
|
||||||
|
IF existing.place_id is not NULL THEN
|
||||||
|
IF existing.postcode != NEW.postcode or existing.country_code != NEW.country_code THEN
|
||||||
|
DELETE FROM location_postcodes p WHERE p.osm_id = NEW.osm_id;
|
||||||
|
existing := NULL;
|
||||||
|
END IF;
|
||||||
|
END IF;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
IF existing is NULL THEN
|
||||||
|
SELECT * INTO existing FROM location_postcodes p
|
||||||
|
WHERE p.country_code = NEW.country_code AND p.postcode = NEW.postcode;
|
||||||
|
|
||||||
|
IF existing.postcode is NULL THEN
|
||||||
|
{% if not disable_diff_updates %}
|
||||||
|
UPDATE placex p SET indexed_status = 2
|
||||||
|
WHERE ST_Intersects(NEW.geometry, p.geometry)
|
||||||
|
AND indexed_status = 0
|
||||||
|
AND p.rank_address >= 22 AND not p.address ? 'postcode';
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
-- new entry, just insert
|
||||||
|
NEW.indexed_status := 1;
|
||||||
|
NEW.place_id := nextval('seq_place');
|
||||||
|
RETURN NEW;
|
||||||
|
END IF;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- update: only when there are changes
|
||||||
|
IF coalesce(NEW.osm_id, -1) != coalesce(existing.osm_id, -1)
|
||||||
|
OR (NEW.osm_id is not null AND NEW.geometry::text != existing.geometry::text)
|
||||||
|
OR (NEW.osm_id is null
|
||||||
|
AND (abs(ST_X(existing.centroid) - ST_X(NEW.centroid)) > 0.0000001
|
||||||
|
OR abs(ST_Y(existing.centroid) - ST_Y(NEW.centroid)) > 0.0000001))
|
||||||
|
THEN
|
||||||
|
{% if not disable_diff_updates %}
|
||||||
|
UPDATE placex p SET indexed_status = 2
|
||||||
|
WHERE ST_Intersects(ST_Difference(NEW.geometry, existing.geometry), p.geometry)
|
||||||
|
AND indexed_status = 0
|
||||||
|
AND p.rank_address >= 22 AND not p.address ? 'postcode';
|
||||||
|
|
||||||
|
UPDATE placex p SET indexed_status = 2
|
||||||
|
WHERE ST_Intersects(ST_Difference(existing.geometry, NEW.geometry), p.geometry)
|
||||||
|
AND indexed_status = 0
|
||||||
|
AND p.postcode = OLD.postcode;
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
UPDATE location_postcodes p
|
||||||
|
SET osm_id = NEW.osm_id,
|
||||||
|
indexed_status = 2,
|
||||||
|
centroid = NEW.centroid,
|
||||||
|
geometry = NEW.geometry
|
||||||
|
WHERE p.country_code = NEW.country_code AND p.postcode = NEW.postcode;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
RETURN NULL;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
--
|
--
|
||||||
-- This file is part of Nominatim. (https://nominatim.org)
|
-- This file is part of Nominatim. (https://nominatim.org)
|
||||||
--
|
--
|
||||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
-- Copyright (C) 2025 by the Nominatim developer community.
|
||||||
-- For a full list of authors see the git log.
|
-- For a full list of authors see the git log.
|
||||||
|
|
||||||
-- Functions related to search and address ranks
|
-- Functions related to search and address ranks
|
||||||
@@ -114,66 +114,6 @@ $$
|
|||||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Guess a ranking for postcodes from country and postcode format.
|
|
||||||
CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT,
|
|
||||||
OUT rank_search SMALLINT,
|
|
||||||
OUT rank_address SMALLINT)
|
|
||||||
AS $$
|
|
||||||
DECLARE
|
|
||||||
part TEXT;
|
|
||||||
BEGIN
|
|
||||||
rank_search := 30;
|
|
||||||
rank_address := 30;
|
|
||||||
postcode := upper(postcode);
|
|
||||||
|
|
||||||
IF country_code = 'gb' THEN
|
|
||||||
IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
|
|
||||||
rank_search := 25;
|
|
||||||
rank_address := 5;
|
|
||||||
ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
|
|
||||||
rank_search := 23;
|
|
||||||
rank_address := 5;
|
|
||||||
ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
|
|
||||||
rank_search := 21;
|
|
||||||
rank_address := 5;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
ELSEIF country_code = 'sg' THEN
|
|
||||||
IF postcode ~ '^([0-9]{6})$' THEN
|
|
||||||
rank_search := 25;
|
|
||||||
rank_address := 11;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
ELSEIF country_code = 'de' THEN
|
|
||||||
IF postcode ~ '^([0-9]{5})$' THEN
|
|
||||||
rank_search := 21;
|
|
||||||
rank_address := 11;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
ELSE
|
|
||||||
-- Guess at the postcode format and coverage (!)
|
|
||||||
IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
|
|
||||||
rank_search := 21;
|
|
||||||
rank_address := 11;
|
|
||||||
ELSE
|
|
||||||
-- Does it look splitable into and area and local code?
|
|
||||||
part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
|
|
||||||
|
|
||||||
IF part IS NOT NULL THEN
|
|
||||||
rank_search := 25;
|
|
||||||
rank_address := 11;
|
|
||||||
ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN
|
|
||||||
rank_search := 21;
|
|
||||||
rank_address := 11;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
END;
|
|
||||||
$$
|
|
||||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
|
||||||
|
|
||||||
|
|
||||||
-- Get standard search and address rank for an object.
|
-- Get standard search and address rank for an object.
|
||||||
--
|
--
|
||||||
-- \param country Two-letter country code where the object is in.
|
-- \param country Two-letter country code where the object is in.
|
||||||
@@ -198,12 +138,7 @@ AS $$
|
|||||||
DECLARE
|
DECLARE
|
||||||
classtype TEXT;
|
classtype TEXT;
|
||||||
BEGIN
|
BEGIN
|
||||||
IF place_class in ('place','boundary')
|
IF extended_type = 'N' AND place_class = 'highway' THEN
|
||||||
and place_type in ('postcode','postal_code')
|
|
||||||
THEN
|
|
||||||
SELECT * INTO search_rank, address_rank
|
|
||||||
FROM get_postcode_rank(country, postcode);
|
|
||||||
ELSEIF extended_type = 'N' AND place_class = 'highway' THEN
|
|
||||||
search_rank = 30;
|
search_rank = 30;
|
||||||
address_rank = 30;
|
address_rank = 30;
|
||||||
ELSEIF place_class = 'landuse' AND extended_type != 'A' THEN
|
ELSEIF place_class = 'landuse' AND extended_type != 'A' THEN
|
||||||
|
|||||||
@@ -323,6 +323,17 @@ END;
|
|||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
|
-- Return the bounding box of the geometry buffered by the given number
|
||||||
|
-- of meters.
|
||||||
|
CREATE OR REPLACE FUNCTION expand_by_meters(geom GEOMETRY, meters FLOAT)
|
||||||
|
RETURNS GEOMETRY
|
||||||
|
AS $$
|
||||||
|
SELECT ST_Envelope(ST_Buffer(geom::geography, meters, 1)::geometry)
|
||||||
|
$$
|
||||||
|
LANGUAGE sql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|
||||||
|
|
||||||
-- Create a bounding box with an extent computed from the radius (in meters)
|
-- Create a bounding box with an extent computed from the radius (in meters)
|
||||||
-- which in turn is derived from the given search rank.
|
-- which in turn is derived from the given search rank.
|
||||||
CREATE OR REPLACE FUNCTION place_node_fuzzy_area(geom GEOMETRY, rank_search INTEGER)
|
CREATE OR REPLACE FUNCTION place_node_fuzzy_area(geom GEOMETRY, rank_search INTEGER)
|
||||||
@@ -341,9 +352,7 @@ BEGIN
|
|||||||
radius := 1000;
|
radius := 1000;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
RETURN ST_Envelope(ST_Collect(
|
RETURN expand_by_meters(geom, radius);
|
||||||
ST_Project(geom::geography, radius, 0.785398)::geometry,
|
|
||||||
ST_Project(geom::geography, radius, 3.9269908)::geometry));
|
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||||
|
|||||||
@@ -26,4 +26,8 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place
|
|||||||
FOR EACH ROW EXECUTE PROCEDURE place_insert();
|
FOR EACH ROW EXECUTE PROCEDURE place_insert();
|
||||||
|
|
||||||
CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcodes
|
CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcodes
|
||||||
FOR EACH ROW EXECUTE PROCEDURE postcode_update();
|
FOR EACH ROW EXECUTE PROCEDURE postcodes_update();
|
||||||
|
CREATE TRIGGER location_postcodes_before_delete BEFORE DELETE ON location_postcodes
|
||||||
|
FOR EACH ROW EXECUTE PROCEDURE postcodes_delete();
|
||||||
|
CREATE TRIGGER location_postcodes_before_insert BEFORE INSERT ON location_postcodes
|
||||||
|
FOR EACH ROW EXECUTE PROCEDURE postcodes_insert();
|
||||||
|
|||||||
@@ -8,7 +8,7 @@
|
|||||||
Functions for importing, updating and otherwise maintaining the table
|
Functions for importing, updating and otherwise maintaining the table
|
||||||
of artificial postcode centroids.
|
of artificial postcode centroids.
|
||||||
"""
|
"""
|
||||||
from typing import Optional, Tuple, Dict, List, TextIO
|
from typing import Optional, Tuple, Dict, TextIO
|
||||||
from collections import defaultdict
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
import csv
|
import csv
|
||||||
@@ -38,13 +38,26 @@ def _to_float(numstr: str, max_value: float) -> float:
|
|||||||
return num
|
return num
|
||||||
|
|
||||||
|
|
||||||
|
def _extent_to_rank(extent: int) -> int:
|
||||||
|
""" Guess a suitable search rank from the extent of a postcode.
|
||||||
|
"""
|
||||||
|
if extent <= 100:
|
||||||
|
return 25
|
||||||
|
if extent <= 3000:
|
||||||
|
return 23
|
||||||
|
return 21
|
||||||
|
|
||||||
|
|
||||||
class _PostcodeCollector:
|
class _PostcodeCollector:
|
||||||
""" Collector for postcodes of a single country.
|
""" Collector for postcodes of a single country.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, country: str, matcher: Optional[CountryPostcodeMatcher]):
|
def __init__(self, country: str, matcher: Optional[CountryPostcodeMatcher],
|
||||||
|
default_extent: int, exclude: set[str] = set()):
|
||||||
self.country = country
|
self.country = country
|
||||||
self.matcher = matcher
|
self.matcher = matcher
|
||||||
|
self.extent = default_extent
|
||||||
|
self.exclude = exclude
|
||||||
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
|
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
|
||||||
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None
|
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None
|
||||||
|
|
||||||
@@ -61,7 +74,7 @@ class _PostcodeCollector:
|
|||||||
normalized = self.matcher.normalize(match) if match else None
|
normalized = self.matcher.normalize(match) if match else None
|
||||||
self.normalization_cache = (postcode, normalized)
|
self.normalization_cache = (postcode, normalized)
|
||||||
|
|
||||||
if normalized:
|
if normalized and normalized not in self.exclude:
|
||||||
self.collected[normalized] += (x, y)
|
self.collected[normalized] += (x, y)
|
||||||
|
|
||||||
def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
|
def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
|
||||||
@@ -73,61 +86,37 @@ class _PostcodeCollector:
|
|||||||
"""
|
"""
|
||||||
if project_dir is not None:
|
if project_dir is not None:
|
||||||
self._update_from_external(analyzer, project_dir)
|
self._update_from_external(analyzer, project_dir)
|
||||||
to_add, to_delete, to_update = self._compute_changes(conn)
|
|
||||||
|
|
||||||
LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
|
with conn.cursor() as cur:
|
||||||
self.country, len(to_add), len(to_delete), len(to_update))
|
cur.execute("""SELECT postcode FROM location_postcodes
|
||||||
|
WHERE country_code = %s AND osm_id is null""",
|
||||||
|
(self.country, ))
|
||||||
|
to_delete = [row[0] for row in cur if row[0] not in self.collected]
|
||||||
|
|
||||||
|
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
|
||||||
|
for k, v in self.collected.items()]
|
||||||
|
self.collected = defaultdict(PointsCentroid)
|
||||||
|
|
||||||
|
LOG.info("Processing country '%s' (%s added, %s deleted).",
|
||||||
|
self.country, len(to_add), len(to_delete))
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
if to_add:
|
if to_add:
|
||||||
cur.executemany(pysql.SQL(
|
cur.executemany(pysql.SQL(
|
||||||
"""INSERT INTO location_postcode
|
"""INSERT INTO location_postcodes
|
||||||
(place_id, indexed_status, country_code,
|
(country_code, rank_search, postcode, centroid, geometry)
|
||||||
postcode, geometry)
|
VALUES ({}, {}, %(pc)s,
|
||||||
VALUES (nextval('seq_place'), 1, {}, %s,
|
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326),
|
||||||
ST_SetSRID(ST_MakePoint(%s, %s), 4326))
|
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {}))
|
||||||
""").format(pysql.Literal(self.country)),
|
""").format(pysql.Literal(self.country),
|
||||||
|
pysql.Literal(_extent_to_rank(self.extent)),
|
||||||
|
pysql.Literal(self.extent)),
|
||||||
to_add)
|
to_add)
|
||||||
if to_delete:
|
if to_delete:
|
||||||
cur.execute("""DELETE FROM location_postcode
|
cur.execute("""DELETE FROM location_postcodes
|
||||||
WHERE country_code = %s and postcode = any(%s)
|
WHERE country_code = %s and postcode = any(%s)
|
||||||
|
AND osm_id is null
|
||||||
""", (self.country, to_delete))
|
""", (self.country, to_delete))
|
||||||
if to_update:
|
|
||||||
cur.executemany(
|
|
||||||
pysql.SQL("""UPDATE location_postcode
|
|
||||||
SET indexed_status = 2,
|
|
||||||
geometry = ST_SetSRID(ST_Point(%s, %s), 4326)
|
|
||||||
WHERE country_code = {} and postcode = %s
|
|
||||||
""").format(pysql.Literal(self.country)),
|
|
||||||
to_update)
|
|
||||||
|
|
||||||
def _compute_changes(
|
|
||||||
self, conn: Connection
|
|
||||||
) -> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
|
|
||||||
""" Compute which postcodes from the collected postcodes have to be
|
|
||||||
added or modified and which from the location_postcode table
|
|
||||||
have to be deleted.
|
|
||||||
"""
|
|
||||||
to_update = []
|
|
||||||
to_delete = []
|
|
||||||
with conn.cursor() as cur:
|
|
||||||
cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
|
|
||||||
FROM location_postcode
|
|
||||||
WHERE country_code = %s""",
|
|
||||||
(self.country, ))
|
|
||||||
for postcode, x, y in cur:
|
|
||||||
pcobj = self.collected.pop(postcode, None)
|
|
||||||
if pcobj:
|
|
||||||
newx, newy = pcobj.centroid()
|
|
||||||
if abs(x - newx) > 0.0000001 or abs(y - newy) > 0.0000001:
|
|
||||||
to_update.append((newx, newy, postcode))
|
|
||||||
else:
|
|
||||||
to_delete.append(postcode)
|
|
||||||
|
|
||||||
to_add = [(k, *v.centroid()) for k, v in self.collected.items()]
|
|
||||||
self.collected = defaultdict(PointsCentroid)
|
|
||||||
|
|
||||||
return to_add, to_delete, to_update
|
|
||||||
|
|
||||||
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
||||||
""" Look for an external postcode file for the active country in
|
""" Look for an external postcode file for the active country in
|
||||||
@@ -152,7 +141,7 @@ class _PostcodeCollector:
|
|||||||
_to_float(row['lat'], 90))
|
_to_float(row['lat'], 90))
|
||||||
self.collected[postcode] += centroid
|
self.collected[postcode] += centroid
|
||||||
except ValueError:
|
except ValueError:
|
||||||
LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
|
LOG.warning("Bad coordinates %s, %s in '%s' country postcode file.",
|
||||||
row['lat'], row['lon'], self.country)
|
row['lat'], row['lon'], self.country)
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
@@ -175,63 +164,158 @@ class _PostcodeCollector:
|
|||||||
|
|
||||||
|
|
||||||
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
|
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
|
||||||
""" Update the table of artificial postcodes.
|
""" Update the table of postcodes from the input tables
|
||||||
|
placex and place_postcode.
|
||||||
Computes artificial postcode centroids from the placex table,
|
|
||||||
potentially enhances it with external data and then updates the
|
|
||||||
postcodes in the table 'location_postcode'.
|
|
||||||
"""
|
"""
|
||||||
matcher = PostcodeFormatter()
|
matcher = PostcodeFormatter()
|
||||||
with tokenizer.name_analyzer() as analyzer:
|
with tokenizer.name_analyzer() as analyzer:
|
||||||
with connect(dsn) as conn:
|
with connect(dsn) as conn:
|
||||||
# First get the list of countries that currently have postcodes.
|
# Backfill country_code column where required
|
||||||
# (Doing this before starting to insert, so it is fast on import.)
|
conn.execute("""UPDATE place_postcode
|
||||||
with conn.cursor() as cur:
|
SET country_code = get_country_code(centroid)
|
||||||
cur.execute("SELECT DISTINCT country_code FROM location_postcode")
|
WHERE country_code is null
|
||||||
todo_countries = set((row[0] for row in cur))
|
""")
|
||||||
|
# Now update first postcode areas
|
||||||
# Recompute the list of valid postcodes from placex.
|
_update_postcode_areas(conn, analyzer, matcher)
|
||||||
with conn.cursor(name="placex_postcodes") as cur:
|
# Then fill with estimated postcode centroids from other info
|
||||||
cur.execute("""
|
_update_guessed_postcode(conn, analyzer, matcher, project_dir)
|
||||||
SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
|
|
||||||
FROM (SELECT
|
|
||||||
COALESCE(plx.country_code,
|
|
||||||
get_country_code(ST_Centroid(pl.geometry))) as cc,
|
|
||||||
pl.address->'postcode' as pc,
|
|
||||||
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
|
|
||||||
FROM place AS pl LEFT OUTER JOIN placex AS plx
|
|
||||||
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
|
||||||
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
|
|
||||||
WHERE pc IS NOT null AND cc IS NOT null
|
|
||||||
ORDER BY cc, pc""")
|
|
||||||
|
|
||||||
collector = None
|
|
||||||
|
|
||||||
for country, postcode, x, y in cur:
|
|
||||||
if collector is None or country != collector.country:
|
|
||||||
if collector is not None:
|
|
||||||
collector.commit(conn, analyzer, project_dir)
|
|
||||||
collector = _PostcodeCollector(country, matcher.get_matcher(country))
|
|
||||||
todo_countries.discard(country)
|
|
||||||
collector.add(postcode, x, y)
|
|
||||||
|
|
||||||
if collector is not None:
|
|
||||||
collector.commit(conn, analyzer, project_dir)
|
|
||||||
|
|
||||||
# Now handle any countries that are only in the postcode table.
|
|
||||||
for country in todo_countries:
|
|
||||||
fmt = matcher.get_matcher(country)
|
|
||||||
_PostcodeCollector(country, fmt).commit(conn, analyzer, project_dir)
|
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
analyzer.update_postcodes_from_db()
|
analyzer.update_postcodes_from_db()
|
||||||
|
|
||||||
|
|
||||||
def can_compute(dsn: str) -> bool:
|
def _insert_postcode_areas(conn: Connection, country_code: str,
|
||||||
|
extent: int, pcs: list[dict[str, str]]) -> None:
|
||||||
|
if pcs:
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.executemany(
|
||||||
|
pysql.SQL(
|
||||||
|
""" INSERT INTO location_postcodes
|
||||||
|
(osm_id, country_code, rank_search, postcode, centroid, geometry)
|
||||||
|
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
|
||||||
|
FROM place_postcode
|
||||||
|
WHERE osm_type = 'R'
|
||||||
|
and country_code = {} and postcode = %(in)s
|
||||||
|
and geometry is not null
|
||||||
|
""").format(pysql.Literal(_extent_to_rank(extent)),
|
||||||
|
pysql.Literal(country_code)),
|
||||||
|
pcs)
|
||||||
|
|
||||||
|
|
||||||
|
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
|
||||||
|
matcher: PostcodeFormatter) -> None:
|
||||||
|
""" Update the postcode areas made from postcode boundaries.
|
||||||
"""
|
"""
|
||||||
Check that the place table exists so that
|
# first delete all areas that have gone
|
||||||
postcodes can be computed.
|
conn.execute(""" DELETE FROM location_postcodes pc
|
||||||
|
WHERE pc.osm_id is not null
|
||||||
|
AND NOT EXISTS(
|
||||||
|
SELECT * FROM place_postcode pp
|
||||||
|
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
|
||||||
|
and geometry is not null)
|
||||||
|
""")
|
||||||
|
# now insert all in country batches, triggers will ensure proper updates
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute(""" SELECT country_code, postcode FROM place_postcode
|
||||||
|
WHERE geometry is not null and osm_type = 'R'
|
||||||
|
ORDER BY country_code
|
||||||
|
""")
|
||||||
|
country_code = None
|
||||||
|
fmt = None
|
||||||
|
pcs = []
|
||||||
|
for cc, postcode in cur:
|
||||||
|
if country_code is None:
|
||||||
|
country_code = cc
|
||||||
|
fmt = matcher.get_matcher(country_code)
|
||||||
|
elif country_code != cc:
|
||||||
|
_insert_postcode_areas(conn, country_code,
|
||||||
|
matcher.get_postcode_extent(country_code), pcs)
|
||||||
|
country_code = cc
|
||||||
|
fmt = matcher.get_matcher(country_code)
|
||||||
|
pcs = []
|
||||||
|
assert fmt is not None
|
||||||
|
|
||||||
|
if (m := fmt.match(postcode)):
|
||||||
|
pcs.append({'out': fmt.normalize(m), 'in': postcode})
|
||||||
|
|
||||||
|
if country_code is not None and pcs:
|
||||||
|
_insert_postcode_areas(conn, country_code,
|
||||||
|
matcher.get_postcode_extent(country_code), pcs)
|
||||||
|
|
||||||
|
|
||||||
|
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||||
|
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None:
|
||||||
|
""" Computes artificial postcode centroids from the placex table,
|
||||||
|
potentially enhances it with external data and then updates the
|
||||||
|
postcodes in the table 'location_postcodes'.
|
||||||
|
"""
|
||||||
|
# First get the list of countries that currently have postcodes.
|
||||||
|
# (Doing this before starting to insert, so it is fast on import.)
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
|
||||||
|
WHERE osm_id is null""")
|
||||||
|
todo_countries = {row[0] for row in cur}
|
||||||
|
|
||||||
|
# Next, get the list of postcodes that are already covered by areas.
|
||||||
|
area_pcs = defaultdict(set)
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("""SELECT country_code, postcode
|
||||||
|
FROM location_postcodes WHERE osm_id is not null
|
||||||
|
ORDER BY country_code""")
|
||||||
|
for cc, pc in cur:
|
||||||
|
area_pcs[cc].add(pc)
|
||||||
|
|
||||||
|
# Create a temporary table which contains coverage of the postcode areas.
|
||||||
|
with conn.cursor() as cur:
|
||||||
|
cur.execute("DROP TABLE IF EXISTS _global_postcode_area")
|
||||||
|
cur.execute("""CREATE TABLE _global_postcode_area AS
|
||||||
|
(SELECT ST_SubDivide(ST_Union(geometry)) as geometry
|
||||||
|
FROM place_postcode WHERE geometry is not null)
|
||||||
|
""")
|
||||||
|
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
|
||||||
|
# Recompute the list of valid postcodes from placex.
|
||||||
|
with conn.cursor(name="placex_postcodes") as cur:
|
||||||
|
cur.execute("""
|
||||||
|
SELECT country_code, postcode, ST_X(centroid), ST_Y(centroid)
|
||||||
|
FROM (
|
||||||
|
(SELECT country_code, address->'postcode' as postcode, centroid
|
||||||
|
FROM placex WHERE address ? 'postcode')
|
||||||
|
UNION
|
||||||
|
(SELECT country_code, postcode, centroid
|
||||||
|
FROM place_postcode WHERE geometry is null)
|
||||||
|
) x
|
||||||
|
WHERE not postcode like '%,%' and not postcode like '%;%'
|
||||||
|
AND NOT EXISTS(SELECT * FROM _global_postcode_area g
|
||||||
|
WHERE ST_Intersects(x.centroid, g.geometry))
|
||||||
|
ORDER BY country_code""")
|
||||||
|
|
||||||
|
collector = None
|
||||||
|
|
||||||
|
for country, postcode, x, y in cur:
|
||||||
|
if collector is None or country != collector.country:
|
||||||
|
if collector is not None:
|
||||||
|
collector.commit(conn, analyzer, project_dir)
|
||||||
|
collector = _PostcodeCollector(country, matcher.get_matcher(country),
|
||||||
|
matcher.get_postcode_extent(country),
|
||||||
|
exclude=area_pcs[country])
|
||||||
|
todo_countries.discard(country)
|
||||||
|
collector.add(postcode, x, y)
|
||||||
|
|
||||||
|
if collector is not None:
|
||||||
|
collector.commit(conn, analyzer, project_dir)
|
||||||
|
|
||||||
|
# Now handle any countries that are only in the postcode table.
|
||||||
|
for country in todo_countries:
|
||||||
|
fmt = matcher.get_matcher(country)
|
||||||
|
ext = matcher.get_postcode_extent(country)
|
||||||
|
_PostcodeCollector(country, fmt, ext,
|
||||||
|
exclude=area_pcs[country]).commit(conn, analyzer, project_dir)
|
||||||
|
|
||||||
|
conn.execute("DROP TABLE IF EXISTS _global_postcode_area")
|
||||||
|
|
||||||
|
|
||||||
|
def can_compute(dsn: str) -> bool:
|
||||||
|
""" Check that the necessary tables exist so that postcodes can be computed.
|
||||||
"""
|
"""
|
||||||
with connect(dsn) as conn:
|
with connect(dsn) as conn:
|
||||||
return table_exists(conn, 'place')
|
return table_exists(conn, 'place_postcode')
|
||||||
|
|||||||
Reference in New Issue
Block a user