mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 01:47:57 +00:00
rework postcode computation
Now adds areas to location_postcodes, ignores postcode points inside areas and supports customizable extents.
This commit is contained in:
@@ -2,10 +2,10 @@
|
||||
--
|
||||
-- This file is part of Nominatim. (https://nominatim.org)
|
||||
--
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- Copyright (C) 2025 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
-- Trigger functions for location_postcode table.
|
||||
-- Trigger functions for location_postcodes table.
|
||||
|
||||
|
||||
-- Trigger for updates of location_postcode
|
||||
@@ -13,7 +13,7 @@
|
||||
-- Computes the parent object the postcode most likely refers to.
|
||||
-- This will be the place that determines the address displayed when
|
||||
-- searching for this postcode.
|
||||
CREATE OR REPLACE FUNCTION postcode_update()
|
||||
CREATE OR REPLACE FUNCTION postcodes_update()
|
||||
RETURNS TRIGGER
|
||||
AS $$
|
||||
DECLARE
|
||||
@@ -28,13 +28,10 @@ BEGIN
|
||||
|
||||
partition := get_partition(NEW.country_code);
|
||||
|
||||
SELECT * FROM get_postcode_rank(NEW.country_code, NEW.postcode)
|
||||
INTO NEW.rank_search, NEW.rank_address;
|
||||
|
||||
NEW.parent_place_id = 0;
|
||||
FOR location IN
|
||||
SELECT place_id
|
||||
FROM getNearFeatures(partition, NEW.geometry, NEW.geometry, NEW.rank_search)
|
||||
FROM getNearFeatures(partition, NEW.centroid, NEW.centroid, NEW.rank_search)
|
||||
WHERE NOT isguess ORDER BY rank_address DESC, distance asc LIMIT 1
|
||||
LOOP
|
||||
NEW.parent_place_id = location.place_id;
|
||||
@@ -45,3 +42,89 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION postcodes_delete()
|
||||
RETURNS TRIGGER
|
||||
AS $$
|
||||
BEGIN
|
||||
{% if not disable_diff_updates %}
|
||||
UPDATE placex p SET indexed_status = 2
|
||||
WHERE p.postcode = OLD.postcode AND ST_Intersects(OLD.geometry, p.geometry)
|
||||
AND indexed_status = 0;
|
||||
{% endif %}
|
||||
|
||||
RETURN OLD;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION postcodes_insert()
|
||||
RETURNS TRIGGER
|
||||
AS $$
|
||||
DECLARE
|
||||
existing RECORD;
|
||||
BEGIN
|
||||
IF NEW.osm_id is not NULL THEN
|
||||
-- postcode area, remove existing from same OSM object
|
||||
SELECT * INTO existing FROM location_postcodes p
|
||||
WHERE p.osm_id = NEW.osm_id;
|
||||
|
||||
IF existing.place_id is not NULL THEN
|
||||
IF existing.postcode != NEW.postcode or existing.country_code != NEW.country_code THEN
|
||||
DELETE FROM location_postcodes p WHERE p.osm_id = NEW.osm_id;
|
||||
existing := NULL;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
IF existing is NULL THEN
|
||||
SELECT * INTO existing FROM location_postcodes p
|
||||
WHERE p.country_code = NEW.country_code AND p.postcode = NEW.postcode;
|
||||
|
||||
IF existing.postcode is NULL THEN
|
||||
{% if not disable_diff_updates %}
|
||||
UPDATE placex p SET indexed_status = 2
|
||||
WHERE ST_Intersects(NEW.geometry, p.geometry)
|
||||
AND indexed_status = 0
|
||||
AND p.rank_address >= 22 AND not p.address ? 'postcode';
|
||||
{% endif %}
|
||||
|
||||
-- new entry, just insert
|
||||
NEW.indexed_status := 1;
|
||||
NEW.place_id := nextval('seq_place');
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- update: only when there are changes
|
||||
IF coalesce(NEW.osm_id, -1) != coalesce(existing.osm_id, -1)
|
||||
OR (NEW.osm_id is not null AND NEW.geometry::text != existing.geometry::text)
|
||||
OR (NEW.osm_id is null
|
||||
AND (abs(ST_X(existing.centroid) - ST_X(NEW.centroid)) > 0.0000001
|
||||
OR abs(ST_Y(existing.centroid) - ST_Y(NEW.centroid)) > 0.0000001))
|
||||
THEN
|
||||
{% if not disable_diff_updates %}
|
||||
UPDATE placex p SET indexed_status = 2
|
||||
WHERE ST_Intersects(ST_Difference(NEW.geometry, existing.geometry), p.geometry)
|
||||
AND indexed_status = 0
|
||||
AND p.rank_address >= 22 AND not p.address ? 'postcode';
|
||||
|
||||
UPDATE placex p SET indexed_status = 2
|
||||
WHERE ST_Intersects(ST_Difference(existing.geometry, NEW.geometry), p.geometry)
|
||||
AND indexed_status = 0
|
||||
AND p.postcode = OLD.postcode;
|
||||
{% endif %}
|
||||
|
||||
UPDATE location_postcodes p
|
||||
SET osm_id = NEW.osm_id,
|
||||
indexed_status = 2,
|
||||
centroid = NEW.centroid,
|
||||
geometry = NEW.geometry
|
||||
WHERE p.country_code = NEW.country_code AND p.postcode = NEW.postcode;
|
||||
END IF;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
--
|
||||
-- This file is part of Nominatim. (https://nominatim.org)
|
||||
--
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- Copyright (C) 2025 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
-- Functions related to search and address ranks
|
||||
@@ -114,66 +114,6 @@ $$
|
||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Guess a ranking for postcodes from country and postcode format.
|
||||
CREATE OR REPLACE FUNCTION get_postcode_rank(country_code VARCHAR(2), postcode TEXT,
|
||||
OUT rank_search SMALLINT,
|
||||
OUT rank_address SMALLINT)
|
||||
AS $$
|
||||
DECLARE
|
||||
part TEXT;
|
||||
BEGIN
|
||||
rank_search := 30;
|
||||
rank_address := 30;
|
||||
postcode := upper(postcode);
|
||||
|
||||
IF country_code = 'gb' THEN
|
||||
IF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
|
||||
rank_search := 25;
|
||||
rank_address := 5;
|
||||
ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
|
||||
rank_search := 23;
|
||||
rank_address := 5;
|
||||
ELSEIF postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
|
||||
rank_search := 21;
|
||||
rank_address := 5;
|
||||
END IF;
|
||||
|
||||
ELSEIF country_code = 'sg' THEN
|
||||
IF postcode ~ '^([0-9]{6})$' THEN
|
||||
rank_search := 25;
|
||||
rank_address := 11;
|
||||
END IF;
|
||||
|
||||
ELSEIF country_code = 'de' THEN
|
||||
IF postcode ~ '^([0-9]{5})$' THEN
|
||||
rank_search := 21;
|
||||
rank_address := 11;
|
||||
END IF;
|
||||
|
||||
ELSE
|
||||
-- Guess at the postcode format and coverage (!)
|
||||
IF postcode ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
|
||||
rank_search := 21;
|
||||
rank_address := 11;
|
||||
ELSE
|
||||
-- Does it look splitable into and area and local code?
|
||||
part := substring(postcode from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
|
||||
|
||||
IF part IS NOT NULL THEN
|
||||
rank_search := 25;
|
||||
rank_address := 11;
|
||||
ELSEIF postcode ~ '^[- :A-Z0-9]{6,}$' THEN
|
||||
rank_search := 21;
|
||||
rank_address := 11;
|
||||
END IF;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Get standard search and address rank for an object.
|
||||
--
|
||||
-- \param country Two-letter country code where the object is in.
|
||||
@@ -198,12 +138,7 @@ AS $$
|
||||
DECLARE
|
||||
classtype TEXT;
|
||||
BEGIN
|
||||
IF place_class in ('place','boundary')
|
||||
and place_type in ('postcode','postal_code')
|
||||
THEN
|
||||
SELECT * INTO search_rank, address_rank
|
||||
FROM get_postcode_rank(country, postcode);
|
||||
ELSEIF extended_type = 'N' AND place_class = 'highway' THEN
|
||||
IF extended_type = 'N' AND place_class = 'highway' THEN
|
||||
search_rank = 30;
|
||||
address_rank = 30;
|
||||
ELSEIF place_class = 'landuse' AND extended_type != 'A' THEN
|
||||
|
||||
@@ -323,6 +323,17 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
|
||||
-- Return the bounding box of the geometry buffered by the given number
|
||||
-- of meters.
|
||||
CREATE OR REPLACE FUNCTION expand_by_meters(geom GEOMETRY, meters FLOAT)
|
||||
RETURNS GEOMETRY
|
||||
AS $$
|
||||
SELECT ST_Envelope(ST_Buffer(geom::geography, meters, 1)::geometry)
|
||||
$$
|
||||
LANGUAGE sql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
|
||||
-- Create a bounding box with an extent computed from the radius (in meters)
|
||||
-- which in turn is derived from the given search rank.
|
||||
CREATE OR REPLACE FUNCTION place_node_fuzzy_area(geom GEOMETRY, rank_search INTEGER)
|
||||
@@ -341,9 +352,7 @@ BEGIN
|
||||
radius := 1000;
|
||||
END IF;
|
||||
|
||||
RETURN ST_Envelope(ST_Collect(
|
||||
ST_Project(geom::geography, radius, 0.785398)::geometry,
|
||||
ST_Project(geom::geography, radius, 3.9269908)::geometry));
|
||||
RETURN expand_by_meters(geom, radius);
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE PARALLEL SAFE;
|
||||
|
||||
@@ -26,4 +26,8 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place
|
||||
FOR EACH ROW EXECUTE PROCEDURE place_insert();
|
||||
|
||||
CREATE TRIGGER location_postcode_before_update BEFORE UPDATE ON location_postcodes
|
||||
FOR EACH ROW EXECUTE PROCEDURE postcode_update();
|
||||
FOR EACH ROW EXECUTE PROCEDURE postcodes_update();
|
||||
CREATE TRIGGER location_postcodes_before_delete BEFORE DELETE ON location_postcodes
|
||||
FOR EACH ROW EXECUTE PROCEDURE postcodes_delete();
|
||||
CREATE TRIGGER location_postcodes_before_insert BEFORE INSERT ON location_postcodes
|
||||
FOR EACH ROW EXECUTE PROCEDURE postcodes_insert();
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
Functions for importing, updating and otherwise maintaining the table
|
||||
of artificial postcode centroids.
|
||||
"""
|
||||
from typing import Optional, Tuple, Dict, List, TextIO
|
||||
from typing import Optional, Tuple, Dict, TextIO
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
import csv
|
||||
@@ -38,13 +38,26 @@ def _to_float(numstr: str, max_value: float) -> float:
|
||||
return num
|
||||
|
||||
|
||||
def _extent_to_rank(extent: int) -> int:
|
||||
""" Guess a suitable search rank from the extent of a postcode.
|
||||
"""
|
||||
if extent <= 100:
|
||||
return 25
|
||||
if extent <= 3000:
|
||||
return 23
|
||||
return 21
|
||||
|
||||
|
||||
class _PostcodeCollector:
|
||||
""" Collector for postcodes of a single country.
|
||||
"""
|
||||
|
||||
def __init__(self, country: str, matcher: Optional[CountryPostcodeMatcher]):
|
||||
def __init__(self, country: str, matcher: Optional[CountryPostcodeMatcher],
|
||||
default_extent: int, exclude: set[str] = set()):
|
||||
self.country = country
|
||||
self.matcher = matcher
|
||||
self.extent = default_extent
|
||||
self.exclude = exclude
|
||||
self.collected: Dict[str, PointsCentroid] = defaultdict(PointsCentroid)
|
||||
self.normalization_cache: Optional[Tuple[str, Optional[str]]] = None
|
||||
|
||||
@@ -61,7 +74,7 @@ class _PostcodeCollector:
|
||||
normalized = self.matcher.normalize(match) if match else None
|
||||
self.normalization_cache = (postcode, normalized)
|
||||
|
||||
if normalized:
|
||||
if normalized and normalized not in self.exclude:
|
||||
self.collected[normalized] += (x, y)
|
||||
|
||||
def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
|
||||
@@ -73,61 +86,37 @@ class _PostcodeCollector:
|
||||
"""
|
||||
if project_dir is not None:
|
||||
self._update_from_external(analyzer, project_dir)
|
||||
to_add, to_delete, to_update = self._compute_changes(conn)
|
||||
|
||||
LOG.info("Processing country '%s' (%s added, %s deleted, %s updated).",
|
||||
self.country, len(to_add), len(to_delete), len(to_update))
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT postcode FROM location_postcodes
|
||||
WHERE country_code = %s AND osm_id is null""",
|
||||
(self.country, ))
|
||||
to_delete = [row[0] for row in cur if row[0] not in self.collected]
|
||||
|
||||
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
|
||||
for k, v in self.collected.items()]
|
||||
self.collected = defaultdict(PointsCentroid)
|
||||
|
||||
LOG.info("Processing country '%s' (%s added, %s deleted).",
|
||||
self.country, len(to_add), len(to_delete))
|
||||
|
||||
with conn.cursor() as cur:
|
||||
if to_add:
|
||||
cur.executemany(pysql.SQL(
|
||||
"""INSERT INTO location_postcode
|
||||
(place_id, indexed_status, country_code,
|
||||
postcode, geometry)
|
||||
VALUES (nextval('seq_place'), 1, {}, %s,
|
||||
ST_SetSRID(ST_MakePoint(%s, %s), 4326))
|
||||
""").format(pysql.Literal(self.country)),
|
||||
"""INSERT INTO location_postcodes
|
||||
(country_code, rank_search, postcode, centroid, geometry)
|
||||
VALUES ({}, {}, %(pc)s,
|
||||
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326),
|
||||
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {}))
|
||||
""").format(pysql.Literal(self.country),
|
||||
pysql.Literal(_extent_to_rank(self.extent)),
|
||||
pysql.Literal(self.extent)),
|
||||
to_add)
|
||||
if to_delete:
|
||||
cur.execute("""DELETE FROM location_postcode
|
||||
cur.execute("""DELETE FROM location_postcodes
|
||||
WHERE country_code = %s and postcode = any(%s)
|
||||
AND osm_id is null
|
||||
""", (self.country, to_delete))
|
||||
if to_update:
|
||||
cur.executemany(
|
||||
pysql.SQL("""UPDATE location_postcode
|
||||
SET indexed_status = 2,
|
||||
geometry = ST_SetSRID(ST_Point(%s, %s), 4326)
|
||||
WHERE country_code = {} and postcode = %s
|
||||
""").format(pysql.Literal(self.country)),
|
||||
to_update)
|
||||
|
||||
def _compute_changes(
|
||||
self, conn: Connection
|
||||
) -> Tuple[List[Tuple[str, float, float]], List[str], List[Tuple[float, float, str]]]:
|
||||
""" Compute which postcodes from the collected postcodes have to be
|
||||
added or modified and which from the location_postcode table
|
||||
have to be deleted.
|
||||
"""
|
||||
to_update = []
|
||||
to_delete = []
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT postcode, ST_X(geometry), ST_Y(geometry)
|
||||
FROM location_postcode
|
||||
WHERE country_code = %s""",
|
||||
(self.country, ))
|
||||
for postcode, x, y in cur:
|
||||
pcobj = self.collected.pop(postcode, None)
|
||||
if pcobj:
|
||||
newx, newy = pcobj.centroid()
|
||||
if abs(x - newx) > 0.0000001 or abs(y - newy) > 0.0000001:
|
||||
to_update.append((newx, newy, postcode))
|
||||
else:
|
||||
to_delete.append(postcode)
|
||||
|
||||
to_add = [(k, *v.centroid()) for k, v in self.collected.items()]
|
||||
self.collected = defaultdict(PointsCentroid)
|
||||
|
||||
return to_add, to_delete, to_update
|
||||
|
||||
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
|
||||
""" Look for an external postcode file for the active country in
|
||||
@@ -152,7 +141,7 @@ class _PostcodeCollector:
|
||||
_to_float(row['lat'], 90))
|
||||
self.collected[postcode] += centroid
|
||||
except ValueError:
|
||||
LOG.warning("Bad coordinates %s, %s in %s country postcode file.",
|
||||
LOG.warning("Bad coordinates %s, %s in '%s' country postcode file.",
|
||||
row['lat'], row['lon'], self.country)
|
||||
|
||||
finally:
|
||||
@@ -175,63 +164,158 @@ class _PostcodeCollector:
|
||||
|
||||
|
||||
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None:
|
||||
""" Update the table of artificial postcodes.
|
||||
|
||||
Computes artificial postcode centroids from the placex table,
|
||||
potentially enhances it with external data and then updates the
|
||||
postcodes in the table 'location_postcode'.
|
||||
""" Update the table of postcodes from the input tables
|
||||
placex and place_postcode.
|
||||
"""
|
||||
matcher = PostcodeFormatter()
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
with connect(dsn) as conn:
|
||||
# First get the list of countries that currently have postcodes.
|
||||
# (Doing this before starting to insert, so it is fast on import.)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT DISTINCT country_code FROM location_postcode")
|
||||
todo_countries = set((row[0] for row in cur))
|
||||
|
||||
# Recompute the list of valid postcodes from placex.
|
||||
with conn.cursor(name="placex_postcodes") as cur:
|
||||
cur.execute("""
|
||||
SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
|
||||
FROM (SELECT
|
||||
COALESCE(plx.country_code,
|
||||
get_country_code(ST_Centroid(pl.geometry))) as cc,
|
||||
pl.address->'postcode' as pc,
|
||||
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
|
||||
FROM place AS pl LEFT OUTER JOIN placex AS plx
|
||||
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
||||
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
|
||||
WHERE pc IS NOT null AND cc IS NOT null
|
||||
ORDER BY cc, pc""")
|
||||
|
||||
collector = None
|
||||
|
||||
for country, postcode, x, y in cur:
|
||||
if collector is None or country != collector.country:
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
collector = _PostcodeCollector(country, matcher.get_matcher(country))
|
||||
todo_countries.discard(country)
|
||||
collector.add(postcode, x, y)
|
||||
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
|
||||
# Now handle any countries that are only in the postcode table.
|
||||
for country in todo_countries:
|
||||
fmt = matcher.get_matcher(country)
|
||||
_PostcodeCollector(country, fmt).commit(conn, analyzer, project_dir)
|
||||
|
||||
# Backfill country_code column where required
|
||||
conn.execute("""UPDATE place_postcode
|
||||
SET country_code = get_country_code(centroid)
|
||||
WHERE country_code is null
|
||||
""")
|
||||
# Now update first postcode areas
|
||||
_update_postcode_areas(conn, analyzer, matcher)
|
||||
# Then fill with estimated postcode centroids from other info
|
||||
_update_guessed_postcode(conn, analyzer, matcher, project_dir)
|
||||
conn.commit()
|
||||
|
||||
analyzer.update_postcodes_from_db()
|
||||
|
||||
|
||||
def can_compute(dsn: str) -> bool:
|
||||
def _insert_postcode_areas(conn: Connection, country_code: str,
|
||||
extent: int, pcs: list[dict[str, str]]) -> None:
|
||||
if pcs:
|
||||
with conn.cursor() as cur:
|
||||
cur.executemany(
|
||||
pysql.SQL(
|
||||
""" INSERT INTO location_postcodes
|
||||
(osm_id, country_code, rank_search, postcode, centroid, geometry)
|
||||
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
|
||||
FROM place_postcode
|
||||
WHERE osm_type = 'R'
|
||||
and country_code = {} and postcode = %(in)s
|
||||
and geometry is not null
|
||||
""").format(pysql.Literal(_extent_to_rank(extent)),
|
||||
pysql.Literal(country_code)),
|
||||
pcs)
|
||||
|
||||
|
||||
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
matcher: PostcodeFormatter) -> None:
|
||||
""" Update the postcode areas made from postcode boundaries.
|
||||
"""
|
||||
Check that the place table exists so that
|
||||
postcodes can be computed.
|
||||
# first delete all areas that have gone
|
||||
conn.execute(""" DELETE FROM location_postcodes pc
|
||||
WHERE pc.osm_id is not null
|
||||
AND NOT EXISTS(
|
||||
SELECT * FROM place_postcode pp
|
||||
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
|
||||
and geometry is not null)
|
||||
""")
|
||||
# now insert all in country batches, triggers will ensure proper updates
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(""" SELECT country_code, postcode FROM place_postcode
|
||||
WHERE geometry is not null and osm_type = 'R'
|
||||
ORDER BY country_code
|
||||
""")
|
||||
country_code = None
|
||||
fmt = None
|
||||
pcs = []
|
||||
for cc, postcode in cur:
|
||||
if country_code is None:
|
||||
country_code = cc
|
||||
fmt = matcher.get_matcher(country_code)
|
||||
elif country_code != cc:
|
||||
_insert_postcode_areas(conn, country_code,
|
||||
matcher.get_postcode_extent(country_code), pcs)
|
||||
country_code = cc
|
||||
fmt = matcher.get_matcher(country_code)
|
||||
pcs = []
|
||||
assert fmt is not None
|
||||
|
||||
if (m := fmt.match(postcode)):
|
||||
pcs.append({'out': fmt.normalize(m), 'in': postcode})
|
||||
|
||||
if country_code is not None and pcs:
|
||||
_insert_postcode_areas(conn, country_code,
|
||||
matcher.get_postcode_extent(country_code), pcs)
|
||||
|
||||
|
||||
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
|
||||
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None:
|
||||
""" Computes artificial postcode centroids from the placex table,
|
||||
potentially enhances it with external data and then updates the
|
||||
postcodes in the table 'location_postcodes'.
|
||||
"""
|
||||
# First get the list of countries that currently have postcodes.
|
||||
# (Doing this before starting to insert, so it is fast on import.)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
|
||||
WHERE osm_id is null""")
|
||||
todo_countries = {row[0] for row in cur}
|
||||
|
||||
# Next, get the list of postcodes that are already covered by areas.
|
||||
area_pcs = defaultdict(set)
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT country_code, postcode
|
||||
FROM location_postcodes WHERE osm_id is not null
|
||||
ORDER BY country_code""")
|
||||
for cc, pc in cur:
|
||||
area_pcs[cc].add(pc)
|
||||
|
||||
# Create a temporary table which contains coverage of the postcode areas.
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("DROP TABLE IF EXISTS _global_postcode_area")
|
||||
cur.execute("""CREATE TABLE _global_postcode_area AS
|
||||
(SELECT ST_SubDivide(ST_Union(geometry)) as geometry
|
||||
FROM place_postcode WHERE geometry is not null)
|
||||
""")
|
||||
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
|
||||
# Recompute the list of valid postcodes from placex.
|
||||
with conn.cursor(name="placex_postcodes") as cur:
|
||||
cur.execute("""
|
||||
SELECT country_code, postcode, ST_X(centroid), ST_Y(centroid)
|
||||
FROM (
|
||||
(SELECT country_code, address->'postcode' as postcode, centroid
|
||||
FROM placex WHERE address ? 'postcode')
|
||||
UNION
|
||||
(SELECT country_code, postcode, centroid
|
||||
FROM place_postcode WHERE geometry is null)
|
||||
) x
|
||||
WHERE not postcode like '%,%' and not postcode like '%;%'
|
||||
AND NOT EXISTS(SELECT * FROM _global_postcode_area g
|
||||
WHERE ST_Intersects(x.centroid, g.geometry))
|
||||
ORDER BY country_code""")
|
||||
|
||||
collector = None
|
||||
|
||||
for country, postcode, x, y in cur:
|
||||
if collector is None or country != collector.country:
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
collector = _PostcodeCollector(country, matcher.get_matcher(country),
|
||||
matcher.get_postcode_extent(country),
|
||||
exclude=area_pcs[country])
|
||||
todo_countries.discard(country)
|
||||
collector.add(postcode, x, y)
|
||||
|
||||
if collector is not None:
|
||||
collector.commit(conn, analyzer, project_dir)
|
||||
|
||||
# Now handle any countries that are only in the postcode table.
|
||||
for country in todo_countries:
|
||||
fmt = matcher.get_matcher(country)
|
||||
ext = matcher.get_postcode_extent(country)
|
||||
_PostcodeCollector(country, fmt, ext,
|
||||
exclude=area_pcs[country]).commit(conn, analyzer, project_dir)
|
||||
|
||||
conn.execute("DROP TABLE IF EXISTS _global_postcode_area")
|
||||
|
||||
|
||||
def can_compute(dsn: str) -> bool:
|
||||
""" Check that the necessary tables exist so that postcodes can be computed.
|
||||
"""
|
||||
with connect(dsn) as conn:
|
||||
return table_exists(conn, 'place')
|
||||
return table_exists(conn, 'place_postcode')
|
||||
|
||||
Reference in New Issue
Block a user