port legacy tokenizer to new postcode handling

Also documents the changes to the SQL functions of the tokenizer.
This commit is contained in:
Sarah Hoffmann
2022-06-08 08:19:55 +02:00
parent e86db3001f
commit 37b2c6a830
5 changed files with 16 additions and 17 deletions

View File

@@ -186,17 +186,17 @@ def update_postcodes(dsn, project_dir, tokenizer):
# Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur:
cur.execute("""
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
FROM (SELECT
COALESCE(plx.country_code,
get_country_code(ST_Centroid(pl.geometry))) as cc,
token_normalized_postcode(pl.address->'postcode') as pc,
pl.address->'postcode' as pc,
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
WHERE pc IS NOT null AND cc IS NOT null
ORDER BY country_code, pc""")
ORDER BY cc, pc""")
collector = None