Improved performance of the postcodes query and cleaned up some code

This commit is contained in:
AntoJvlt
2021-06-12 15:46:08 +02:00
parent ddf866c4c7
commit 3676310efe
3 changed files with 5 additions and 8 deletions

View File

@@ -61,7 +61,7 @@ class UpdateRefresh:
args.threads or 1) args.threads or 1)
indexer.index_postcodes() indexer.index_postcodes()
else: else:
LOG.error("The place table doesn\'t exists. " \ LOG.error("The place table doesn\'t exist. " \
"Postcode updates on a frozen database is not possible.") "Postcode updates on a frozen database is not possible.")
if args.word_counts: if args.word_counts:

View File

@@ -165,15 +165,13 @@ def update_postcodes(dsn, project_dir, tokenizer):
with conn.cursor(name="placex_postcodes") as cur: with conn.cursor(name="placex_postcodes") as cur:
cur.execute(""" cur.execute("""
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
FROM ( FROM (SELECT
SELECT
COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
token_normalized_postcode(pl.address->'postcode') as pc, token_normalized_postcode(pl.address->'postcode') as pc,
COALESCE(ST_Centroid(ST_Collect(plx.centroid)), ST_Centroid(ST_Collect(ST_Centroid(pl.geometry)))) as centroid ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid
FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
GROUP BY cc, pc GROUP BY cc, pc) xx
) xx
WHERE pc IS NOT null AND cc IS NOT null WHERE pc IS NOT null AND cc IS NOT null
ORDER BY country_code, pc""") ORDER BY country_code, pc""")

View File

@@ -114,7 +114,6 @@ def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path,
insert_implicit_postcode, tokenizer): insert_implicit_postcode, tokenizer):
insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511')) insert_implicit_postcode(1, None, 'POINT(10 12)', dict(postcode='AB 4511'))
postcodes.update_postcodes(dsn, tmp_path, tokenizer) postcodes.update_postcodes(dsn, tmp_path, tokenizer)
print(postcode_table.row_set)
assert not postcode_table.row_set assert not postcode_table.row_set