Integrated OSM views into importance computation

This commit is contained in:
Tareq Al-Ahdal
2022-08-25 09:45:18 +02:00
committed by Sarah Hoffmann
parent ac467c7a2d
commit 0ab0f0ea44
8 changed files with 113 additions and 37 deletions

View File

@@ -98,19 +98,74 @@ $$
LANGUAGE plpgsql STABLE; LANGUAGE plpgsql STABLE;
-- Look up the OSM view count stored in the osm_views raster table for a
-- given point.
--
-- The value is read with ST_Value() from one osm_views tile whose convex
-- hull intersects the given centroid. When no tile matches, or the lookup
-- yields NULL, 0 is returned instead.
CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY)
  RETURNS BIGINT
  AS $$
DECLARE
  view_count BIGINT;
BEGIN
  SELECT ST_Value(tile.rast, centroid)
    INTO view_count
    FROM osm_views tile
   WHERE ST_Intersects(ST_ConvexHull(tile.rast), centroid)
   LIMIT 1;

  -- No matching tile or no value in the raster cell: report zero views.
  IF view_count IS NULL THEN
    RETURN 0;
  END IF;

  RETURN view_count;
END;
$$
LANGUAGE plpgsql STABLE;
-- Map a raw OSM view count onto a logarithmic scale relative to the highest
-- view count recorded in osm_views_stat.max_views_count.
--
-- Returns LOG(views) / LOG(max_views) for positive view counts and 0.0 for
-- NULL, zero or negative input. 0.0 is also returned when no usable maximum
-- is available (no row, NULL, or a maximum <= 1): LOG(1) is 0, so the
-- division would otherwise fail with a division-by-zero error, and a
-- missing maximum would otherwise propagate NULL to the caller.
--
-- The function only reads from a table, hence it is declared STABLE.
CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT)
  RETURNS FLOAT
  AS $$
DECLARE
  normalized_osm_views FLOAT;
  max_views BIGINT;
BEGIN
  IF views > 0 THEN
    -- Get the highest view count to use it in normalizing the data
    SELECT max_views_count FROM osm_views_stat INTO max_views;

    IF max_views IS NULL OR max_views <= 1 THEN
      -- No meaningful scale: avoid LOG(0), LOG(1) = 0 as divisor and NULL results.
      normalized_osm_views := 0.0;
    ELSE
      normalized_osm_views := (LOG(views))/(LOG(max_views));
    END IF;
  ELSE
    normalized_osm_views := 0.0;
  END IF;

  RETURN normalized_osm_views;
END;
$$
LANGUAGE plpgsql STABLE;
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE, CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2), country_code varchar(2),
osm_type varchar(1), osm_id BIGINT) osm_type varchar(1), osm_id BIGINT,
centroid GEOMETRY)
RETURNS place_importance RETURNS place_importance
AS $$ AS $$
DECLARE DECLARE
match RECORD; match RECORD;
result place_importance; result place_importance;
osm_views_exists BIGINT;
views BIGINT;
BEGIN BEGIN
-- check if osm_views table exists
SELECT COUNT(table_name)
INTO osm_views_exists
FROM information_schema.tables
WHERE table_schema LIKE 'public' AND
table_type LIKE 'BASE TABLE' AND
table_name = 'osm_views';
-- add importance by OSM views if osm_views table exists
IF osm_views_exists THEN
views := get_osm_views(centroid);
result.importance := normalize_osm_views(views) * 0.35;
END IF;
-- add importance by wiki data if the place has one
FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code) FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
WHERE language is not NULL WHERE language is not NULL
LOOP LOOP
result.importance := match.importance; result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
result.wikipedia := match.language || ':' || match.title; result.wikipedia := match.language || ':' || match.title;
RETURN result; RETURN result;
END LOOP; END LOOP;
@@ -119,13 +174,13 @@ BEGIN
FOR match IN SELECT * FROM wikipedia_article FOR match IN SELECT * FROM wikipedia_article
WHERE wd_page_title = extratags->'wikidata' WHERE wd_page_title = extratags->'wikidata'
ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
result.importance := match.importance; result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
result.wikipedia := match.language || ':' || match.title; result.wikipedia := match.language || ':' || match.title;
RETURN result; RETURN result;
END LOOP; END LOOP;
END IF; END IF;
RETURN null; RETURN result;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;

View File

@@ -965,7 +965,7 @@ BEGIN
NEW.importance := null; NEW.importance := null;
SELECT wikipedia, importance SELECT wikipedia, importance
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id) FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid)
INTO NEW.wikipedia,NEW.importance; INTO NEW.wikipedia,NEW.importance;
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %} {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
@@ -1088,7 +1088,7 @@ BEGIN
SELECT wikipedia, importance SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code, FROM compute_importance(location.extratags, NEW.country_code,
'N', location.osm_id) 'N', location.osm_id, NEW.centroid)
INTO linked_wikipedia,linked_importance; INTO linked_wikipedia,linked_importance;
-- Use the maximum importance if one could be computed from the linked object. -- Use the maximum importance if one could be computed from the linked object.

View File

@@ -136,13 +136,13 @@ class UpdateRefresh:
if args.osm_views: if args.osm_views:
data_path = Path(args.project_dir) data_path = Path(args.project_dir)
LOG.warning('Import OSM views GeoTIFF data from %s', data_path) LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
with connect(args.config.get_libpq_dsn()) as conn: num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
if refresh.import_osm_views_geotiff(conn, data_path) == 1: if num == 1:
LOG.fatal('FATAL: OSM views GeoTIFF file not found') LOG.fatal('FATAL: OSM views GeoTIFF file not found')
return 1 return 1
if refresh.import_osm_views_geotiff(conn, data_path) == 2: if num == 2:
LOG.fatal('FATAL: PostGIS version number is less than 3') LOG.fatal('FATAL: PostGIS version number is less than 3')
return 1 return 1
# Attention: importance MUST come after wiki data import. # Attention: importance MUST come after wiki data import.
if args.importance: if args.importance:

View File

@@ -105,18 +105,18 @@ class SetupAll:
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0: data_path) > 0:
LOG.error('Wikipedia importance dump file not found. ' LOG.error('Wikipedia importance dump file not found. '
'Calculating importance values of locations will not \ 'Calculating importance values of locations will not '
use Wikipedia importance data.') 'use Wikipedia importance data.')
LOG.warning('Importing OSM views GeoTIFF data') LOG.warning('Importing OSM views GeoTIFF data')
data_path = Path(args.project_dir) data_path = Path(args.project_dir)
with connect(args.config.get_libpq_dsn()) as conn: num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
if refresh.import_osm_views_geotiff(conn, data_path) == 1: if num == 1:
LOG.error('OSM views GeoTIFF file not found. ' LOG.error('OSM views GeoTIFF file not found. '
'Calculating importance values of locations will not use OSM views data.') 'Calculating importance values of locations will not use OSM views data.')
elif refresh.import_osm_views_geotiff(conn, data_path) == 2: elif num == 2:
LOG.error('PostGIS version number is less than 3. ' LOG.error('PostGIS version number is less than 3. '
'Calculating importance values of locations will not use OSM views data.') 'Calculating importance values of locations will not use OSM views data.')
if args.continue_at is None or args.continue_at == 'load-data': if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables') LOG.warning('Initialise tables')

View File

@@ -16,7 +16,7 @@ from pathlib import Path
from psycopg2 import sql as pysql from psycopg2 import sql as pysql
from nominatim.config import Configuration from nominatim.config import Configuration
from nominatim.db.connection import Connection from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import version_str from nominatim.version import version_str
@@ -147,28 +147,48 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
return 0 return 0
def import_osm_views_geotiff(dsn: str, data_path: Path) -> int:
    """ Replaces the OSM views table with new data.

        Returns 0 if all was well, 1 if the OSM views GeoTIFF file could not
        be found and 2 if the PostGIS major version is less than 3.
        Throws an exception if there was an error reading the file or if
        one of the external tools (gdalwarp, raster2pgsql, psql) fails.
    """
    # Local imports: only this function needs them.
    import shlex
    import tempfile

    datafile = data_path / 'osmviews.tiff'

    if not datafile.exists():
        return 1

    with connect(dsn) as conn:
        postgis_version = conn.postgis_version_tuple()
        if postgis_version[0] < 3:
            return 2

        with conn.cursor() as cur:
            cur.drop_table("osm_views")
            cur.drop_table("osm_views_stat")
            # Make the drops visible before the external psql session below
            # recreates osm_views.
            # NOTE(review): assumes the connection is not in autocommit mode,
            # in which case an uncommitted DROP could block psql - confirm.
            conn.commit()

            tile_size = 256

            # The intermediate raster lives in a private temporary directory
            # which is removed again even when one of the steps fails.
            with tempfile.TemporaryDirectory() as tmpdir:
                reprojected = Path(tmpdir) / 'raster2import.tiff'

                # -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
                subprocess.run(['gdalwarp', '-q', '-multi', '-ovr', '3', '-overwrite',
                                '-co', 'COMPRESS=LZW', '-tr', '0.01', '0.01',
                                '-t_srs', 'EPSG:4326',
                                str(datafile), str(reprojected)],
                               check=True)

                # The pipe requires a shell. Quote the file name and the DSN
                # (which may consist of several space-separated key=value
                # parts) and let a failing raster2pgsql fail the pipeline.
                import_geotiff = ("set -o pipefail; "
                                  f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} "
                                  f"{shlex.quote(str(reprojected))} public.osm_views "
                                  f"| psql {shlex.quote(dsn)} > /dev/null")
                subprocess.run(["/bin/bash", "-c", import_geotiff], check=True)

            # To normalize osm views data, the max view value is needed
            cur.execute(f"""
                CREATE TABLE osm_views_stat AS (
                  SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
                  FROM osm_views CROSS JOIN
                  generate_series(1, {tile_size}) As x
                  CROSS JOIN generate_series(1, {tile_size}) As y
                  WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
                """)
        conn.commit()

    return 0
@@ -182,7 +202,7 @@ def recompute_importance(conn: Connection) -> None:
cur.execute(""" cur.execute("""
UPDATE placex SET (wikipedia, importance) = UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance (SELECT wikipedia, importance
FROM compute_importance(extratags, country_code, osm_type, osm_id)) FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
""") """)
cur.execute(""" cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance

View File

@@ -69,7 +69,7 @@ class TestCliImportWithDb:
assert cf_mock.called > 1 assert cf_mock.called > 1
for mock in mocks: for mock in mocks:
assert mock.called > 0, "Mock '{}' not called".format(mock.func_name) assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
def test_import_continue_load_data(self, mock_func_factory): def test_import_continue_load_data(self, mock_func_factory):

View File

@@ -32,7 +32,7 @@ class TestRefresh:
func_mock = mock_func_factory(nominatim.tools.refresh, func) func_mock = mock_func_factory(nominatim.tools.refresh, func)
assert self.call_nominatim('refresh', '--' + command) == 0 assert self.call_nominatim('refresh', '--' + command) == 0
assert func_mock.called > 0 assert func_mock.called == 1
def test_refresh_word_count(self): def test_refresh_word_count(self):

View File

@@ -38,6 +38,7 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE, temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2), country_code varchar(2),
osm_type varchar(1), osm_id BIGINT, osm_type varchar(1), osm_id BIGINT,
centroid GEOMETRY,
OUT importance FLOAT, OUT importance FLOAT,
OUT wikipedia TEXT) OUT wikipedia TEXT)
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""") AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")