mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Integrated OSM views into importance computation
This commit is contained in:
committed by
Sarah Hoffmann
parent
ac467c7a2d
commit
0ab0f0ea44
@@ -98,19 +98,74 @@ $$
|
|||||||
LANGUAGE plpgsql STABLE;
|
LANGUAGE plpgsql STABLE;
|
||||||
|
|
||||||
|
|
||||||
|
-- Look up the OSM views count stored in the osm_views raster at the given
-- point.
--
-- centroid: point geometry at which the raster is sampled.
--
-- Returns the raster value at the point as BIGINT, or 0 when no raster tile
-- covers the point or the sampled value is NULL.
CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY)
  RETURNS BIGINT
  AS $$
DECLARE
  views_at_point BIGINT;
BEGIN
  -- Pick one tile whose convex hull intersects the point and read the
  -- raster value at that location.
  SELECT ST_Value(tile.rast, centroid)
    INTO views_at_point
    FROM osm_views AS tile
   WHERE ST_Intersects(ST_ConvexHull(tile.rast), centroid)
   LIMIT 1;

  -- No matching tile or an empty pixel both count as "no views".
  IF views_at_point IS NULL THEN
    RETURN 0;
  END IF;

  RETURN views_at_point;
END;
$$
LANGUAGE plpgsql STABLE;
|
||||||
|
|
||||||
|
|
||||||
|
-- Scale a raw OSM views count logarithmically against the highest view
-- count recorded in osm_views_stat.
--
-- views: raw view count for a place.
--
-- Returns LOG(views) / LOG(max_views_count). Returns 0.0 when views is
-- NULL or not positive, and also when no usable maximum is available
-- (statistic missing, or maximum <= 1), because the logarithmic ratio is
-- undefined in those cases.
CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT)
  RETURNS FLOAT
  AS $$
DECLARE
  max_views BIGINT;
BEGIN
  -- Places without any recorded views get no contribution.
  IF views IS NULL OR views <= 0 THEN
    RETURN 0.0;
  END IF;

  -- Get the highest view count to use it in normalizing the data
  SELECT max_views_count FROM osm_views_stat INTO max_views;

  -- LOG(1) is 0 and would cause a division by zero, LOG() of a value <= 0
  -- raises an error, and a missing statistic would propagate NULL.
  IF max_views IS NULL OR max_views <= 1 THEN
    RETURN 0.0;
  END IF;

  RETURN LOG(views) / LOG(max_views);
END;
$$
LANGUAGE plpgsql STABLE;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
||||||
country_code varchar(2),
|
country_code varchar(2),
|
||||||
osm_type varchar(1), osm_id BIGINT)
|
osm_type varchar(1), osm_id BIGINT,
|
||||||
|
centroid GEOMETRY)
|
||||||
RETURNS place_importance
|
RETURNS place_importance
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
match RECORD;
|
match RECORD;
|
||||||
result place_importance;
|
result place_importance;
|
||||||
|
osm_views_exists BIGINT;
|
||||||
|
views BIGINT;
|
||||||
BEGIN
|
BEGIN
|
||||||
|
-- check if osm_views table exists
|
||||||
|
SELECT COUNT(table_name)
|
||||||
|
INTO osm_views_exists
|
||||||
|
FROM information_schema.tables
|
||||||
|
WHERE table_schema LIKE 'public' AND
|
||||||
|
table_type LIKE 'BASE TABLE' AND
|
||||||
|
table_name = 'osm_views';
|
||||||
|
|
||||||
|
-- add importance by OSM views if osm_views table exists
|
||||||
|
IF osm_views_exists THEN
|
||||||
|
views := get_osm_views(centroid);
|
||||||
|
result.importance := normalize_osm_views(views) * 0.35;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
-- add importance by wiki data if the place has one
|
||||||
FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
|
FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
|
||||||
WHERE language is not NULL
|
WHERE language is not NULL
|
||||||
LOOP
|
LOOP
|
||||||
result.importance := match.importance;
|
result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
|
||||||
result.wikipedia := match.language || ':' || match.title;
|
result.wikipedia := match.language || ':' || match.title;
|
||||||
RETURN result;
|
RETURN result;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
@@ -119,13 +174,13 @@ BEGIN
|
|||||||
FOR match IN SELECT * FROM wikipedia_article
|
FOR match IN SELECT * FROM wikipedia_article
|
||||||
WHERE wd_page_title = extratags->'wikidata'
|
WHERE wd_page_title = extratags->'wikidata'
|
||||||
ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
|
ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
|
||||||
result.importance := match.importance;
|
result.importance := COALESCE(result.importance, 0) + match.importance * 0.65;
|
||||||
result.wikipedia := match.language || ':' || match.title;
|
result.wikipedia := match.language || ':' || match.title;
|
||||||
RETURN result;
|
RETURN result;
|
||||||
END LOOP;
|
END LOOP;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
RETURN null;
|
RETURN result;
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -965,7 +965,7 @@ BEGIN
|
|||||||
|
|
||||||
NEW.importance := null;
|
NEW.importance := null;
|
||||||
SELECT wikipedia, importance
|
SELECT wikipedia, importance
|
||||||
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id)
|
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid)
|
||||||
INTO NEW.wikipedia,NEW.importance;
|
INTO NEW.wikipedia,NEW.importance;
|
||||||
|
|
||||||
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
|
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
|
||||||
@@ -1088,7 +1088,7 @@ BEGIN
|
|||||||
|
|
||||||
SELECT wikipedia, importance
|
SELECT wikipedia, importance
|
||||||
FROM compute_importance(location.extratags, NEW.country_code,
|
FROM compute_importance(location.extratags, NEW.country_code,
|
||||||
'N', location.osm_id)
|
'N', location.osm_id, NEW.centroid)
|
||||||
INTO linked_wikipedia,linked_importance;
|
INTO linked_wikipedia,linked_importance;
|
||||||
|
|
||||||
-- Use the maximum importance if one could be computed from the linked object.
|
-- Use the maximum importance if one could be computed from the linked object.
|
||||||
|
|||||||
@@ -136,13 +136,13 @@ class UpdateRefresh:
|
|||||||
if args.osm_views:
|
if args.osm_views:
|
||||||
data_path = Path(args.project_dir)
|
data_path = Path(args.project_dir)
|
||||||
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
|
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
|
||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
|
||||||
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
|
if num == 1:
|
||||||
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
|
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
|
||||||
return 1
|
return 1
|
||||||
if refresh.import_osm_views_geotiff(conn, data_path) == 2:
|
if num == 2:
|
||||||
LOG.fatal('FATAL: PostGIS version number is less than 3')
|
LOG.fatal('FATAL: PostGIS version number is less than 3')
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
# Attention: importance MUST come after wiki data import.
|
# Attention: importance MUST come after wiki data import.
|
||||||
if args.importance:
|
if args.importance:
|
||||||
|
|||||||
@@ -105,18 +105,18 @@ class SetupAll:
|
|||||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||||
data_path) > 0:
|
data_path) > 0:
|
||||||
LOG.error('Wikipedia importance dump file not found. '
|
LOG.error('Wikipedia importance dump file not found. '
|
||||||
'Calculating importance values of locations will not \
|
'Calculating importance values of locations will not '
|
||||||
use Wikipedia importance data.')
|
'use Wikipedia importance data.')
|
||||||
|
|
||||||
LOG.warning('Importing OSM views GeoTIFF data')
|
LOG.warning('Importing OSM views GeoTIFF data')
|
||||||
data_path = Path(args.project_dir)
|
data_path = Path(args.project_dir)
|
||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
|
||||||
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
|
if num == 1:
|
||||||
LOG.error('OSM views GeoTIFF file not found. '
|
LOG.error('OSM views GeoTIFF file not found. '
|
||||||
'Calculating importance values of locations will not use OSM views data.')
|
'Calculating importance values of locations will not use OSM views data.')
|
||||||
elif refresh.import_osm_views_geotiff(conn, data_path) == 2:
|
elif num == 2:
|
||||||
LOG.error('PostGIS version number is less than 3. '
|
LOG.error('PostGIS version number is less than 3. '
|
||||||
'Calculating importance values of locations will not use OSM views data.')
|
'Calculating importance values of locations will not use OSM views data.')
|
||||||
|
|
||||||
if args.continue_at is None or args.continue_at == 'load-data':
|
if args.continue_at is None or args.continue_at == 'load-data':
|
||||||
LOG.warning('Initialise tables')
|
LOG.warning('Initialise tables')
|
||||||
|
|||||||
@@ -16,7 +16,7 @@ from pathlib import Path
|
|||||||
from psycopg2 import sql as pysql
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.config import Configuration
|
from nominatim.config import Configuration
|
||||||
from nominatim.db.connection import Connection
|
from nominatim.db.connection import Connection, connect
|
||||||
from nominatim.db.utils import execute_file
|
from nominatim.db.utils import execute_file
|
||||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||||
from nominatim.version import version_str
|
from nominatim.version import version_str
|
||||||
@@ -147,28 +147,48 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
|
|||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int:
|
def import_osm_views_geotiff(dsn: str, data_path: Path) -> int:
|
||||||
""" Replaces the OSM views table with new data.
|
""" Replaces the OSM views table with new data.
|
||||||
|
|
||||||
Returns 0 if all was well, 1 if the OSM views GeoTIFF file could not
|
Returns 0 if all was well, 1 if the OSM views GeoTIFF file could not
|
||||||
be found and 2 if the PostGIS version is older than 3. Throws an exception if there was an error reading the file.
|
be found and 2 if the PostGIS version is older than 3. Throws an exception if there was an error reading the file.
|
||||||
"""
|
"""
|
||||||
datafile = data_path / 'osmviews.tiff'
|
datafile = data_path / 'osmviews.tiff'
|
||||||
|
|
||||||
if not datafile.exists():
|
if not datafile.exists():
|
||||||
return 1
|
return 1
|
||||||
|
with connect(dsn) as conn:
|
||||||
|
|
||||||
postgis_version = conn.postgis_version_tuple()
|
postgis_version = conn.postgis_version_tuple()
|
||||||
if postgis_version[0] < 3:
|
if postgis_version[0] < 3:
|
||||||
return 2
|
return 2
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('DROP TABLE IF EXISTS "osm_views"')
|
cur.drop_table("osm_views")
|
||||||
conn.commit()
|
cur.drop_table("osm_views_stat")
|
||||||
|
|
||||||
cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \
|
# -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
|
||||||
public.osm_views | psql nominatim > /dev/null"
|
reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \
|
||||||
subprocess.run(["/bin/bash", "-c" , cmd], check=True)
|
-co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff"
|
||||||
|
subprocess.run(["/bin/bash", "-c" , reproject_geotiff], check=True)
|
||||||
|
|
||||||
|
tile_size = 256
|
||||||
|
import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \
|
||||||
|
public.osm_views | psql {dsn} > /dev/null"
|
||||||
|
subprocess.run(["/bin/bash", "-c" , import_geotiff], check=True)
|
||||||
|
|
||||||
|
cleanup = "rm raster2import.tiff"
|
||||||
|
subprocess.run(["/bin/bash", "-c" , cleanup], check=True)
|
||||||
|
|
||||||
|
# To normalize osm views data, the max view value is needed
|
||||||
|
cur.execute(f"""
|
||||||
|
CREATE TABLE osm_views_stat AS (
|
||||||
|
SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
|
||||||
|
FROM osm_views CROSS JOIN
|
||||||
|
generate_series(1, {tile_size}) As x
|
||||||
|
CROSS JOIN generate_series(1, {tile_size}) As y
|
||||||
|
WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
|
||||||
|
""")
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
@@ -182,7 +202,7 @@ def recompute_importance(conn: Connection) -> None:
|
|||||||
cur.execute("""
|
cur.execute("""
|
||||||
UPDATE placex SET (wikipedia, importance) =
|
UPDATE placex SET (wikipedia, importance) =
|
||||||
(SELECT wikipedia, importance
|
(SELECT wikipedia, importance
|
||||||
FROM compute_importance(extratags, country_code, osm_type, osm_id))
|
FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid))
|
||||||
""")
|
""")
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
|
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ class TestCliImportWithDb:
|
|||||||
assert cf_mock.called > 1
|
assert cf_mock.called > 1
|
||||||
|
|
||||||
for mock in mocks:
|
for mock in mocks:
|
||||||
assert mock.called > 0, "Mock '{}' not called".format(mock.func_name)
|
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
||||||
|
|
||||||
|
|
||||||
def test_import_continue_load_data(self, mock_func_factory):
|
def test_import_continue_load_data(self, mock_func_factory):
|
||||||
|
|||||||
@@ -32,7 +32,7 @@ class TestRefresh:
|
|||||||
func_mock = mock_func_factory(nominatim.tools.refresh, func)
|
func_mock = mock_func_factory(nominatim.tools.refresh, func)
|
||||||
|
|
||||||
assert self.call_nominatim('refresh', '--' + command) == 0
|
assert self.call_nominatim('refresh', '--' + command) == 0
|
||||||
assert func_mock.called > 0
|
assert func_mock.called == 1
|
||||||
|
|
||||||
|
|
||||||
def test_refresh_word_count(self):
|
def test_refresh_word_count(self):
|
||||||
|
|||||||
@@ -38,6 +38,7 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db
|
|||||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
||||||
country_code varchar(2),
|
country_code varchar(2),
|
||||||
osm_type varchar(1), osm_id BIGINT,
|
osm_type varchar(1), osm_id BIGINT,
|
||||||
|
centroid GEOMETRY,
|
||||||
OUT importance FLOAT,
|
OUT importance FLOAT,
|
||||||
OUT wikipedia TEXT)
|
OUT wikipedia TEXT)
|
||||||
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")
|
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")
|
||||||
|
|||||||
Reference in New Issue
Block a user