load views as a SQL file and rename to 'secondary importance'

The only requirement for secondary importance is that a raster table
comes out of it. The generic name leaves open where the data comes
from.
This commit is contained in:
Sarah Hoffmann
2022-09-27 21:08:15 +02:00
parent 0ab0f0ea44
commit 3185fad918
4 changed files with 29 additions and 58 deletions

View File

@@ -115,7 +115,7 @@ class NominatimArgs:
address_levels: bool address_levels: bool
functions: bool functions: bool
wiki_data: bool wiki_data: bool
osm_views: bool secondary_importance: bool
importance: bool importance: bool
website: bool website: bool
diffs: bool diffs: bool

View File

@@ -63,8 +63,8 @@ class UpdateRefresh:
help='Update the PL/pgSQL functions in the database') help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true', group.add_argument('--wiki-data', action='store_true',
help='Update Wikipedia/data importance numbers') help='Update Wikipedia/data importance numbers')
group.add_argument('--osm-views', action='store_true', group.add_argument('--secondary-importance', action='store_true',
help='Update OSM views/data importance numbers') help='Update secondary importance raster data')
group.add_argument('--importance', action='store_true', group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)') help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true', group.add_argument('--website', action='store_true',
@@ -117,6 +117,20 @@ class UpdateRefresh:
with connect(args.config.get_libpq_dsn()) as conn: with connect(args.config.get_libpq_dsn()) as conn:
refresh.load_address_levels_from_config(conn, args.config) refresh.load_address_levels_from_config(conn, args.config)
# Attention: must come BEFORE functions
if args.secondary_importance:
with connect(args.config.get_libpq_dsn()) as conn:
# If the table did not exist before, then the importance code
# needs to be enabled.
if not conn.table_exists('secondary_importance'):
args.functions = True
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) > 0:
LOG.fatal('FATAL: Cannot update secondary importance raster data')
return 1
if args.functions: if args.functions:
LOG.warning('Create functions') LOG.warning('Create functions')
with connect(args.config.get_libpq_dsn()) as conn: with connect(args.config.get_libpq_dsn()) as conn:
@@ -133,17 +147,6 @@ class UpdateRefresh:
LOG.fatal('FATAL: Wikipedia importance dump file not found') LOG.fatal('FATAL: Wikipedia importance dump file not found')
return 1 return 1
if args.osm_views:
data_path = Path(args.project_dir)
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path)
if num == 1:
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
return 1
if num == 2:
LOG.fatal('FATAL: PostGIS version number is less than 3')
return 1
# Attention: importance MUST come after wiki data import. # Attention: importance MUST come after wiki data import.
if args.importance: if args.importance:
LOG.warning('Update importance values for database') LOG.warning('Update importance values for database')

View File

@@ -58,8 +58,6 @@ class SetupAll:
group2.add_argument('--no-updates', action='store_true', group2.add_argument('--no-updates', action='store_true',
help="Do not keep tables that are only needed for " help="Do not keep tables that are only needed for "
"updating the database later") "updating the database later")
group2.add_argument('--osm-views', action='store_true',
help='Import OSM views GeoTIFF')
group2.add_argument('--offline', action='store_true', group2.add_argument('--offline', action='store_true',
help="Do not attempt to load any additional data from the internet") help="Do not attempt to load any additional data from the internet")
group3 = parser.add_argument_group('Expert options') group3 = parser.add_argument_group('Expert options')
@@ -108,15 +106,11 @@ class SetupAll:
'Calculating importance values of locations will not ' 'Calculating importance values of locations will not '
'use Wikipedia importance data.') 'use Wikipedia importance data.')
LOG.warning('Importing OSM views GeoTIFF data') LOG.warning('Importing secondary importance raster data')
data_path = Path(args.project_dir) if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path) args.project_dir) != 0:
if num == 1: LOG.error('Secondary importance file not imported. '
LOG.error('OSM views GeoTIFF file not found. ' 'Falling back to default ranking.')
'Calculating importance values of locations will not use OSM views data.')
elif num == 2:
LOG.error('PostGIS version number is less than 3. '
'Calculating importance values of locations will not use OSM views data.')
if args.continue_at is None or args.continue_at == 'load-data': if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables') LOG.warning('Initialise tables')

View File

@@ -9,7 +9,6 @@ Functions for bringing auxiliary data in the database up-to-date.
""" """
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
import logging import logging
import subprocess
from textwrap import dedent from textwrap import dedent
from pathlib import Path from pathlib import Path
@@ -147,48 +146,23 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
return 0 return 0
def import_osm_views_geotiff(dsn: str, data_path: Path) -> int: def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
""" Replaces the OSM views table with new data. """ Replaces the secondary importance raster data table with new data.
Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not Returns 0 if all was well and 1 if the raster SQL file could not
be found. Throws an exception if there was an error reading the file. be found. Returns 2 if the PostGIS major version is lower than 3. Throws an exception if there was an error reading the file.
""" """
datafile = data_path / 'osmviews.tiff' datafile = data_path / 'secondary_importance.sql.gz'
if not datafile.exists(): if not datafile.exists():
return 1 return 1
with connect(dsn) as conn:
with connect(dsn) as conn:
postgis_version = conn.postgis_version_tuple() postgis_version = conn.postgis_version_tuple()
if postgis_version[0] < 3: if postgis_version[0] < 3:
LOG.error('PostGIS version is too old for using OSM raster data.')
return 2 return 2
with conn.cursor() as cur: execute_file(dsn, datafile, ignore_errors=ignore_errors)
cur.drop_table("osm_views")
cur.drop_table("osm_views_stat")
# -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15
reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \
-co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff"
subprocess.run(["/bin/bash", "-c" , reproject_geotiff], check=True)
tile_size = 256
import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \
public.osm_views | psql {dsn} > /dev/null"
subprocess.run(["/bin/bash", "-c" , import_geotiff], check=True)
cleanup = "rm raster2import.tiff"
subprocess.run(["/bin/bash", "-c" , cleanup], check=True)
# To normalize osm views data, the max view value is needed
cur.execute(f"""
CREATE TABLE osm_views_stat AS (
SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count
FROM osm_views CROSS JOIN
generate_series(1, {tile_size}) As x
CROSS JOIN generate_series(1, {tile_size}) As y
WHERE x <= ST_Width(rast) AND y <= ST_Height(rast));
""")
conn.commit()
return 0 return 0