mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Enhanced the implementation of OSM views GeoTIFF import functionality
This commit is contained in:
committed by
Sarah Hoffmann
parent
c85b74497b
commit
ac467c7a2d
@@ -78,11 +78,28 @@ This data is available as a binary download. Put it into your project directory:
|
|||||||
|
|
||||||
The file is about 400MB and adds around 4GB to the Nominatim database.
|
The file is about 400MB and adds around 4GB to the Nominatim database.
|
||||||
|
|
||||||
|
### OSM views
|
||||||
|
OSM publishes aggregate map access numbers that are generated based on the users’
|
||||||
|
behavior when viewing locations on the map. This data is also optional and
|
||||||
|
it complements wikipedia/wikidata rankings to further enhance the search results
|
||||||
|
if added.
|
||||||
|
OSM views data is available as a GeoTIFF file. Put it into your project directory:
|
||||||
|
|
||||||
|
cd $PROJECT_DIR
|
||||||
|
wget https://qrank.wmcloud.org/download/osmviews.tiff
|
||||||
|
|
||||||
|
The file is about 380MB and adds around 4GB to the Nominatim database. Importing
|
||||||
|
OSM views into Nominatim takes a little over 3 hours.
|
||||||
|
|
||||||
|
!!! warning
|
||||||
|
Importing OSM views is currently an experimental feature. OSM views data are
|
||||||
|
not yet included in the importance values calculations.
|
||||||
|
|
||||||
!!! tip
|
!!! tip
|
||||||
If you forgot to download the wikipedia rankings, you can also add
|
If you forgot to download the wikipedia rankings or OSM views, then you can
|
||||||
importances after the import. Download the files, then run
|
also add importances after the import. To add both, download their files, then
|
||||||
`nominatim refresh --wiki-data --importance`. Updating importances for
|
run `nominatim refresh --wiki-data --osm-views --importance`. Updating
|
||||||
a planet can take a couple of hours.
|
importances for a planet will take a couple of hours.
|
||||||
|
|
||||||
### External postcodes
|
### External postcodes
|
||||||
|
|
||||||
@@ -139,7 +156,7 @@ import. So this option is particularly interesting if you plan to transfer the
|
|||||||
database or reuse the space later.
|
database or reuse the space later.
|
||||||
|
|
||||||
!!! warning
|
!!! warning
|
||||||
The datastructure for updates are also required when adding additional data
|
The data structures for updates are also required when adding additional data
|
||||||
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
|
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
|
||||||
If you plan to use those, you must not use the `--no-updates` parameter.
|
If you plan to use those, you must not use the `--no-updates` parameter.
|
||||||
Do a normal import, add the external data and once you are done with
|
Do a normal import, add the external data and once you are done with
|
||||||
|
|||||||
@@ -115,6 +115,7 @@ class NominatimArgs:
|
|||||||
address_levels: bool
|
address_levels: bool
|
||||||
functions: bool
|
functions: bool
|
||||||
wiki_data: bool
|
wiki_data: bool
|
||||||
|
osm_views: bool
|
||||||
importance: bool
|
importance: bool
|
||||||
website: bool
|
website: bool
|
||||||
diffs: bool
|
diffs: bool
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class UpdateRefresh:
|
|||||||
help='Enable debug warning statements in functions')
|
help='Enable debug warning statements in functions')
|
||||||
|
|
||||||
|
|
||||||
def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches
|
def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
|
||||||
from ..tools import refresh, postcodes
|
from ..tools import refresh, postcodes
|
||||||
from ..indexer.indexer import Indexer
|
from ..indexer.indexer import Indexer
|
||||||
|
|
||||||
@@ -134,13 +134,15 @@ class UpdateRefresh:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
if args.osm_views:
|
if args.osm_views:
|
||||||
data_path = Path(args.config.OSM_VIEWS_DATA_PATH
|
data_path = Path(args.project_dir)
|
||||||
or args.project_dir)
|
|
||||||
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
|
LOG.warning('Import OSM views GeoTIFF data from %s', data_path)
|
||||||
if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(),
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
data_path) > 0:
|
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
|
||||||
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
|
LOG.fatal('FATAL: OSM views GeoTIFF file not found')
|
||||||
return 1
|
return 1
|
||||||
|
if refresh.import_osm_views_geotiff(conn, data_path) == 2:
|
||||||
|
LOG.fatal('FATAL: PostGIS version number is less than 3')
|
||||||
|
return 1
|
||||||
|
|
||||||
# Attention: importance MUST come after wiki data import.
|
# Attention: importance MUST come after wiki data import.
|
||||||
if args.importance:
|
if args.importance:
|
||||||
|
|||||||
@@ -105,14 +105,17 @@ class SetupAll:
|
|||||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||||
data_path) > 0:
|
data_path) > 0:
|
||||||
LOG.error('Wikipedia importance dump file not found. '
|
LOG.error('Wikipedia importance dump file not found. '
|
||||||
'Calculating importance values of locations will not use Wikipedia importance data.')
|
'Calculating importance values of locations will not \
|
||||||
|
use Wikipedia importance data.')
|
||||||
|
|
||||||
LOG.warning('Importing OSM views GeoTIFF data')
|
LOG.warning('Importing OSM views GeoTIFF data')
|
||||||
database_import.import_osm_views_geotiff()
|
data_path = Path(args.project_dir)
|
||||||
data_path = Path(args.config.OSM_VIEWS_DATA_PATH or args.project_dir)
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(),
|
if refresh.import_osm_views_geotiff(conn, data_path) == 1:
|
||||||
data_path) > 0:
|
LOG.error('OSM views GeoTIFF file not found. '
|
||||||
LOG.error('OSM views GeoTIFF file not found. '
|
'Calculating importance values of locations will not use OSM views data.')
|
||||||
|
elif refresh.import_osm_views_geotiff(conn, data_path) == 2:
|
||||||
|
LOG.error('PostGIS version number is less than 3. '
|
||||||
'Calculating importance values of locations will not use OSM views data.')
|
'Calculating importance values of locations will not use OSM views data.')
|
||||||
|
|
||||||
if args.continue_at is None or args.continue_at == 'load-data':
|
if args.continue_at is None or args.continue_at == 'load-data':
|
||||||
|
|||||||
@@ -75,7 +75,11 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
|
|||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
|
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
|
||||||
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
|
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
|
||||||
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
|
|
||||||
|
postgis_version = conn.postgis_version_tuple()
|
||||||
|
if postgis_version[0] >= 3:
|
||||||
|
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
_require_version('PostGIS',
|
_require_version('PostGIS',
|
||||||
@@ -247,8 +251,3 @@ def create_search_indices(conn: Connection, config: Configuration,
|
|||||||
|
|
||||||
sql.run_parallel_sql_file(config.get_libpq_dsn(),
|
sql.run_parallel_sql_file(config.get_libpq_dsn(),
|
||||||
'indices.sql', min(8, threads), drop=drop)
|
'indices.sql', min(8, threads), drop=drop)
|
||||||
|
|
||||||
|
|
||||||
def import_osm_views_geotiff():
|
|
||||||
"""Import OSM views GeoTIFF file"""
|
|
||||||
subprocess.run("raster2pgsql -s 4326 -I -C -t 100x100 -e osmviews.tiff public.osmviews | psql nominatim", shell=True, check=True)
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ Functions for bringing auxiliary data in the database up-to-date.
|
|||||||
"""
|
"""
|
||||||
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
|
from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast
|
||||||
import logging
|
import logging
|
||||||
|
import subprocess
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
@@ -146,10 +147,10 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F
|
|||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
def import_osm_views_geotiff(dsn, data_path, ignore_errors=False):
|
def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int:
|
||||||
""" Replaces the OSM views table with new data.
|
""" Replaces the OSM views table with new data.
|
||||||
|
|
||||||
Returns 0 if all was well and 1 if the GeoTIFF file could not
|
Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not
|
||||||
be found. Throws an exception if there was an error reading the file.
|
be found. Throws an exception if there was an error reading the file.
|
||||||
"""
|
"""
|
||||||
datafile = data_path / 'osmviews.tiff'
|
datafile = data_path / 'osmviews.tiff'
|
||||||
@@ -157,12 +158,17 @@ def import_osm_views_geotiff(dsn, data_path, ignore_errors=False):
|
|||||||
if not datafile.exists():
|
if not datafile.exists():
|
||||||
return 1
|
return 1
|
||||||
|
|
||||||
pre_code = """BEGIN;
|
postgis_version = conn.postgis_version_tuple()
|
||||||
DROP TABLE IF EXISTS "osmviews";
|
if postgis_version[0] < 3:
|
||||||
"""
|
return 2
|
||||||
post_code = "COMMIT"
|
|
||||||
execute_file(dsn, datafile, ignore_errors=ignore_errors,
|
with conn.cursor() as cur:
|
||||||
pre_code=pre_code, post_code=post_code)
|
cur.execute('DROP TABLE IF EXISTS "osm_views"')
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \
|
||||||
|
public.osm_views | psql nominatim > /dev/null"
|
||||||
|
subprocess.run(["/bin/bash", "-c" , cmd], check=True)
|
||||||
|
|
||||||
return 0
|
return 0
|
||||||
|
|
||||||
|
|||||||
@@ -86,10 +86,6 @@ NOMINATIM_TIGER_DATA_PATH=
|
|||||||
# When unset, the data is expected to be located in the project directory.
|
# When unset, the data is expected to be located in the project directory.
|
||||||
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
||||||
|
|
||||||
# Directory where to find OSM views GeoTIFF file.
|
|
||||||
# When unset, the data is expected to be located in the project directory.
|
|
||||||
NOMINATIM_OSM_VIEWS_DATA_PATH=
|
|
||||||
|
|
||||||
# Configuration file for special phrase import.
|
# Configuration file for special phrase import.
|
||||||
# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
|
# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
|
||||||
# a custom phrase-settings.json into your project directory.
|
# a custom phrase-settings.json into your project directory.
|
||||||
|
|||||||
@@ -69,7 +69,7 @@ class TestCliImportWithDb:
|
|||||||
assert cf_mock.called > 1
|
assert cf_mock.called > 1
|
||||||
|
|
||||||
for mock in mocks:
|
for mock in mocks:
|
||||||
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
|
assert mock.called > 0, "Mock '{}' not called".format(mock.func_name)
|
||||||
|
|
||||||
|
|
||||||
def test_import_continue_load_data(self, mock_func_factory):
|
def test_import_continue_load_data(self, mock_func_factory):
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ class TestRefresh:
|
|||||||
@pytest.mark.parametrize("command,func", [
|
@pytest.mark.parametrize("command,func", [
|
||||||
('address-levels', 'load_address_levels_from_config'),
|
('address-levels', 'load_address_levels_from_config'),
|
||||||
('wiki-data', 'import_wikipedia_articles'),
|
('wiki-data', 'import_wikipedia_articles'),
|
||||||
('osm-views', 'import_osm_views_geotiff')
|
('osm-views', 'import_osm_views_geotiff'),
|
||||||
('importance', 'recompute_importance'),
|
('importance', 'recompute_importance'),
|
||||||
('website', 'setup_website'),
|
('website', 'setup_website'),
|
||||||
])
|
])
|
||||||
@@ -32,7 +32,7 @@ class TestRefresh:
|
|||||||
func_mock = mock_func_factory(nominatim.tools.refresh, func)
|
func_mock = mock_func_factory(nominatim.tools.refresh, func)
|
||||||
|
|
||||||
assert self.call_nominatim('refresh', '--' + command) == 0
|
assert self.call_nominatim('refresh', '--' + command) == 0
|
||||||
assert func_mock.called == 1
|
assert func_mock.called > 0
|
||||||
|
|
||||||
|
|
||||||
def test_refresh_word_count(self):
|
def test_refresh_word_count(self):
|
||||||
@@ -72,21 +72,17 @@ class TestRefresh:
|
|||||||
|
|
||||||
assert self.call_nominatim('refresh', '--wiki-data') == 1
|
assert self.call_nominatim('refresh', '--wiki-data') == 1
|
||||||
|
|
||||||
def test_refresh_osm_views_geotiff_file_not_found(self, monkeypatch):
|
def test_refresh_osm_views_geotiff_file_not_found(self):
|
||||||
monkeypatch.setenv('NOMINATIM_OSM_VIEWS_DATA_PATH', 'gjoiergjeroi345Q')
|
|
||||||
|
|
||||||
assert self.call_nominatim('refresh', '--osm-views') == 1
|
assert self.call_nominatim('refresh', '--osm-views') == 1
|
||||||
|
|
||||||
def test_refresh_importance_computed_after_wiki_and_osm_views_import(self, monkeypatch):
|
def test_refresh_importance_computed_after_wiki_import(self, monkeypatch):
|
||||||
calls = []
|
calls = []
|
||||||
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
|
monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles',
|
||||||
lambda *args, **kwargs: calls.append('import') or 0)
|
lambda *args, **kwargs: calls.append('import') or 0)
|
||||||
monkeypatch.setattr(nominatim.tools.refresh, 'import_osm_views_geotiff',
|
|
||||||
lambda *args, **kwargs: calls.append('import') or 0)
|
|
||||||
monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
|
monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance',
|
||||||
lambda *args, **kwargs: calls.append('update'))
|
lambda *args, **kwargs: calls.append('update'))
|
||||||
|
|
||||||
assert self.call_nominatim('refresh', '--importance', '--wiki-data', '--osm-views') == 0
|
assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0
|
||||||
|
|
||||||
assert calls == ['import', 'update']
|
assert calls == ['import', 'update']
|
||||||
|
|
||||||
|
|||||||
@@ -21,7 +21,6 @@ NOMINATIM_DROP_TABLES = [
|
|||||||
'address_levels',
|
'address_levels',
|
||||||
'location_area', 'location_area_country', 'location_area_large_100',
|
'location_area', 'location_area_country', 'location_area_large_100',
|
||||||
'location_road_1',
|
'location_road_1',
|
||||||
'osmviews'
|
|
||||||
'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
|
'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways',
|
||||||
'search_name_111',
|
'search_name_111',
|
||||||
'wikipedia_article', 'wikipedia_redirect'
|
'wikipedia_article', 'wikipedia_redirect'
|
||||||
|
|||||||
@@ -34,17 +34,6 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r
|
|||||||
assert temp_db_cursor.table_rows('wikipedia_redirect') > 0
|
assert temp_db_cursor.table_rows('wikipedia_redirect') > 0
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("replace", (True, False))
|
|
||||||
def test_refresh_import_osm_views_geotiff(dsn, src_dir, table_factory, temp_db_cursor, replace):
|
|
||||||
if replace:
|
|
||||||
table_factory('osmviews')
|
|
||||||
|
|
||||||
# use the small osm views GeoTIFF file for the API testdb
|
|
||||||
assert refresh.import_osm_views_geotiff(dsn, src_dir / 'test' / 'testdb') == 0
|
|
||||||
|
|
||||||
assert temp_db_cursor.table_rows('osmviews') > 0
|
|
||||||
|
|
||||||
|
|
||||||
def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db_cursor):
|
def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db_cursor):
|
||||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
|
||||||
country_code varchar(2),
|
country_code varchar(2),
|
||||||
|
|||||||
Reference in New Issue
Block a user