mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 01:47:57 +00:00
Compare commits
17 Commits
8188689765
...
3.5.x
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
489653b6ed | ||
|
|
bb0c42e638 | ||
|
|
2d226be156 | ||
|
|
61fe274c6e | ||
|
|
0ac99bc2a9 | ||
|
|
76ddace267 | ||
|
|
777c70926a | ||
|
|
b2886426b7 | ||
|
|
a836ca8991 | ||
|
|
30016b98b7 | ||
|
|
0f5fc10e31 | ||
|
|
72335fb631 | ||
|
|
a863392938 | ||
|
|
168c2e222e | ||
|
|
770f8e31a8 | ||
|
|
dd55a76d6d | ||
|
|
670cff0d09 |
@@ -20,7 +20,7 @@ project(nominatim)
|
||||
|
||||
set(NOMINATIM_VERSION_MAJOR 3)
|
||||
set(NOMINATIM_VERSION_MINOR 5)
|
||||
set(NOMINATIM_VERSION_PATCH 0)
|
||||
set(NOMINATIM_VERSION_PATCH 2)
|
||||
|
||||
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
|
||||
|
||||
|
||||
19
ChangeLog
19
ChangeLog
@@ -1,3 +1,22 @@
|
||||
3.5.2
|
||||
|
||||
* ensure that wikipedia tags are imported for all styles
|
||||
* reinstate verbosity for indexing during updates
|
||||
* make house number reappear in display name on named POIs
|
||||
* introduce batch processing in indexer to avoid transaction ID overrun
|
||||
* increase splitting for large geometries to improve indexing speed
|
||||
* remove deprecated get_magic_quotes_gpc() function
|
||||
* make sure that all postcodes have an entry in word and are thus searchable
|
||||
* remove use of ST_Covers in conjunction with ST_Intersects,
|
||||
causes bad query planning and slow updates in Postgis3
|
||||
* update osm2pgsql
|
||||
|
||||
3.5.1
|
||||
|
||||
* disable jit and parallel processing in PostgreSQL for osm2pgsql
|
||||
* update libosmium to 2.15.6 (fixes an issue with processing hanging
|
||||
on large multipolygons)
|
||||
|
||||
3.5.0
|
||||
|
||||
* structured select on HTML search page
|
||||
|
||||
@@ -2,8 +2,3 @@
|
||||
|
||||
require_once(CONST_BasePath.'/lib/lib.php');
|
||||
require_once(CONST_BasePath.'/lib/DB.php');
|
||||
|
||||
if (get_magic_quotes_gpc()) {
|
||||
echo "Please disable magic quotes in your php.ini configuration\n";
|
||||
exit;
|
||||
}
|
||||
|
||||
0
nominatim/indexer/__init__.py
Normal file
0
nominatim/indexer/__init__.py
Normal file
52
nominatim/indexer/progress.py
Normal file
52
nominatim/indexer/progress.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim.
# Copyright (C) 2020 Sarah Hoffmann
"""Progress reporting helper for the Nominatim indexing process."""

import logging
from datetime import datetime

log = logging.getLogger()


class ProgressLogger(object):
    """ Tracks and prints progress for the indexing process.

        `name` is the name of the indexing step being tracked.
        `total` sets up the total number of items that need processing.
        `log_interval` denotes the interval in seconds at which progress
        should be reported.
    """

    def __init__(self, name, total, log_interval=1):
        self.name = name
        self.total_places = total
        self.done_places = 0
        self.rank_start_time = datetime.now()
        # Fix: remember the requested reporting interval. The original
        # accepted `log_interval` but never stored or used it, so reports
        # always appeared roughly once per second regardless of the value.
        self.log_interval = log_interval
        # When INFO logging is disabled, push the threshold past `total`
        # so no intermediate report is ever produced.
        self.next_info = 100 if log.isEnabledFor(logging.INFO) else total + 1

    def add(self, num=1):
        """ Mark `num` places as processed. Print a log message if the
            logging is at least info and the log interval has passed.
        """
        self.done_places += num

        if self.done_places >= self.next_info:
            now = datetime.now()
            done_time = (now - self.rank_start_time).total_seconds()
            # Guard against a zero elapsed time on coarse system clocks,
            # which would otherwise raise ZeroDivisionError below.
            if done_time == 0:
                done_time = 1
            places_per_sec = self.done_places / done_time
            eta = (self.total_places - self.done_places) / places_per_sec

            log.info("Done {} in {} @ {:.3f} per second - {} ETA (seconds): {:.2f}"
                     .format(self.done_places, int(done_time),
                             places_per_sec, self.name, eta))

            # Schedule the next report roughly `log_interval` seconds ahead
            # at the current throughput.
            self.next_info += int(places_per_sec) * self.log_interval

    def done(self):
        """ Print final statistics about the progress.
        """
        rank_end_time = datetime.now()
        diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
        # Avoid division by zero when the step finished within the
        # clock's resolution (e.g. an empty table).
        if diff_seconds == 0:
            diff_seconds = 1

        log.warning("Done {}/{} in {} @ {:.3f} per second - FINISHED {}\n".format(
            self.done_places, self.total_places, int(diff_seconds),
            self.done_places/diff_seconds, self.name))
|
||||
@@ -32,6 +32,8 @@ import psycopg2
|
||||
from psycopg2.extras import wait_select
|
||||
import select
|
||||
|
||||
from indexer.progress import ProgressLogger
|
||||
|
||||
log = logging.getLogger()
|
||||
|
||||
def make_connection(options, asynchronous=False):
|
||||
@@ -55,24 +57,19 @@ class RankRunner(object):
|
||||
def name(self):
|
||||
return "rank {}".format(self.rank)
|
||||
|
||||
def sql_index_sectors(self):
|
||||
return """SELECT geometry_sector, count(*) FROM placex
|
||||
def sql_count_objects(self):
|
||||
return """SELECT count(*) FROM placex
|
||||
WHERE rank_search = {} and indexed_status > 0
|
||||
GROUP BY geometry_sector
|
||||
ORDER BY geometry_sector""".format(self.rank)
|
||||
""".format(self.rank)
|
||||
|
||||
def sql_nosector_places(self):
|
||||
def sql_get_objects(self):
|
||||
return """SELECT place_id FROM placex
|
||||
WHERE indexed_status > 0 and rank_search = {}
|
||||
ORDER BY geometry_sector""".format(self.rank)
|
||||
|
||||
def sql_sector_places(self):
|
||||
return """SELECT place_id FROM placex
|
||||
WHERE indexed_status > 0 and rank_search = {}
|
||||
and geometry_sector = %s""".format(self.rank)
|
||||
|
||||
def sql_index_place(self):
|
||||
return "UPDATE placex SET indexed_status = 0 WHERE place_id = %s"
|
||||
def sql_index_place(self, ids):
|
||||
return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
|
||||
.format(','.join((str(i) for i in ids)))
|
||||
|
||||
|
||||
class InterpolationRunner(object):
|
||||
@@ -83,25 +80,19 @@ class InterpolationRunner(object):
|
||||
def name(self):
|
||||
return "interpolation lines (location_property_osmline)"
|
||||
|
||||
def sql_index_sectors(self):
|
||||
return """SELECT geometry_sector, count(*) FROM location_property_osmline
|
||||
WHERE indexed_status > 0
|
||||
GROUP BY geometry_sector
|
||||
ORDER BY geometry_sector"""
|
||||
def sql_count_objects(self):
|
||||
return """SELECT count(*) FROM location_property_osmline
|
||||
WHERE indexed_status > 0"""
|
||||
|
||||
def sql_nosector_places(self):
|
||||
def sql_get_objects(self):
|
||||
return """SELECT place_id FROM location_property_osmline
|
||||
WHERE indexed_status > 0
|
||||
ORDER BY geometry_sector"""
|
||||
|
||||
def sql_sector_places(self):
|
||||
return """SELECT place_id FROM location_property_osmline
|
||||
WHERE indexed_status > 0 and geometry_sector = %s
|
||||
ORDER BY geometry_sector"""
|
||||
|
||||
def sql_index_place(self):
|
||||
def sql_index_place(self, ids):
|
||||
return """UPDATE location_property_osmline
|
||||
SET indexed_status = 0 WHERE place_id = %s"""
|
||||
SET indexed_status = 0 WHERE place_id IN ({})"""\
|
||||
.format(','.join((str(i) for i in ids)))
|
||||
|
||||
|
||||
class DBConnection(object):
|
||||
@@ -210,83 +201,48 @@ class Indexer(object):
|
||||
self.index(RankRunner(rank))
|
||||
|
||||
if self.maxrank == 30:
|
||||
self.index(InterpolationRunner())
|
||||
self.index(InterpolationRunner(), 20)
|
||||
|
||||
self.index(RankRunner(self.maxrank))
|
||||
self.index(RankRunner(self.maxrank), 20)
|
||||
|
||||
def index(self, obj):
|
||||
def index(self, obj, batch=1):
|
||||
""" Index a single rank or table. `obj` describes the SQL to use
|
||||
for indexing.
|
||||
for indexing. `batch` describes the number of objects that
|
||||
should be processed with a single SQL statement
|
||||
"""
|
||||
log.warning("Starting {}".format(obj.name()))
|
||||
|
||||
cur = self.conn.cursor(name='main')
|
||||
cur.execute(obj.sql_index_sectors())
|
||||
cur = self.conn.cursor()
|
||||
cur.execute(obj.sql_count_objects())
|
||||
|
||||
total_tuples = 0
|
||||
for r in cur:
|
||||
total_tuples += r[1]
|
||||
log.debug("Total number of rows; {}".format(total_tuples))
|
||||
total_tuples = cur.fetchone()[0]
|
||||
log.debug("Total number of rows: {}".format(total_tuples))
|
||||
|
||||
cur.scroll(0, mode='absolute')
|
||||
cur.close()
|
||||
|
||||
next_thread = self.find_free_thread()
|
||||
done_tuples = 0
|
||||
rank_start_time = datetime.now()
|
||||
progress = ProgressLogger(obj.name(), total_tuples)
|
||||
|
||||
sector_sql = obj.sql_sector_places()
|
||||
index_sql = obj.sql_index_place()
|
||||
min_grouped_tuples = total_tuples - len(self.threads) * 1000
|
||||
cur = self.conn.cursor(name='places')
|
||||
cur.execute(obj.sql_get_objects())
|
||||
|
||||
next_info = 100 if log.isEnabledFor(logging.INFO) else total_tuples + 1
|
||||
|
||||
for r in cur:
|
||||
sector = r[0]
|
||||
|
||||
# Should we do the remaining ones together?
|
||||
do_all = done_tuples > min_grouped_tuples
|
||||
|
||||
pcur = self.conn.cursor(name='places')
|
||||
|
||||
if do_all:
|
||||
pcur.execute(obj.sql_nosector_places())
|
||||
else:
|
||||
pcur.execute(sector_sql, (sector, ))
|
||||
|
||||
for place in pcur:
|
||||
place_id = place[0]
|
||||
log.debug("Processing place {}".format(place_id))
|
||||
thread = next(next_thread)
|
||||
|
||||
thread.perform(index_sql, (place_id,))
|
||||
done_tuples += 1
|
||||
|
||||
if done_tuples >= next_info:
|
||||
now = datetime.now()
|
||||
done_time = (now - rank_start_time).total_seconds()
|
||||
tuples_per_sec = done_tuples / done_time
|
||||
log.info("Done {} in {} @ {:.3f} per second - {} ETA (seconds): {:.2f}"
|
||||
.format(done_tuples, int(done_time),
|
||||
tuples_per_sec, obj.name(),
|
||||
(total_tuples - done_tuples)/tuples_per_sec))
|
||||
next_info += int(tuples_per_sec)
|
||||
|
||||
pcur.close()
|
||||
|
||||
if do_all:
|
||||
while True:
|
||||
places = [p[0] for p in cur.fetchmany(batch)]
|
||||
if len(places) == 0:
|
||||
break
|
||||
|
||||
log.debug("Processing places: {}".format(places))
|
||||
thread = next(next_thread)
|
||||
|
||||
thread.perform(obj.sql_index_place(places))
|
||||
progress.add(len(places))
|
||||
|
||||
cur.close()
|
||||
|
||||
for t in self.threads:
|
||||
t.wait()
|
||||
|
||||
rank_end_time = datetime.now()
|
||||
diff_seconds = (rank_end_time-rank_start_time).total_seconds()
|
||||
|
||||
log.warning("Done {}/{} in {} @ {:.3f} per second - FINISHED {}\n".format(
|
||||
done_tuples, total_tuples, int(diff_seconds),
|
||||
done_tuples/diff_seconds, obj.name()))
|
||||
progress.done()
|
||||
|
||||
def find_free_thread(self):
|
||||
""" Generator that returns the next connection that is free for
|
||||
|
||||
Submodule osm2pgsql updated: cb7655a4ff...b8af2b242f
@@ -5,6 +5,11 @@
|
||||
"no" : "skip"
|
||||
}
|
||||
},
|
||||
{ "keys" : ["wikipedia", "wikipedia:*", "wikidata"],
|
||||
"values" : {
|
||||
"" : "extra"
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"],
|
||||
"values" : {
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
[
|
||||
{ "keys" : ["wikipedia", "wikipedia:*", "wikidata"],
|
||||
"values" : {
|
||||
"" : "extra"
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"],
|
||||
"values" : {
|
||||
|
||||
@@ -1,4 +1,9 @@
|
||||
[
|
||||
{ "keys" : ["wikipedia", "wikipedia:*", "wikidata"],
|
||||
"values" : {
|
||||
"" : "extra"
|
||||
}
|
||||
},
|
||||
{
|
||||
"keys" : ["name:prefix", "name:suffix", "name:botanical", "*wikidata"],
|
||||
"values" : {
|
||||
|
||||
@@ -272,7 +272,7 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
IF searchhousenumber IS NOT NULL THEN
|
||||
location := ROW(in_place_id, null, null, hstore('ref', searchhousenumber),
|
||||
location := ROW(null, null, null, hstore('ref', searchhousenumber),
|
||||
'place', 'house_number', null, null, true, true, 28, 0)::addressline;
|
||||
RETURN NEXT location;
|
||||
END IF;
|
||||
|
||||
@@ -81,7 +81,8 @@ BEGIN
|
||||
lookup_word := upper(trim(postcode));
|
||||
lookup_token := ' ' || make_standard_name(lookup_word);
|
||||
SELECT min(word_id) FROM word
|
||||
WHERE word_token = lookup_token and class='place' and type='postcode'
|
||||
WHERE word_token = lookup_token and word = lookup_word
|
||||
and class='place' and type='postcode'
|
||||
INTO return_word_id;
|
||||
IF return_word_id IS NULL THEN
|
||||
return_word_id := nextval('seq_word');
|
||||
|
||||
@@ -162,14 +162,14 @@ BEGIN
|
||||
IF st_area(NEW.geometry) < 0.000000001 AND st_area(existinggeometry) < 1 THEN
|
||||
|
||||
-- re-index points that have moved in / out of the polygon, could be done as a single query but postgres gets the index usage wrong
|
||||
update placex set indexed_status = 2 where indexed_status = 0 and
|
||||
(st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
AND NOT (st_covers(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
|
||||
update placex set indexed_status = 2 where indexed_status = 0
|
||||
AND ST_Intersects(NEW.geometry, placex.geometry)
|
||||
AND NOT ST_Intersects(existinggeometry, placex.geometry)
|
||||
AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
|
||||
|
||||
update placex set indexed_status = 2 where indexed_status = 0 and
|
||||
(st_covers(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
|
||||
AND NOT (st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
update placex set indexed_status = 2 where indexed_status = 0
|
||||
AND ST_Intersects(existinggeometry, placex.geometry)
|
||||
AND NOT ST_Intersects(NEW.geometry, placex.geometry)
|
||||
AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
|
||||
|
||||
END IF;
|
||||
|
||||
@@ -455,9 +455,9 @@ BEGIN
|
||||
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
|
||||
|
||||
-- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
|
||||
update placex set indexed_status = 2 where (st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
update placex set indexed_status = 2 where ST_Intersects(NEW.geometry, placex.geometry)
|
||||
AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null or (NEW.rank_search >= 16 and address ? 'place'));
|
||||
update placex set indexed_status = 2 where (st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
|
||||
update placex set indexed_status = 2 where ST_Intersects(NEW.geometry, placex.geometry)
|
||||
AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null or (NEW.rank_search >= 16 and address ? 'place'));
|
||||
END IF;
|
||||
ELSE
|
||||
|
||||
@@ -431,7 +431,7 @@ DECLARE
|
||||
geo RECORD;
|
||||
BEGIN
|
||||
-- 10000000000 is ~~ 1x1 degree
|
||||
FOR geo IN select quad_split_geometry(geometry, 0.25, 20) as geom LOOP
|
||||
FOR geo IN select quad_split_geometry(geometry, 0.01, 20) as geom LOOP
|
||||
RETURN NEXT geo.geom;
|
||||
END LOOP;
|
||||
RETURN;
|
||||
@@ -476,9 +476,9 @@ BEGIN
|
||||
IF placegeom IS NOT NULL AND ST_IsValid(placegeom) THEN
|
||||
IF ST_GeometryType(placegeom) in ('ST_Polygon','ST_MultiPolygon') THEN
|
||||
FOR geom IN select split_geometry(placegeom) FROM placex WHERE place_id = placeid LOOP
|
||||
update placex set indexed_status = 2 where (st_covers(geom, placex.geometry) OR ST_Intersects(geom, placex.geometry))
|
||||
update placex set indexed_status = 2 where ST_Intersects(geom, placex.geometry)
|
||||
AND rank_search > rank and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null or (rank >= 16 and address ? 'place'));
|
||||
update placex set indexed_status = 2 where (st_covers(geom, placex.geometry) OR ST_Intersects(geom, placex.geometry))
|
||||
update placex set indexed_status = 2 where ST_Intersects(geom, placex.geometry)
|
||||
AND rank_search > rank and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null or (rank >= 16 and address ? 'place'));
|
||||
END LOOP;
|
||||
ELSE
|
||||
|
||||
@@ -137,3 +137,22 @@ Feature: Import of postcodes
|
||||
And word contains
|
||||
| word | class | type |
|
||||
| 01982 | place | postcode |
|
||||
|
||||
Scenario: Different postcodes with the same normalization can both be found
|
||||
Given the places
|
||||
| osm | class | type | addr+postcode | addr+housenumber | geometry |
|
||||
| N34 | place | house | EH4 7EA | 111 | country:gb |
|
||||
| N35 | place | house | E4 7EA | 111 | country:gb |
|
||||
When importing
|
||||
Then location_postcode contains exactly
|
||||
| country | postcode | geometry |
|
||||
| gb | EH4 7EA | country:gb |
|
||||
| gb | E4 7EA | country:gb |
|
||||
When searching for "EH4 7EA"
|
||||
Then results contain
|
||||
| type | placename |
|
||||
| postcode | EH4 7EA |
|
||||
When searching for "E4 7EA"
|
||||
Then results contain
|
||||
| type | placename |
|
||||
| postcode | E4 7EA |
|
||||
|
||||
@@ -4,13 +4,13 @@ import random
|
||||
import os
|
||||
from nose.tools import * # for assert functions
|
||||
|
||||
@given(u'the (\d+ )?grid')
|
||||
@given(u'the ([0-9.]+ )?grid')
|
||||
def define_node_grid(context, grid_step):
|
||||
"""
|
||||
Define a grid of node positions.
|
||||
"""
|
||||
if grid_step is not None:
|
||||
grid_step = int(grd_step.strip())
|
||||
grid_step = float(grid_step.strip())
|
||||
else:
|
||||
grid_step = 0.00001
|
||||
|
||||
|
||||
@@ -55,6 +55,7 @@ date_default_timezone_set('Etc/UTC');
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
$fPostgresVersion = $oDB->getPostgresVersion();
|
||||
|
||||
$aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
|
||||
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
|
||||
@@ -90,13 +91,21 @@ if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
|
||||
if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
|
||||
$oOsm2pgsqlCmd->addParams('--flat-nodes', CONST_Osm2pgsql_Flatnode_File);
|
||||
}
|
||||
if ($fPostgresVersion >= 11.0) {
|
||||
$oOsm2pgsqlCmd->addEnvPair(
|
||||
'PGOPTIONS',
|
||||
'-c jit=off -c max_parallel_workers_per_gather=0'
|
||||
);
|
||||
}
|
||||
|
||||
|
||||
$oIndexCmd = (new \Nominatim\Shell(CONST_BasePath.'/nominatim/nominatim.py'))
|
||||
->addParams('--database', $aDSNInfo['database'])
|
||||
->addParams('--port', $aDSNInfo['port'])
|
||||
->addParams('--threads', $aResult['index-instances']);
|
||||
|
||||
if (!$aResult['quiet']) {
|
||||
$oIndexCmd->addParams('--verbose');
|
||||
}
|
||||
if ($aResult['verbose']) {
|
||||
$oIndexCmd->addParams('--verbose');
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user