Compare commits

...

124 Commits

Author SHA1 Message Date
Sarah Hoffmann
282bd4a67e prepare for 3.4.2 release 2020-05-02 22:04:32 +02:00
Sarah Hoffmann
51f6db2e9c properly escape class parameter
The class parameter was used as is, allowing for potential
SQL injection via the API.

Thanks to @bladeswords for finding this.
2020-05-02 21:58:16 +02:00
Sarah Hoffmann
e4ecbef61e prepare for 3.4.1 release 2019-12-28 22:53:38 +01:00
Sarah Hoffmann
23dd49a5a2 update osm2pgsql (exclude country and postcode from address tags) 2019-12-28 22:41:33 +01:00
Francesc Hervada-Sala
0c85f88be8 typo - fixes openstreetmap#1606 2019-12-28 22:41:19 +01:00
Sarah Hoffmann
7829a05002 update osm2pgsql (deletion and address updates) 2019-12-28 22:40:46 +01:00
Sarah Hoffmann
233e064f0b prepare for 3.4.0 release 2019-10-25 22:04:59 +02:00
Sarah Hoffmann
203e210d3a update osm2pgsql (bound COPY buffers) 2019-10-22 22:47:03 +02:00
Sarah Hoffmann
ff1c78fef5 Merge pull request #1502 from mtmail/specialphrases-quotes
Specialphrases quotes
2019-10-22 21:41:53 +02:00
Sarah Hoffmann
d3a731dae4 Merge pull request #1528 from chatelao/patch-2
Typo - Wekipedia (Wikipedia)
2019-10-22 00:21:33 +02:00
chatelao
73a4433d8e Typo - Wekipedia (Wikipedia) 2019-10-21 15:35:55 +02:00
Sarah Hoffmann
3b4ffea690 Merge pull request #1526 from lonvia/index-concurrently
create/drop indexes concurrently
2019-10-19 18:23:59 +02:00
Sarah Hoffmann
05d7f91392 fix rank of postcode results
Fixes #1487.
2019-10-19 18:12:22 +02:00
Sarah Hoffmann
e3e9f69654 fix rank of postcode results
Fixes #1487.
2019-10-19 17:57:57 +02:00
Sarah Hoffmann
34a4a9b08f create/drop indexes concurrently
Fixes #1507.
2019-10-19 17:13:05 +02:00
Sarah Hoffmann
e0836664e5 Merge pull request #1524 from MatthiasLohr/bugfix/uninitialized-string-offset
Fix for #1523: Fix PHP warning
2019-10-15 09:51:20 +02:00
Matthias Lohr
8d7499342f Fixed PHP warning from #1523
Signed-off-by: Matthias Lohr <mail@mlohr.com>
2019-10-15 08:46:19 +02:00
Sarah Hoffmann
a7b24627b5 Merge pull request #1484 from mtmail/ignore-errors-on-setup-drop
on --drop warn on non-existing tables, dont croak
2019-10-15 00:37:33 +02:00
Sarah Hoffmann
452324cf01 Merge pull request #1519 from mtmail/doc-viewbox-parameters2
documentation: add note what x,y mean for viewbox parameter
2019-10-15 00:34:30 +02:00
Sarah Hoffmann
15c5c8db24 add place=city_block/quarter to address hierarchy
Fixes #1516.
2019-10-14 23:49:06 +02:00
marc tobias
423efd54e4 documentation: add note what x,y mean for viewbox parameter 2019-10-08 19:22:51 +13:00
TC Haddad
5e45e0b3d7 Gsoc2019 contributions for adding Wikidata to Nominatim (#1475)
Complete rewrite of wikipedia processing scripts, addition of processing wikidata, new data source, new documentation by @tchaddad during Google Summer of Code 2019 project.
2019-10-06 15:56:39 +08:00
Sarah Hoffmann
a60e7f2376 Merge pull request #1511 from cbpetersen/patch-1
Remove duplicate format query param
2019-10-01 13:52:24 +02:00
Christoffer Bo Petersen
ac7f0f7581 Remove duplicate format query param 2019-10-01 12:37:53 +02:00
marc tobias
9c872345d6 special phrases: use printf, line length below 120char 2019-09-19 01:12:42 +02:00
marc tobias
bd312fa747 special phrases: sometimes quotes are not escaped 2019-09-19 00:20:30 +02:00
marc tobias
573fba55af SetupClass: on --drop check if table exists first 2019-09-04 13:12:11 +02:00
Sarah Hoffmann
39787f7d62 Merge pull request #1474 from mtmail/tiger-data-2019
US TIGER 2019 data got released
2019-09-03 22:54:22 +02:00
Sarah Hoffmann
f4c067d527 Merge pull request #1478 from tbertels/patch-1
Remove administrative arrondissements from Belgian addresses
2019-09-02 17:50:51 +02:00
Thomas Bertels
8d3595c3e2 Remove administrative arrondissements from Belgian addresses
"administrative7" -> [14, 0]
2019-08-27 14:15:18 +02:00
Sarah Hoffmann
b81a57f1e4 Merge pull request #1477 from dpasqualin/fix-python-shebang
Fix python shebang following PEP 394 recommendation
2019-08-26 22:40:43 +02:00
Diego Pasqualin
a624f8b599 Fix python shebang following PEP 394 recommendation 2019-08-26 14:54:19 +02:00
marc tobias
74f49a9d89 US TIGER 2019 data got released 2019-08-23 14:59:03 +02:00
TC Haddad
b7b89b30ea fix spelling on US-Tiger documentation page (#1459) 2019-08-12 01:40:13 +02:00
Sarah Hoffmann
fb012504b2 Merge pull request #1444 from lonvia/require-python-3
Require python 3
2019-08-07 22:38:43 +02:00
Sarah Hoffmann
7ed9ecf350 Merge pull request #1453 from mtmail/add-boundingbox-to-lookup-results
lookup endpoint returns boundingbox
2019-08-06 20:40:06 +02:00
marc tobias
3af1520461 lookup endpoint returns boundingbox 2019-08-05 23:32:46 +02:00
Sarah Hoffmann
a7edda32ba Merge pull request #1445 from mtmail/hierarchy-endpoint-broke
/hierarchy.php was missing namespace calling AddressDetails
2019-07-28 23:11:02 +02:00
marc tobias
7b09e320a8 /hierarchy.php was missing namespace calling AddressDetails 2019-07-28 22:05:51 +02:00
Sarah Hoffmann
46e077c40b adapt TIGER conversion script for python 3 2019-07-28 20:56:02 +02:00
Sarah Hoffmann
7753ba6019 require python 3 for all tools used in updates 2019-07-28 20:36:35 +02:00
Sarah Hoffmann
511204c158 Merge pull request #1443 from lonvia/reorganize-search-name-partition-tables
Reorganize search name partition tables
2019-07-28 15:18:12 +02:00
Sarah Hoffmann
65daef70c1 Merge pull request #1433 from mtmail/us-postcode-import-optional
make US postcode data to an optional download
2019-07-28 14:50:13 +02:00
Sarah Hoffmann
7ab373e86d add cmake mode for building documentation only 2019-07-28 14:27:14 +02:00
Sarah Hoffmann
79b81d39d8 streamline indexes of search_name partition tables
Remove index on name_vector. We always do near search where the
geometry index is sufficient. Also split centroid index in low
and high rank indexes. Reduces index size by about 25%.
2019-07-28 13:29:35 +02:00
Sarah Hoffmann
2bbe5017d4 use bbox of geometry when searching for attached streets
As we are doing a distance search, this improves results for
large places like airports.

Fixes #1442.
2019-07-28 13:28:27 +02:00
marc tobias
765a932561 make US postcode data to an optional download 2019-07-24 01:13:57 +02:00
Sarah Hoffmann
4a2c9431ee Merge pull request #1432 from mtmail/two-outputformats-for-lookup-endpoint
lookup endpoint supports jsonv2 and geocodejson output now
2019-07-22 23:31:56 +02:00
Sarah Hoffmann
de15d10f86 Merge pull request #1430 from mtmail/exclude-negative-tiger-housenumber-ranges
during TIGER import skip records with negative house number range
2019-07-22 23:30:06 +02:00
Sarah Hoffmann
55d414bd72 Merge pull request #1427 from mtmail/documentation-how-to-build-documentation
New readme file on how to build the documentation
2019-07-22 21:24:32 +02:00
marc tobias
1560685020 lookup endpoint supports jsonv2 and geocodejson output now 2019-07-21 23:20:48 +02:00
marc tobias
0e44659033 during TIGER import skip records with negative house number range 2019-07-21 21:41:12 +02:00
marc tobias
3b39cfb1cf New readme file on how to build the documentation 2019-07-21 21:31:14 +02:00
Sarah Hoffmann
15bca71b0d Merge pull request #1422 from lonvia/remove-country-from-addressline
Remove country from addressline
2019-07-16 22:29:17 +02:00
Sarah Hoffmann
3c12455c5b Merge pull request #1421 from asantoz/patch-1
Minor change on lookup endpoint doc
2019-07-11 10:09:33 +02:00
Sarah Hoffmann
927b4c928e add migration hints for country table 2019-07-10 22:54:32 +02:00
Sarah Hoffmann
be47cd2549 remove country from place_addressline
The country information can be determined sufficiently from
the country code. We only loose the specific OSM object
behind the address.

Also streamlines the location_area_country table.
2019-07-10 21:29:47 +02:00
André Santos
a4a17f93f5 Minor change on lookup endpoint doc
Fix documentation about lookup endpoint on output formats available on filter `format`
2019-07-10 19:26:38 +01:00
Sarah Hoffmann
745e52b798 Merge pull request #1419 from asantoz/minor-fix-doc
Minor issue on api docs
2019-07-08 22:23:10 +02:00
André Santos
bbc2da2a4b Minor issue on api docs
Fix a minor issue on API docs in details endpoint example 🙏
2019-07-08 20:08:43 +01:00
Sarah Hoffmann
4c1793b4e3 recreate interpolations when one of their support nodes changes
A simple update is not enough because the interpolation splits
might change as well as the housenumbers.

Fixes #1360.
2019-07-03 23:15:54 +02:00
Sarah Hoffmann
d1ca73f813 Reset housenumber on every place update
As it is a computed field, it needs to be computed from scratch
to take into account any surrounding changes.

Fixes #1395.
2019-07-03 20:56:35 +02:00
Sarah Hoffmann
cdc7d0fe0e remove visibility modifier from constants again
Only supported on PHP >= 7.1.
2019-07-02 23:24:49 +02:00
Sarah Hoffmann
a27a271034 Merge pull request #1415 from nslxndr/fix-db-log
Fix DB log
2019-07-02 20:47:42 +02:00
Sandor Nagy
6c097d24b1 Fix SQL concatenation for new query log 2019-07-02 01:19:59 +02:00
Sandor Nagy
0115b655bd lib/log.php broke after switch to PDO DB abstraction 2019-07-02 01:19:55 +02:00
Sarah Hoffmann
e8f1463cc2 Merge pull request #1414 from lonvia/remove-more-places-from-address
Remove more places from address ranking
2019-07-01 22:33:20 +02:00
Sarah Hoffmann
e164d53fcc adapt tests to new place address ranks 2019-06-30 23:09:43 +02:00
Sarah Hoffmann
b8f7b3cc8d Remove county places and Regierungsbezirke vom German addresses 2019-06-30 22:27:44 +02:00
Sarah Hoffmann
b0e6fb73c6 generally remove all country and state places from address
Gets rid of the hard-coded expection for place nodes and sets
the address rank generally via the address level config instead.
That means only administrative boundaries are now used at that
level in addresses.
2019-06-30 22:27:44 +02:00
Sarah Hoffmann
dd50f1737b Merge pull request #1412 from lonvia/rewrite-wordset-computation
Rework word set computation
2019-06-30 10:48:09 +02:00
Sarah Hoffmann
38a99856c0 Rework word set computation
Switch from an recursive algorithm for computing the word sets
to an iterative one that benefits from caching intermediate
results. This considerably reduces the amount of memory needed,
so that the depth restriction can be dropped. To ensure that
the number of word sets remains manageable, only sets up to
a certain length are accepted and only a certain number of
total word sets. If word sets need to be dropped, we drop
the ones with more words per word set first.

To further reduce the number of potential word sets, the valid
tokens are looked up first and then only word sets containing
valid tokens are computed.

Fixes #1403, #1404 and #654.
2019-06-29 18:22:31 +02:00
Sarah Hoffmann
09e7f0d013 remove historic:neighbourhood from address ranks
Should not be reverse searchable.

Fixes #1379.
2019-06-10 20:12:27 +02:00
Sarah Hoffmann
e05e413cc4 use real centroid when looking for near roads
The point-on-surface may be at the corner in large objects, so
that roads are too far away.

Fixes #1389.
2019-06-10 18:23:12 +02:00
Sarah Hoffmann
2c21cbb5e6 update osm2pgsql (downgrading unnamed places)
Also adds tests for updating unnamed places.
2019-06-10 18:22:11 +02:00
Sarah Hoffmann
3bc4b4bf9f update osm2pgsql (import special tags) 2019-06-09 13:58:05 +02:00
Sarah Hoffmann
a09f2a6987 Merge pull request #1381 from mtmail/faq-entry-about-managed-database-services
FAQ entry about managed database services
2019-06-09 11:04:47 +02:00
Sarah Hoffmann
1f57d730df Merge pull request #1394 from mtmail/update-postcodes-without-colon
exclude postcode ranges separated by colon from centre point calculation
2019-06-09 11:03:10 +02:00
Sarah Hoffmann
eebc72b2bc Merge pull request #1388 from mtmail/register-shutdown-function
register shutdown function to handle out-of-memory errors
2019-06-09 10:20:19 +02:00
rlytleatrel8edto
2f3cf19afa Ubuntu18-nginx install instructions - fix php-fpm socket path (#1398)
Ubuntu18-nginx install instructions - fix php-fpm socket path
2019-06-02 17:04:02 +02:00
marc tobias
10fbda702b exclude postcode ranges separated by colon from centre point calculation 2019-05-25 20:43:38 +02:00
Sarah Hoffmann
17f130550e Merge pull request #1387 from joto/master
Fix some minor issues in docs.
2019-05-23 23:54:29 +02:00
Jochen Topf
251f335fe3 Revert layout changes in list. 2019-05-22 09:25:41 +02:00
marc tobias
ed2fb84e82 register shutdown function to handle out-of-memory errors 2019-05-21 18:41:06 +02:00
Jochen Topf
634684236c Fix some minor issues in docs. 2019-05-21 13:55:16 +02:00
marc tobias
11e0d9ec14 FAQ entry about managed database services 2019-05-14 19:45:56 +02:00
Sarah Hoffmann
5fd8f5aa27 Merge pull request #1372 from lonvia/raise-postgres-version
increase minimum versions for PostgreSQL, Postgis and PHP
2019-05-02 22:56:08 +02:00
Sarah Hoffmann
c05ddb6119 increase minimum versions for PostgreSQL, Postgis and PHP
Remove checks and hacks for older versions.
2019-05-02 21:48:40 +02:00
Sarah Hoffmann
ec86a972a2 prepare for 3.3.0 release 2019-05-01 09:38:45 +02:00
Sarah Hoffmann
62da8a34f3 add documentation for new reverse zoom 17 2019-04-30 23:27:04 +02:00
Sarah Hoffmann
6511ec3aa8 Convert importance to float value
Fixes #1369.
2019-04-30 23:21:53 +02:00
Sarah Hoffmann
1707157c4d fix indent in docs 2019-04-29 23:13:37 +02:00
Sarah Hoffmann
ee49ab84a4 Merge branch 'markdown-syntax-fix-gb-postcodes' of https://github.com/mtmail/Nominatim into mtmail-markdown-syntax-fix-gb-postcodes 2019-04-29 23:12:37 +02:00
marc tobias
b92a55f5fe Readme for GB postcodes had markdown syntax error 2019-04-28 23:18:36 +02:00
Sarah Hoffmann
7d3b16f24c Ignore no-fatal errors during dump file restore
The owner should never be restored, the table should be owned
by the caller instead. Non-existing indexes and similar only
started to throw a warning with Postgresql 9.4 and later, so
ignore them explicitly there.
2019-04-28 22:44:42 +02:00
Sarah Hoffmann
b612b99421 Merge pull request #1321 from mtmail/interpolating-0-housenumbers
Support housenumber=0 in interpolations
2019-04-19 18:29:43 +02:00
Sarah Hoffmann
5a5b3de79a Merge pull request #1359 from mtmail/fix-export-script
utils/export.php broke after switch to PDO DB abstraction
2019-04-17 23:04:51 +02:00
marc tobias
0862e21a1b utils/export.php broke after switch to PDO DB abstraction 2019-04-17 22:29:50 +02:00
Sarah Hoffmann
c148b768f4 Merge pull request #1358 from mtmail/travis-php-7dot1
travis-CI: use PHP 7.1
2019-04-17 22:05:28 +02:00
marc tobias
fab9f684af travis-CI: use PHP 7.1 2019-04-17 16:05:49 +02:00
Sarah Hoffmann
0af48fe802 exclude all objects without address rank from reverse
This was forgotten when looking for a housenumber for
a street point.

Fixes #1319.
2019-04-16 23:13:27 +02:00
Sarah Hoffmann
a9ae2c7457 add reverse zoom level that includes minor streets
Zoom 17 now also resolves service roads and similar.

Fixes #1350.
2019-04-15 22:43:07 +02:00
Sarah Hoffmann
87c0049e75 isaddress field may be missing in details view 2019-04-14 12:03:37 +02:00
Sarah Hoffmann
e5eb7ecdc1 Merge branch 'observe-bounded-viewbox-in-postcode-search' of https://github.com/mtmail/Nominatim into mtmail-observe-bounded-viewbox-in-postcode-search 2019-04-14 11:29:28 +02:00
Sarah Hoffmann
a7e7823535 Merge pull request #1336 from mtmail/faq-entry-about-buffer-not-owned
New FAQ entry about -buffer is not owned by resource owner-
2019-04-14 11:27:36 +02:00
Sarah Hoffmann
33ff96fd83 Merge pull request #1348 from mtmail/checkmodulepresence-to-raise-exception
checkModulePresence now raises exception instead of its callers
2019-04-14 11:25:52 +02:00
Sarah Hoffmann
58852b3eeb Merge pull request #1347 from mtmail/pdo-returns-proper-boolean
PDO library returns proper boolean. We dont need string comparison
2019-04-14 11:24:23 +02:00
Sarah Hoffmann
403ee260f6 Ensure that postcode relations are used in addresses
Postcode nodes are normally thrown away as they only play
a role for computing artifical postcodes. However, if we
have a postcode area this still should take part of the
address.

Fixes #1330.
2019-04-14 11:20:03 +02:00
marc tobias
84149f26df checkModulePresence now raises exception instead of its callers 2019-04-02 18:37:11 +02:00
marc tobias
2ab836c11c PDO library returns proper boolean. We dont need string comparison 2019-04-02 16:52:37 +02:00
marc tobias
7d9dbd62c7 Support housenumber=0 in interpolations 2019-04-02 15:13:45 +02:00
marc tobias
c9a6350894 On postcode searches observe given bounded viewbox 2019-04-02 14:49:31 +02:00
Sarah Hoffmann
2a4198f94d add test for issue #1343
Keyword details for countries (which don't have address details).
2019-03-26 21:49:44 +01:00
marc tobias
850ab6999c if nameaddress_vector was {} the database queries failed 2019-03-26 18:03:26 +01:00
marc tobias
2946e81995 New FAQ entry about -buffer is not owned by resource owner- 2019-03-19 01:52:35 +01:00
Sarah Hoffmann
c78a64ec9b Merge pull request #1334 from mtmail/fix-PDOException-call
PDOException call in catch was causing exception itself
2019-03-18 21:26:23 +01:00
marc tobias
61386c5b4d PDOException call in catch was causing exception itself 2019-03-17 02:47:28 +01:00
Sarah Hoffmann
279eae4b92 Merge pull request #1333 from Arun179/patch-1
Rectified a small spelling mistake
2019-03-14 19:06:07 +01:00
Arun Kumar
37f7af56e4 Rectified a small spelling mistake
changed "mailinglist" to "mailing list"
2019-03-14 22:53:09 +05:30
Sarah Hoffmann
ec2d491dc8 Merge pull request #1328 from mtmail/php-pdo-with-prepare
Nominatim::DB support input variables, custom error messages
2019-03-13 11:10:17 +01:00
marc tobias
890d415e1f Nominatim::DB support input variables, custom error messages 2019-03-10 16:56:36 +01:00
Sarah Hoffmann
75f951d254 Merge pull request #1318 from mtmail/php-pdo
replace database abstraction DB with PDO
2019-03-09 11:27:51 +01:00
marc tobias
d4b633bfc5 replace database abstraction DB with PDO 2019-03-09 00:18:15 +01:00
marc tobias
b20a534e0c add logEnd to reverse.php, just like search.php 2019-02-27 20:22:50 +01:00
111 changed files with 2587 additions and 39060 deletions

View File

@@ -11,6 +11,8 @@ git:
env:
- TEST_SUITE=tests
- TEST_SUITE=monaco
before_install:
- phpenv global 7.1
install:
- vagrant/install-on-travis-ci.sh
before_script:
@@ -19,7 +21,7 @@ script:
- cd $TRAVIS_BUILD_DIR/
- if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 . ; fi
- cd $TRAVIS_BUILD_DIR/test/php
- if [[ $TEST_SUITE == "tests" ]]; then phpunit ./ ; fi
- if [[ $TEST_SUITE == "tests" ]]; then /usr/bin/phpunit ./ ; fi
- cd $TRAVIS_BUILD_DIR/test/bdd
- # behave --format=progress3 api
- if [[ $TEST_SUITE == "tests" ]]; then behave -DREMOVE_TEMPLATE=1 --format=progress3 db ; fi

View File

@@ -19,8 +19,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 3)
set(NOMINATIM_VERSION_MINOR 2)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_MINOR 4)
set(NOMINATIM_VERSION_PATCH 2)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
@@ -35,57 +35,60 @@ add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
set(BUILD_TESTS off CACHE BOOL "Build test suite" FORCE)
set(WITH_LUA off CACHE BOOL "Build with lua support" FORCE)
set(ONLY_DOCS off CACHE BOOL "Build documentation only")
if (NOT EXISTS "${CMAKE_SOURCE_DIR}/osm2pgsql/CMakeLists.txt")
message(FATAL_ERROR "The osm2pgsql directory is empty.\
Did you forget to check out Nominatim recursively?\
\nTry updating submodules with: git submodule update --init")
if (NOT ONLY_DOCS)
if (NOT EXISTS "${CMAKE_SOURCE_DIR}/osm2pgsql/CMakeLists.txt")
message(FATAL_ERROR "The osm2pgsql directory is empty.\
Did you forget to check out Nominatim recursively?\
\nTry updating submodules with: git submodule update --init")
endif()
add_subdirectory(osm2pgsql)
find_package(Threads REQUIRED)
unset(PostgreSQL_TYPE_INCLUDE_DIR CACHE)
set(PostgreSQL_TYPE_INCLUDE_DIR "/usr/include/")
find_package(PostgreSQL REQUIRED)
include_directories(${PostgreSQL_INCLUDE_DIRS})
link_directories(${PostgreSQL_LIBRARY_DIRS})
find_program(PYOSMIUM pyosmium-get-changes)
if (NOT EXISTS "${PYOSMIUM}")
set(PYOSMIUM_PATH "")
message(WARNING "pyosmium-get-changes not found (required for updates)")
else()
set(PYOSMIUM_PATH "${PYOSMIUM}")
message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
endif()
find_program(PG_CONFIG pg_config)
execute_process(COMMAND ${PG_CONFIG} --pgxs
OUTPUT_VARIABLE PGXS
OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT EXISTS "${PGXS}")
message(FATAL_ERROR "Postgresql server package not found.")
endif()
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED)
find_package(LibXml2 REQUIRED)
include_directories(${LIBXML2_INCLUDE_DIR})
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
endif()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
add_subdirectory(osm2pgsql)
find_package(Threads REQUIRED)
unset(PostgreSQL_TYPE_INCLUDE_DIR CACHE)
set(PostgreSQL_TYPE_INCLUDE_DIR "/usr/include/")
find_package(PostgreSQL REQUIRED)
include_directories(${PostgreSQL_INCLUDE_DIRS})
link_directories(${PostgreSQL_LIBRARY_DIRS})
find_program(PYOSMIUM pyosmium-get-changes)
if (NOT EXISTS "${PYOSMIUM}")
set(PYOSMIUM_PATH "")
message(WARNING "pyosmium-get-changes not found (required for updates)")
else()
set(PYOSMIUM_PATH "${PYOSMIUM}")
message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
endif()
find_program(PG_CONFIG pg_config)
execute_process(COMMAND ${PG_CONFIG} --pgxs
OUTPUT_VARIABLE PGXS
OUTPUT_STRIP_TRAILING_WHITESPACE)
if (NOT EXISTS "${PGXS}")
message(FATAL_ERROR "Postgresql server package not found.")
endif()
find_package(ZLIB REQUIRED)
find_package(BZip2 REQUIRED)
find_package(LibXml2 REQUIRED)
include_directories(${LIBXML2_INCLUDE_DIR})
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
endif()
message (STATUS "Using PHP binary " ${PHP_BIN})
#-----------------------------------------------------------------------------
#
@@ -143,26 +146,30 @@ endforeach()
#
#-----------------------------------------------------------------------------
include(CTest)
if (NOT ONLY_DOCS)
include(CTest)
set(TEST_BDD db osm2pgsql api)
set(TEST_BDD db osm2pgsql api)
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND lettuce features/${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND lettuce features/${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
add_test(NAME php
COMMAND phpunit ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests-php)
add_test(NAME php
COMMAND phpunit ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests-php)
endif()
#-----------------------------------------------------------------------------
add_subdirectory(module)
add_subdirectory(nominatim)
if (NOT ONLY_DOCS)
add_subdirectory(module)
add_subdirectory(nominatim)
endif()
add_subdirectory(docs)
#-----------------------------------------------------------------------------

View File

@@ -1,3 +1,54 @@
3.4.2
* security fix: fix possible SQL injection via details API
3.4.1
* update osm2pgsql
* move deletion to copy thread (fixes deadlock in updates)
* fix filtering where valid address objects got dropped
* fix typo in import styles
3.4.0
* increase required version for PostgreSQL(9.3), PostGIS(2.2) and PHP(7.0)
* better error reporting for out-of-memory errors
* exclude postcode ranges separated by colon from centre point calculation
* update osm2pgsql, better handling of imports without flatnode file
* switch to more efficient algorithm for word set computation
* use only boundaries for country and state parts of addresses
* improve updates of addresses with housenumbers and interpolations
* remove country from place_addressline table and use country_code instead
* optimise indexes on search_name partition tables
* improve searching of attached streets for large objects like airports
* drop support for python 2
* new scripts for importing Wikidata for importance
* create and drop indexes concurrently to not clash with auto vacuum
* various documentation improvements
3.3.0
* zoom 17 in reverse now zooms in on minor streets
* fix use of postcode relations in address
* support for housenumber 0 on interpolations
* replace database abstraction DB with PDO and switch to using exceptions
* exclude line features at rank 30 from reverse geocoding
* remove self-reference and country from place_addressline
* make json output more readable (less escaping)
* update conversion scripts for postcodes
* scripts in utils/ are no longer executable (always use scripts in build dir)
* remove Natural Earth country fallback (OSM is complete enough)
* make rank assignments configurable
* allow accept languages with underscore
* new reverse-only import mode (without search index table)
* rely on boundaries only for states and countries
* update osm2pgsql, now using a configurable style
* provide multiple import styles
* improve search when house number and postcodes are dropped
* overhaul of setup code
* add support for PHPUnit 6
* update test database
* various documentation improvements
3.2.0
* complete rewrite of reverse search algorithm

View File

@@ -59,5 +59,5 @@ Both bug reports and pull requests are welcome.
Mailing list
============
For questions you can join the geocoding mailinglist, see
For questions you can join the geocoding mailing list, see
https://lists.openstreetmap.org/listinfo/geocoding

View File

@@ -171,7 +171,7 @@ If the Postgres installation is behind a firewall, you can try
inside the virtual machine. It will map the port to `localhost:9999` and then
you edit `settings/local.php` with
@define('CONST_Database_DSN', 'pgsql://postgres@localhost:9999/nominatim_it');
@define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`

View File

@@ -25,41 +25,32 @@ If you forgot to download the file, or have a new version, you can import it sep
2. `unzip codepo_gb.zip`
Unpacked you'll see a directory of CSV files.
Unpacked you'll see a directory of CSV files.
```
$ more codepo_gb/Data/CSV/n.csv
"N1 0AA",10,530626,183961,"E92000001","E19000003","E18000007","","E09000019","E05000368"
"N1 0AB",10,530559,183978,"E92000001","E19000003","E18000007","","E09000019","E05000368"
```
$ more codepo_gb/Data/CSV/n.csv
"N1 0AA",10,530626,183961,"E92000001","E19000003","E18000007","","E09000019","E05000368"
"N1 0AB",10,530559,183978,"E92000001","E19000003","E18000007","","E09000019","E05000368"
The coordinates are "Northings" and "Eastings" in [OSGB 1936](http://epsg.io/1314) projection. They can be projected to WGS84 like this
The coordinates are "Northings" and "Eastings" in [OSGB 1936](http://epsg.io/1314) projection. They can be projected to WGS84 like this
```
SELECT ST_AsText(ST_Transform(ST_SetSRID('POINT(530626 183961)'::geometry,27700), 4326));
POINT(-0.117872733220225 51.5394424719303)
```
[-0.117872733220225 51.5394424719303 on OSM map](https://www.openstreetmap.org/?mlon=-0.117872733220225&mlat=51.5394424719303&zoom=16)
SELECT ST_AsText(ST_Transform(ST_SetSRID('POINT(530626 183961)'::geometry,27700), 4326));
POINT(-0.117872733220225 51.5394424719303)
[-0.117872733220225 51.5394424719303 on OSM map](https://www.openstreetmap.org/?mlon=-0.117872733220225&mlat=51.5394424719303&zoom=16)
3. Create database, import CSV files, add geometry column, dump into file
```
DBNAME=create_gb_postcode_file
createdb $DBNAME
echo 'CREATE EXTENSION postgis' | psql $DBNAME
cat data/gb_postcode_table.sql | psql $DBNAME
cat codepo_gb/Data/CSV/*.csv | ./data-sources/gb-postcodes/convert_codepoint.php | psql $DBNAME
cat codepo_gb/Doc/licence.txt | iconv -f iso-8859-1 -t utf-8 | dos2unix | sed 's/^/-- /g' > gb_postcode_data.sql
pg_dump -a -t gb_postcode $DBNAME | grep -v '^--' >> gb_postcode_data.sql
gzip -9 -f gb_postcode_data.sql
ls -lah gb_postcode_data.*
# dropdb $DBNAME
```
DBNAME=create_gb_postcode_file
createdb $DBNAME
echo 'CREATE EXTENSION postgis' | psql $DBNAME
cat data/gb_postcode_table.sql | psql $DBNAME
cat codepo_gb/Data/CSV/*.csv | ./data-sources/gb-postcodes/convert_codepoint.php | psql $DBNAME
cat codepo_gb/Doc/licence.txt | iconv -f iso-8859-1 -t utf-8 | dos2unix | sed 's/^/-- /g' > gb_postcode_data.sql
pg_dump -a -t gb_postcode $DBNAME | grep -v '^--' >> gb_postcode_data.sql
gzip -9 -f gb_postcode_data.sql
ls -lah gb_postcode_data.*
# dropdb $DBNAME

View File

@@ -2,28 +2,25 @@
Convert [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)/Line dataset of the US Census Bureau to SQL files which can be imported by Nominatim. The created tables in the Nominatim database are separate from OpenStreetMap tables and get queried at search time separately.
The dataset gets updated once per year. Downloading is prown to be slow (can take a full day) and converting them can take hours as well.
The dataset gets updated once per year. Downloading is prone to be slow (can take a full day) and converting them can take hours as well.
Replace '2018' with the current year throughout.
Replace '2019' with the current year throughout.
1. Install the GDAL library and python bindings and the unzip tool
# Ubuntu:
sudo apt-get install python-gdal unzip
# CentOS:
sudo yum install gdal-python unzip
sudo apt-get install python3-gdal unzip
2. Get the TIGER 2018 data. You will need the EDGES files
2. Get the TIGER 2019 data. You will need the EDGES files
(3,233 zip files, 11GB total).
wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2018/EDGES/
wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2019/EDGES/
3. Convert the data into SQL statements. Adjust the file paths in the scripts as needed
cd data-sources/us-tiger
./convert.sh <input-path> <output-path>
4. Maybe: package the created files
tar -czf tiger2018-nominatim-preprocessed.tar.gz tiger
tar -czf tiger2019-nominatim-preprocessed.tar.gz tiger

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/python3
# Tiger road data to OSM conversion script
# Creates Karlsruhe-style address ways beside the main way
# based on the Massachusetts GIS script by christopher schmidt
@@ -164,7 +164,7 @@ def parse_shp_for_geom_and_tags( filename ):
if (statefp != None) and (countyfp != None):
county_name = county_fips_data.get(statefp + '' + countyfp)
if county_name:
tags["tiger:county"] = county_name.encode("utf-8")
tags["tiger:county"] = county_name
# tlid = poFeature.GetField("TLID")
# if tlid != None:

View File

@@ -0,0 +1,58 @@
## Add Wikipedia and Wikidata to Nominatim
OSM contributors frequently tag items with links to Wikipedia and Wikidata. Nominatim can use the page ranking of Wikipedia pages to help indicate the relative importance of OSM features. This is done by calculating an importance score between 0 and 1 based on the number of inlinks to an article for a location. If two places have the same name and one is more important than the other, the Wikipedia score often points to the correct place.
These scripts extract and prepare both Wikipedia page rank and Wikidata links for use in Nominatim.
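The score itself is a logarithmic normalisation of article inlink counts; the statement below repeats the calculation from wikipedia_import.sh further down, which maps the most-linked article to 1.0 and an article with a single inlink to 0.
```
-- The scaling used in wikipedia_import.sh: normalise the logarithm of each
-- article's total link count by the logarithm of the largest count seen,
-- yielding an importance value between 0 and 1.
UPDATE wikipedia_article
   SET importance = log(totalcount) / log((SELECT max(totalcount) FROM wikipedia_article));
```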
#### Create a new postgres DB for Processing
Due to the size of initial and intermediate tables, processing can be done in an external database:
```
CREATE DATABASE wikiprocessingdb;
```
---
Wikipedia
---
Processing these data requires a large amount of disk space (~1TB) and considerable time (>24 hours).
#### Import & Process Wikipedia tables
This step downloads and converts [Wikipedia](https://dumps.wikimedia.org/) page data SQL dumps to PostgreSQL files, which are then imported and processed together with pagelink information from the Wikipedia language sites to calculate importance scores.
- The script will process data from whatever set of Wikipedia languages is specified in the initial languages array
- Note that processing the top 40 Wikipedia languages can take over a day, and will add nearly 1TB to the processing database. The final output tables will be approximately 11GB and 2GB in size
To download, convert, and import the data, then process summary statistics and compute importance scores, run:
```
./wikipedia_import.sh
```
---
Wikidata
---
This script downloads and processes Wikidata to enrich the previously created Wikipedia tables for use in Nominatim.
#### Import & Process Wikidata
This step downloads and converts [Wikidata](https://dumps.wikimedia.org/wikidatawiki/) page data SQL dumps to PostgreSQL files which can be processed and imported into the Nominatim database. It also uses the Wikidata Query Service API to discover and include place types.
- Script presumes that the user has already processed Wikipedia tables as specified above
- Script requires wikidata_place_types.txt and wikidata_place_type_levels.csv
- Script requires the [jq JSON parser](https://stedolan.github.io/jq/)
- Script processes data from whatever set of Wikipedia languages is specified in the initial languages array
- Script queries Wikidata Query Service API and imports all instances of place types listed in wikidata_place_types.txt
- Script updates wikipedia_articles table with extracted wikidata
By including Wikidata in the wikipedia_article table, new connections can be made on the fly from the Nominatim placex table to wikipedia_article importance scores.
To download, convert, and import the data, then process required items, run:
```
./wikidata_import.sh
```
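Once wikidata_import.sh has run, the enriched wikipedia_article table can be joined to Nominatim's placex table via the Wikidata id. The query below is only a sketch, assuming a standard Nominatim import where `placex.extratags` is an hstore column carrying the OSM `wikidata` tag; the `wd_page_title` column is the one populated by the script above.
```
-- Sketch only: look up importance scores for places that carry a wikidata tag.
-- Assumes placex.extratags is an hstore with a 'wikidata' key (standard imports)
-- and wikipedia_article.wd_page_title was filled by wikidata_import.sh.
SELECT p.place_id,
       p.extratags->'wikidata' AS wikidata_id,
       w.importance
  FROM placex p
  JOIN wikipedia_article w ON w.wd_page_title = p.extratags->'wikidata'
 WHERE p.extratags ? 'wikidata'
 LIMIT 10;
```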

View File

@@ -0,0 +1,95 @@
#!/bin/bash
psqlcmd() {
psql wikiprocessingdb
}
mysql2pgsqlcmd() {
./mysql2pgsql.perl /dev/stdin /dev/stdout
}
# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
# get a few wikidata dump tables
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-geo_tags.sql.gz
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-page.sql.gz
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-wb_items_per_site.sql.gz
# import wikidata tables
gzip -dc wikidatawiki-latest-geo_tags.sql.gz | mysql2pgsqlcmd | psqlcmd
gzip -dc wikidatawiki-latest-page.sql.gz | mysql2pgsqlcmd | psqlcmd
gzip -dc wikidatawiki-latest-wb_items_per_site.sql.gz | mysql2pgsqlcmd | psqlcmd
# get wikidata places from wikidata query API
while read F ; do
wget "https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT ?item WHERE{?item wdt:P31*/wdt:P279*wd:$F;}" -O $F.json
jq -r '.results | .[] | .[] | [.item.value] | @csv' $F.json >> $F.txt
awk -v qid=$F '{print $0 ","qid}' $F.txt | sed -e 's!"http://www.wikidata.org/entity/!!' | sed 's/"//g' >> $F.csv
cat $F.csv >> wikidata_place_dump.csv
rm $F.json $F.txt $F.csv
done < wikidata_place_types.txt
# import wikidata places
echo "CREATE TABLE wikidata_place_dump (item text, instance_of text);" | psqlcmd
echo "COPY wikidata_place_dump (item, instance_of) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_dump.csv' DELIMITER ',' CSV;" | psqlcmd
echo "CREATE TABLE wikidata_place_type_levels (place_type text, level integer);" | psqlcmd
echo "COPY wikidata_place_type_levels (place_type, level) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv' DELIMITER ',' CSV HEADER;" | psqlcmd
# create derived tables
echo "CREATE TABLE geo_earth_primary AS SELECT gt_page_id, gt_lat, gt_lon FROM geo_tags WHERE gt_globe = 'earth' AND gt_primary = 1 AND NOT( gt_lat < -90 OR gt_lat > 90 OR gt_lon < -180 OR gt_lon > 180 OR gt_lat=0 OR gt_lon=0) ;" | psqlcmd
echo "CREATE TABLE geo_earth_wikidata AS SELECT DISTINCT geo_earth_primary.gt_page_id, geo_earth_primary.gt_lat, geo_earth_primary.gt_lon, page.page_title, page.page_namespace FROM geo_earth_primary LEFT OUTER JOIN page ON (geo_earth_primary.gt_page_id = page.page_id) ORDER BY geo_earth_primary.gt_page_id;" | psqlcmd
echo "ALTER TABLE wikidata_place_dump ADD COLUMN ont_level integer, ADD COLUMN lat numeric(11,8), ADD COLUMN lon numeric(11,8);" | psqlcmd
echo "UPDATE wikidata_place_dump SET ont_level = wikidata_place_type_levels.level FROM wikidata_place_type_levels WHERE wikidata_place_dump.instance_of = wikidata_place_type_levels.place_type;" | psqlcmd
echo "CREATE TABLE wikidata_places AS SELECT DISTINCT ON (item) item, instance_of, MAX(ont_level) AS ont_level, lat, lon FROM wikidata_place_dump GROUP BY item, instance_of, ont_level, lat, lon ORDER BY item;" | psqlcmd
echo "UPDATE wikidata_places SET lat = geo_earth_wikidata.gt_lat, lon = geo_earth_wikidata.gt_lon FROM geo_earth_wikidata WHERE wikidata_places.item = geo_earth_wikidata.page_title" | psqlcmd
# process language pages
echo "CREATE TABLE wikidata_pages (item text, instance_of text, lat numeric(11,8), lon numeric(11,8), ips_site_page text, language text );" | psqlcmd
for i in "${language[@]}"
do
echo "CREATE TABLE wikidata_${i}_pages as select wikidata_places.item, wikidata_places.instance_of, wikidata_places.lat, wikidata_places.lon, wb_items_per_site.ips_site_page FROM wikidata_places LEFT JOIN wb_items_per_site ON (CAST (( LTRIM(wikidata_places.item, 'Q')) AS INTEGER) = wb_items_per_site.ips_item_id) WHERE ips_site_id = '${i}wiki' AND LEFT(wikidata_places.item,1) = 'Q' order by wikidata_places.item;" | psqlcmd
echo "ALTER TABLE wikidata_${i}_pages ADD COLUMN language text;" | psqlcmd
echo "UPDATE wikidata_${i}_pages SET language = '${i}';" | psqlcmd
echo "INSERT INTO wikidata_pages SELECT item, instance_of, lat, lon, ips_site_page, language FROM wikidata_${i}_pages;" | psqlcmd
done
echo "ALTER TABLE wikidata_pages ADD COLUMN wp_page_title text;" | psqlcmd
echo "UPDATE wikidata_pages SET wp_page_title = REPLACE(ips_site_page, ' ', '_');" | psqlcmd
echo "ALTER TABLE wikidata_pages DROP COLUMN ips_site_page;" | psqlcmd
# add wikidata to wikipedia_article table
echo "UPDATE wikipedia_article SET lat = wikidata_pages.lat, lon = wikidata_pages.lon, wd_page_title = wikidata_pages.item, instance_of = wikidata_pages.instance_of FROM wikidata_pages WHERE wikipedia_article.language = wikidata_pages.language AND wikipedia_article.title = wikidata_pages.wp_page_title;" | psqlcmd
echo "CREATE TABLE wikipedia_article_slim AS SELECT * FROM wikipedia_article WHERE wikidata_id IS NOT NULL;" | psqlcmd
echo "ALTER TABLE wikipedia_article RENAME TO wikipedia_article_full;" | psqlcmd
echo "ALTER TABLE wikipedia_article_slim RENAME TO wikipedia_article;" | psqlcmd
# clean up intermediate tables
echo "DROP TABLE wikidata_place_dump;" | psqlcmd
echo "DROP TABLE geo_earth_primary;" | psqlcmd
for i in "${language[@]}"
do
echo "DROP TABLE wikidata_${i}_pages;" | psqlcmd
done

View File

@@ -0,0 +1,77 @@
#!/bin/bash
psqlcmd() {
psql wikiprocessingdb
}
mysql2pgsqlcmd() {
./mysql2pgsql.perl /dev/stdin /dev/stdout
}
# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
# create wikipedia calculation tables
echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double precision, lon double precision);" | psqlcmd
echo "CREATE TABLE wikipedia_article (language text NOT NULL, title text NOT NULL, langcount integer, othercount integer, totalcount integer, lat double precision, lon double precision, importance double precision, title_en text, osm_type character(1), osm_id bigint );" | psqlcmd
echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );" | psqlcmd
# download individual wikipedia language tables
for i in "${language[@]}"
do
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
done
# import individual wikipedia language tables
for i in "${language[@]}"
do
gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | mysql2pgsqlcmd | psqlcmd
done
# process language tables and associated pagelink counts
for i in "${language[@]}"
do
echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | psqlcmd
echo "alter table ${i}pagelinkcount add column othercount integer;" | psqlcmd
echo "update ${i}pagelinkcount set othercount = 0;" | psqlcmd
for j in "${language[@]}"
do
echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | psqlcmd
done
echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | psqlcmd
done
# calculate importance score for each wikipedia page
echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | psqlcmd
# clean up intermediate tables to conserve space
for i in "${language[@]}"
do
echo "DROP TABLE ${i}pagelinks;" | psqlcmd
echo "DROP TABLE ${i}page;" | psqlcmd
echo "DROP TABLE ${i}langlinks;" | psqlcmd
echo "DROP TABLE ${i}redirect;" | psqlcmd
echo "DROP TABLE ${i}pagelinkcount;" | psqlcmd
done

View File

@@ -341,7 +341,7 @@ if (/(create\s+table\s+)([-_\w]+)\s/i) { # example: CREATE TABLE `english_engli
# in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)
# (source: 8.1.3 docs, section "drop table")
warn "table $table will be dropped CASCADE\n";
$pre_create_sql .= "DROP TABLE $table CASCADE\\g\n"; # custom dumps may be missing the 'dump' commands
$pre_create_sql .= "DROP TABLE $table CASCADE;\n"; # custom dumps may be missing the 'dump' commands
}
s/(create\s+table\s+)([-_\w]+)\s/$1 $table /i;
@@ -367,6 +367,7 @@ if ($create_sql ne "") { # we are inside create table statement so lets
s/INSERT METHOD[=\s+][^;\s]+//i;
s/PASSWORD=[^;\s]+//i;
s/ROW_FORMAT=(?:DEFAULT|DYNAMIC|FIXED|COMPRESSED|REDUNDANT|COMPACT)+//i;
s/KEY_BLOCK_SIZE=8//i;
s/DELAY KEY WRITE=[^;\s]+//i;
s/INDEX DIRECTORY[=\s+][^;\s]+//i;
s/DATA DIRECTORY=[^;\s]+//i;
@@ -389,6 +390,7 @@ if ($create_sql ne "") { # we are inside create table statement so lets
s/DEFAULT CHARSET=[^;\s]+//i; # my mysql version is 4.1.11
s/ENGINE\s*=\s*[^;\s]+//i; # my mysql version is 4.1.11
s/ROW_FORMAT=[^;\s]+//i; # my mysql version is 5.0.22
s/KEY_BLOCK_SIZE=8//i;
s/MIN_ROWS=[^;\s]+//i;
s/MAX_ROWS=[^;\s]+//i;
s/AVG_ROW_LENGTH=[^;\s]+//i;

View File

@@ -0,0 +1,199 @@
place_type,level
Q9842,4
Q9430,3
Q928830,4
Q9259,1
Q91028,5
Q8514,2
Q8502,2
Q83405,3
Q82794,2
Q820477,1
Q811979,1
Q8072,2
Q79007,2
Q786014,3
Q75848,2
Q75520,2
Q728937,4
Q7275,2
Q719456,3
Q7075,3
Q697295,4
Q6852233,2
Q682943,3
Q665487,5
Q655686,3
Q643589,5
Q641226,2
Q631305,2
Q6256,2
Q6023295,2
Q5773747,5
Q56061,1
Q55659167,4
Q55488,4
Q55465477,3
Q54050,2
Q532,3
Q53060,2
Q52177058,4
Q515716,5
Q5153984,4
Q515,3
Q5144960,5
Q5119,4
Q5119,4
Q5107,2
Q5084,4
Q5031071,4
Q5003624,2
Q4989906,1
Q4976993,3
Q486972,1
Q486972,2
Q483110,3
Q4830453,4
Q47521,3
Q473972,1
Q46831,2
Q46614560,5
Q44782,3
Q44613,4
Q44539,4
Q44494,2
Q44377,2
Q4421,2
Q43501,2
Q4286337,3
Q42523,3
Q41176,2
Q40357,3
Q4022,4
Q40080,2
Q39816,2
Q39715,3
Q39614,1
Q3957,3
Q3947,4
Q3914,3
Q38723,2
Q38720,3
Q3623867,5
Q35666,2
Q355304,3
Q35509,2
Q35112127,3
Q34985575,4
Q34876,5
Q34763,2
Q34627,4
Q3455524,3
Q34442,4
Q33837,2
Q33506,3
Q32815,4
Q3257686,2
Q3240715,2
Q3191695,5
Q3153117,2
Q30198,2
Q30139652,3
Q294422,3
Q2870166,3
Q27686,3
Q274153,3
Q271669,1
Q2659904,2
Q24529780,2
Q24354,3
Q2354973,4
Q23442,2
Q23413,3
Q23397,3
Q2327515,4
Q2311958,5
Q22927291,6
Q22698,1
Q2175765,4
Q205495,4
Q204832,3
Q2042028,2
Q202216,6
Q1970725,3
Q194203,5
Q194195,2
Q190429,2
Q185187,3
Q185113,2
Q183366,2
Q1799794,1
Q1788454,4
Q1785071,3
Q1777138,3
Q177634,2
Q177380,2
Q174814,4
Q174782,2
Q17350442,2
Q17343829,3
Q17334923,0
Q17018380,3
Q16970,4
Q16917,3
Q16831714,4
Q165,3
Q160742,4
Q159719,3
Q159334,4
Q15640612,5
Q15324,2
Q15284,5
Q15243209,6
Q152081,1
Q15195406,4
Q1500350,5
Q149621,5
Q14757767,4
Q14350,3
Q1410668,3
Q1394476,3
Q1377575,2
Q1353183,3
Q134447,4
Q133215,3
Q133056,2
Q13221722,3
Q13220204,2
Q1311958,4
Q1303167,3
Q130003,3
Q12518,2
Q12516,3
Q1248784,3
Q123705,3
Q12323,3
Q12284,4
Q12280,4
Q121359,2
Q1210950,2
Q11755880,3
Q11707,3
Q11315,3
Q11303,3
Q1115575,4
Q1107656,1
Q10864048,1
Q1076486,2
Q105731,3
Q105190,3
Q1048525,3
Q102496,5
Q28872924,1
Q15617994,1
Q159313,2
Q24398318,3
Q327333,2
Q43229,1
Q860861,1
Q4989906,1

View File

@@ -0,0 +1,195 @@
Q9842
Q9430
Q928830
Q9259
Q91028
Q8514
Q8502
Q83405
Q82794
Q820477
Q811979
Q8072
Q79007
Q786014
Q75848
Q75520
Q728937
Q7275
Q719456
Q7075
Q697295
Q6852233
Q682943
Q665487
Q655686
Q643589
Q641226
Q631305
Q6256
Q6023295
Q5773747
Q56061
Q55659167
Q55488
Q55465477
Q54050
Q532
Q53060
Q52177058
Q515716
Q5153984
Q515
Q5144960
Q5119
Q5107
Q5084
Q5031071
Q5003624
Q4989906
Q4976993
Q486972
Q483110
Q4830453
Q47521
Q473972
Q46831
Q46614560
Q44782
Q44613
Q44539
Q44494
Q44377
Q4421
Q43501
Q4286337
Q42523
Q41176
Q40357
Q4022
Q40080
Q39816
Q39715
Q39614
Q3957
Q3947
Q3914
Q38723
Q38720
Q3623867
Q35666
Q355304
Q35509
Q35112127
Q34985575
Q34876
Q34763
Q34627
Q3455524
Q34442
Q33837
Q33506
Q32815
Q3257686
Q3240715
Q3191695
Q3153117
Q30198
Q30139652
Q294422
Q2870166
Q27686
Q274153
Q271669
Q2659904
Q24529780
Q24354
Q2354973
Q23442
Q23413
Q23397
Q2327515
Q2311958
Q22927291
Q22698
Q2175765
Q205495
Q204832
Q2042028
Q202216
Q1970725
Q194203
Q194195
Q190429
Q185187
Q185113
Q183366
Q1799794
Q1788454
Q1785071
Q1777138
Q177634
Q177380
Q174814
Q174782
Q17350442
Q17343829
Q17334923
Q17018380
Q16970
Q16917
Q16831714
Q165
Q160742
Q159719
Q159334
Q15640612
Q15324
Q15284
Q15243209
Q152081
Q15195406
Q1500350
Q149621
Q14757767
Q14350
Q1410668
Q1394476
Q1377575
Q1353183
Q134447
Q133215
Q133056
Q13221722
Q13220204
Q1311958
Q1303167
Q130003
Q12518
Q12516
Q1248784
Q123705
Q12323
Q12284
Q12280
Q121359
Q1210950
Q11755880
Q11707
Q11315
Q11303
Q1115575
Q1107656
Q10864048
Q1076486
Q105731
Q105190
Q1048525
Q102496
Q28872924
Q15617994
Q159313
Q24398318
Q327333
Q43229
Q860861

View File

@@ -0,0 +1,200 @@
## Wikidata place types and related OSM Tags
Wikidata does not have any official ontologies; however, the [DBpedia project](https://wiki.dbpedia.org/) has created an [ontology](https://wiki.dbpedia.org/services-resources/ontology) that covers [place types](http://mappings.dbpedia.org/server/ontology/classes/#Place). The table below uses the DBpedia place ontology as a starting point and is provided as a cross-reference to the relevant OSM tags.
The Wikidata place types listed in the table below can be used in conjunction with the [Wikidata Query Service](https://query.wikidata.org/) to retrieve instances of those place types from the Wikidata knowledge base.
```
SELECT ?item ?lat ?lon
WHERE {
  ?item wdt:P31*/wdt:P279* wd:Q9430; wdt:P625 ?pt.
  ?item p:P625 ?loc.
  ?loc psv:P625 ?cnode.
  ?cnode wikibase:geoLatitude ?lat.
  ?cnode wikibase:geoLongitude ?lon.
}
```
An example JSON response for all instances of the Wikidata item "Q9430" (Ocean) can be seen at [json](https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT?item?lat?lon%20WHERE{?item%20wdt:P31*/wdt:P279*wd:Q9430;wdt:P625?pt.?item%20p:P625?loc.?loc%20psv:P625?cnode.?cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
**NOTE** the OSM tags listed are those referenced in the Wikidata entries, not all possible matching tags within OSM.
title | concept | OSM Tag |
-----------|---------------------------------------|------------------|
[Q17334923](https://www.wikidata.org/entity/Q17334923) | Location | |
[Q811979](https://www.wikidata.org/entity/Q811979) | Architectural Structure | |
[Q194195](https://www.wikidata.org/entity/Q194195) | Amusement park | |
[Q204832](https://www.wikidata.org/entity/Q204832) | Roller coaster | [attraction=roller_coaster](https://wiki.openstreetmap.org/wiki/Tag:attraction=roller_coaster) |
[Q2870166](https://www.wikidata.org/entity/Q2870166) | Water ride | |
[Q641226](https://www.wikidata.org/entity/Q641226) | Arena | [amenity=events_centre](https://wiki.openstreetmap.org/wiki/Tag:amenity=events_centre) |
[Q41176](https://www.wikidata.org/entity/Q41176) | Building | [building=yes](https://wiki.openstreetmap.org/wiki/Key:building) |
[Q1303167](https://www.wikidata.org/entity/Q1303167) | Barn | [building=barn](https://wiki.openstreetmap.org/wiki/Tag:building=barn) |
[Q655686](https://www.wikidata.org/entity/Q655686) | Commercial building | [building=commercial](https://wiki.openstreetmap.org/wiki/Tag:building=commercial) |
[Q4830453](https://www.wikidata.org/entity/Q4830453) | Business | |
[Q7075](https://www.wikidata.org/entity/Q7075) | Library | [amenity=library](https://wiki.openstreetmap.org/wiki/Tag:amenity=library) |
[Q133215](https://www.wikidata.org/entity/Q133215) | Casino | [amenity=casino](https://wiki.openstreetmap.org/wiki/Tag:amenity=casino) |
[Q23413](https://www.wikidata.org/entity/Q23413) | Castle | [historic=castle](https://wiki.openstreetmap.org/wiki/Tag:historic=castle) |
[Q83405](https://www.wikidata.org/entity/Q83405) | Factory | |
[Q53060](https://www.wikidata.org/entity/Q53060) | Gate | [barrier=gate](https://wiki.openstreetmap.org/wiki/Tag:barrier=gate) |
[Q11755880](https://www.wikidata.org/entity/Q11755880) | Residential Building | [building=residential](https://wiki.openstreetmap.org/wiki/Tag:building=residential) |
[Q3947](https://www.wikidata.org/entity/Q3947) | House | [building=house](https://wiki.openstreetmap.org/wiki/Tag:building=house) |
[Q35112127](https://www.wikidata.org/entity/Q35112127) | Historic Building | |
[Q5773747](https://www.wikidata.org/entity/Q5773747) | Historic house | |
[Q38723](https://www.wikidata.org/entity/Q38723) | Higher Education Institution | |
[Q3914](https://www.wikidata.org/entity/Q3914) | School | [amenity=school](https://wiki.openstreetmap.org/wiki/Tag:amenity=school) |
[Q9842](https://www.wikidata.org/entity/Q9842) | Primary school | |
[Q159334](https://www.wikidata.org/entity/Q159334) | Secondary school | |
[Q16917](https://www.wikidata.org/entity/Q16917) | Hospital | [amenity=hospital](https://wiki.openstreetmap.org/wiki/Tag:amenity=hospital), [healthcare=hospital](https://wiki.openstreetmap.org/wiki/Tag:healthcare=hospital), [building=hospital](https://wiki.openstreetmap.org/wiki/Tag:building=hospital) |
[Q27686](https://www.wikidata.org/entity/Q27686) | Hotel | [tourism=hotel](https://wiki.openstreetmap.org/wiki/Tag:tourism=hotel), [building=hotel](https://wiki.openstreetmap.org/wiki/Tag:building=hotel) |
[Q33506](https://www.wikidata.org/entity/Q33506) | Museum | [tourism=museum](https://wiki.openstreetmap.org/wiki/Tag:tourism=museum) |
[Q40357](https://www.wikidata.org/entity/Q40357) | Prison | [amenity=prison](https://wiki.openstreetmap.org/wiki/Tag:amenity=prison) |
[Q24398318](https://www.wikidata.org/entity/Q24398318) | Religious Building | |
[Q160742](https://www.wikidata.org/entity/Q160742) | Abbey | |
[Q16970](https://www.wikidata.org/entity/Q16970) | Church (building) | [building=church](https://wiki.openstreetmap.org/wiki/Tag:building=church) |
[Q44613](https://www.wikidata.org/entity/Q44613) | Monastery | [amenity=monastery](https://wiki.openstreetmap.org/wiki/Tag:amenity=monastery) |
[Q32815](https://www.wikidata.org/entity/Q32815) | Mosque | [building=mosque](https://wiki.openstreetmap.org/wiki/Tag:building=mosque) |
[Q697295](https://www.wikidata.org/entity/Q697295) | Shrine | [building=shrine](https://wiki.openstreetmap.org/wiki/Tag:building=shrine) |
[Q34627](https://www.wikidata.org/entity/Q34627) | Synagogue | [building=synagogue](https://wiki.openstreetmap.org/wiki/Tag:building=synagogue) |
[Q44539](https://www.wikidata.org/entity/Q44539) | Temple | [building=temple](https://wiki.openstreetmap.org/wiki/Tag:building=temple) |
[Q11707](https://www.wikidata.org/entity/Q11707) | Restaurant | [amenity=restaurant](https://wiki.openstreetmap.org/wiki/Tag:amenity=restaurant) |
[Q11315](https://www.wikidata.org/entity/Q11315) | Shopping mall | [shop=mall](https://wiki.openstreetmap.org/wiki/Tag:shop=mall), [shop=shopping_centre](https://wiki.openstreetmap.org/wiki/Tag:shop=shopping_centre) |
[Q11303](https://www.wikidata.org/entity/Q11303) | Skyscraper | |
[Q17350442](https://www.wikidata.org/entity/Q17350442) | Venue | |
[Q41253](https://www.wikidata.org/entity/Q41253) | Movie Theater | [amenity=cinema](https://wiki.openstreetmap.org/wiki/Tag:amenity=cinema) |
[Q483110](https://www.wikidata.org/entity/Q483110) | Stadium | [leisure=stadium](https://wiki.openstreetmap.org/wiki/Tag:leisure=stadium), [building=stadium](https://wiki.openstreetmap.org/wiki/Tag:building=stadium) |
[Q24354](https://www.wikidata.org/entity/Q24354) | Theater (structure) | [amenity=theatre](https://wiki.openstreetmap.org/wiki/Tag:amenity=theatre) |
[Q121359](https://www.wikidata.org/entity/Q121359) | Infrastructure | |
[Q1248784](https://www.wikidata.org/entity/Q1248784) | Airport | |
[Q12323](https://www.wikidata.org/entity/Q12323) | Dam | [waterway=dam](https://wiki.openstreetmap.org/wiki/Tag:waterway=dam) |
[Q1353183](https://www.wikidata.org/entity/Q1353183) | Launch pad | |
[Q105190](https://www.wikidata.org/entity/Q105190) | Levee | [man_made=dyke](https://wiki.openstreetmap.org/wiki/Tag:man_made=dyke) |
[Q105731](https://www.wikidata.org/entity/Q105731) | Lock (water navigation) | [lock=yes](https://wiki.openstreetmap.org/wiki/Key:lock) |
[Q44782](https://www.wikidata.org/entity/Q44782) | Port | |
[Q159719](https://www.wikidata.org/entity/Q159719) | Power station | [power=plant](https://wiki.openstreetmap.org/wiki/Tag:power=plant) |
[Q174814](https://www.wikidata.org/entity/Q174814) | Electrical substation | |
[Q134447](https://www.wikidata.org/entity/Q134447) | Nuclear power plant | [plant:source=nuclear](https://wiki.openstreetmap.org/wiki/Tag:plant:source=nuclear) |
[Q786014](https://www.wikidata.org/entity/Q786014) | Rest area | [highway=rest_area](https://wiki.openstreetmap.org/wiki/Tag:highway=rest_area), [highway=services](https://wiki.openstreetmap.org/wiki/Tag:highway=services) |
[Q12280](https://www.wikidata.org/entity/Q12280) | Bridge | [bridge=* ](https://wiki.openstreetmap.org/wiki/Key:bridge), [man_made=bridge](https://wiki.openstreetmap.org/wiki/Tag:man_made=bridge) |
[Q728937](https://www.wikidata.org/entity/Q728937) | Railroad Line | [railway=rail](https://wiki.openstreetmap.org/wiki/Tag:railway=rail) |
[Q1311958](https://www.wikidata.org/entity/Q1311958) | Railway Tunnel | |
[Q34442](https://www.wikidata.org/entity/Q34442) | Road | [highway=* ](https://wiki.openstreetmap.org/wiki/Key:highway), [route=road](https://wiki.openstreetmap.org/wiki/Tag:route=road) |
[Q1788454](https://www.wikidata.org/entity/Q1788454) | Road junction | |
[Q44377](https://www.wikidata.org/entity/Q44377) | Tunnel | [tunnel=* ](https://wiki.openstreetmap.org/wiki/Key:tunnel) |
[Q5031071](https://www.wikidata.org/entity/Q5031071) | Canal tunnel | |
[Q719456](https://www.wikidata.org/entity/Q719456) | Station | [public_transport=station](https://wiki.openstreetmap.org/wiki/Tag:public_transport=station) |
[Q205495](https://www.wikidata.org/entity/Q205495) | Filling station | [amenity=fuel](https://wiki.openstreetmap.org/wiki/Tag:amenity=fuel) |
[Q928830](https://www.wikidata.org/entity/Q928830) | Metro station | [station=subway](https://wiki.openstreetmap.org/wiki/Tag:station=subway) |
[Q55488](https://www.wikidata.org/entity/Q55488) | Train station | [railway=station](https://wiki.openstreetmap.org/wiki/Tag:railway=station) |
[Q2175765](https://www.wikidata.org/entity/Q2175765) | Tram stop | [railway=tram_stop](https://wiki.openstreetmap.org/wiki/Tag:railway=tram_stop), [public_transport=stop_position](https://wiki.openstreetmap.org/wiki/Tag:public_transport=stop_position) |
[Q6852233](https://www.wikidata.org/entity/Q6852233) | Military building | |
[Q44494](https://www.wikidata.org/entity/Q44494) | Mill (grinding) | |
[Q185187](https://www.wikidata.org/entity/Q185187) | Watermill | [man_made=watermill](https://wiki.openstreetmap.org/wiki/Tag:man_made=watermill) |
[Q38720](https://www.wikidata.org/entity/Q38720) | Windmill | [man_made=windmill](https://wiki.openstreetmap.org/wiki/Tag:man_made=windmill) |
[Q4989906](https://www.wikidata.org/entity/Q4989906) | Monument | [historic=monument](https://wiki.openstreetmap.org/wiki/Tag:historic=monument) |
[Q5003624](https://www.wikidata.org/entity/Q5003624) | Memorial | [historic=memorial](https://wiki.openstreetmap.org/wiki/Tag:historic=memorial) |
[Q271669](https://www.wikidata.org/entity/Q271669) | Landform | |
[Q190429](https://www.wikidata.org/entity/Q190429) | Depression (geology) | |
[Q17018380](https://www.wikidata.org/entity/Q17018380) | Bight (geography) | |
[Q54050](https://www.wikidata.org/entity/Q54050) | Hill | |
[Q1210950](https://www.wikidata.org/entity/Q1210950) | Channel (geography) | |
[Q23442](https://www.wikidata.org/entity/Q23442) | Island | [place=island](https://wiki.openstreetmap.org/wiki/Tag:place=island) |
[Q42523](https://www.wikidata.org/entity/Q42523) | Atoll | |
[Q34763](https://www.wikidata.org/entity/Q34763) | Peninsula | |
[Q355304](https://www.wikidata.org/entity/Q355304) | Watercourse | |
[Q30198](https://www.wikidata.org/entity/Q30198) | Marsh | [wetland=marsh](https://wiki.openstreetmap.org/wiki/Tag:wetland=marsh) |
[Q75520](https://www.wikidata.org/entity/Q75520) | Plateau | |
[Q2042028](https://www.wikidata.org/entity/Q2042028) | Ravine | |
[Q631305](https://www.wikidata.org/entity/Q631305) | Rock formation | |
[Q12516](https://www.wikidata.org/entity/Q12516) | Pyramid | |
[Q1076486](https://www.wikidata.org/entity/Q1076486) | Sports venue | |
[Q682943](https://www.wikidata.org/entity/Q682943) | Cricket field | [sport=cricket](https://wiki.openstreetmap.org/wiki/Tag:sport=cricket) |
[Q1048525](https://www.wikidata.org/entity/Q1048525) | Golf course | [leisure=golf_course](https://wiki.openstreetmap.org/wiki/Tag:leisure=golf_course) |
[Q1777138](https://www.wikidata.org/entity/Q1777138) | Race track | [highway=raceway](https://wiki.openstreetmap.org/wiki/Tag:highway=raceway) |
[Q130003](https://www.wikidata.org/entity/Q130003) | Ski resort | |
[Q174782](https://www.wikidata.org/entity/Q174782) | Town square | [place=square](https://wiki.openstreetmap.org/wiki/Tag:place=square) |
[Q12518](https://www.wikidata.org/entity/Q12518) | Tower | [building=tower](https://wiki.openstreetmap.org/wiki/Tag:building=tower), [man_made=tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=tower) |
[Q39715](https://www.wikidata.org/entity/Q39715) | Lighthouse | [man_made=lighthouse](https://wiki.openstreetmap.org/wiki/Tag:man_made=lighthouse) |
[Q274153](https://www.wikidata.org/entity/Q274153) | Water tower | [building=water_tower](https://wiki.openstreetmap.org/wiki/Tag:building=water_tower), [man_made=water_tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=water_tower) |
[Q43501](https://www.wikidata.org/entity/Q43501) | Zoo | [tourism=zoo](https://wiki.openstreetmap.org/wiki/Tag:tourism=zoo) |
[Q39614](https://www.wikidata.org/entity/Q39614) | Cemetery | [amenity=grave_yard](https://wiki.openstreetmap.org/wiki/Tag:amenity=grave_yard), [landuse=cemetery](https://wiki.openstreetmap.org/wiki/Tag:landuse=cemetery) |
[Q152081](https://www.wikidata.org/entity/Q152081) | Concentration camp | |
[Q1107656](https://www.wikidata.org/entity/Q1107656) | Garden | [leisure=garden](https://wiki.openstreetmap.org/wiki/Tag:leisure=garden) |
[Q820477](https://www.wikidata.org/entity/Q820477) | Mine | |
[Q33837](https://www.wikidata.org/entity/Q33837) | Archipelago | [place=archipelago](https://wiki.openstreetmap.org/wiki/Tag:place=archipelago) |
[Q40080](https://www.wikidata.org/entity/Q40080) | Beach | [natural=beach](https://wiki.openstreetmap.org/wiki/Tag:natural=beach) |
[Q15324](https://www.wikidata.org/entity/Q15324) | Body of water | [natural=water](https://wiki.openstreetmap.org/wiki/Tag:natural=water) |
[Q23397](https://www.wikidata.org/entity/Q23397) | Lake | [water=lake](https://wiki.openstreetmap.org/wiki/Tag:water=lake) |
[Q9430](https://www.wikidata.org/entity/Q9430) | Ocean | |
[Q165](https://www.wikidata.org/entity/Q165) | Sea | |
[Q47521](https://www.wikidata.org/entity/Q47521) | Stream | |
[Q12284](https://www.wikidata.org/entity/Q12284) | Canal | [waterway=canal](https://wiki.openstreetmap.org/wiki/Tag:waterway=canal) |
[Q4022](https://www.wikidata.org/entity/Q4022) | River | [waterway=river](https://wiki.openstreetmap.org/wiki/Tag:waterway=river), [type=waterway](https://wiki.openstreetmap.org/wiki/Relation:waterway) |
[Q185113](https://www.wikidata.org/entity/Q185113) | Cape | [natural=cape](https://wiki.openstreetmap.org/wiki/Tag:natural=cape) |
[Q35509](https://www.wikidata.org/entity/Q35509) | Cave | [natural=cave_entrance](https://wiki.openstreetmap.org/wiki/Tag:natural=cave_entrance) |
[Q8514](https://www.wikidata.org/entity/Q8514) | Desert | |
[Q4421](https://www.wikidata.org/entity/Q4421) | Forest | [natural=wood](https://wiki.openstreetmap.org/wiki/Tag:natural=wood) |
[Q35666](https://www.wikidata.org/entity/Q35666) | Glacier | [natural=glacier](https://wiki.openstreetmap.org/wiki/Tag:natural=glacier) |
[Q177380](https://www.wikidata.org/entity/Q177380) | Hot spring | |
[Q8502](https://www.wikidata.org/entity/Q8502) | Mountain | [natural=peak](https://wiki.openstreetmap.org/wiki/Tag:natural=peak) |
[Q133056](https://www.wikidata.org/entity/Q133056) | Mountain pass | |
[Q46831](https://www.wikidata.org/entity/Q46831) | Mountain range | |
[Q39816](https://www.wikidata.org/entity/Q39816) | Valley | [natural=valley](https://wiki.openstreetmap.org/wiki/Tag:natural=valley) |
[Q8072](https://www.wikidata.org/entity/Q8072) | Volcano | [natural=volcano](https://wiki.openstreetmap.org/wiki/Tag:natural=volcano) |
[Q43229](https://www.wikidata.org/entity/Q43229) | Organization | |
[Q327333](https://www.wikidata.org/entity/Q327333) | Government agency | [office=government](https://wiki.openstreetmap.org/wiki/Tag:office=government)|
[Q22698](https://www.wikidata.org/entity/Q22698) | Park | [leisure=park](https://wiki.openstreetmap.org/wiki/Tag:leisure=park) |
[Q159313](https://www.wikidata.org/entity/Q159313) | Urban agglomeration | |
[Q177634](https://www.wikidata.org/entity/Q177634) | Community | |
[Q5107](https://www.wikidata.org/entity/Q5107) | Continent | [place=continent](https://wiki.openstreetmap.org/wiki/Tag:place=continent) |
[Q6256](https://www.wikidata.org/entity/Q6256) | Country | [place=country](https://wiki.openstreetmap.org/wiki/Tag:place=country) |
[Q75848](https://www.wikidata.org/entity/Q75848) | Gated community | |
[Q3153117](https://www.wikidata.org/entity/Q3153117) | Intercommunality | |
[Q82794](https://www.wikidata.org/entity/Q82794) | Region | |
[Q56061](https://www.wikidata.org/entity/Q56061) | Administrative division | [boundary=administrative](https://wiki.openstreetmap.org/wiki/Tag:boundary=administrative) |
[Q665487](https://www.wikidata.org/entity/Q665487) | Diocese | |
[Q4976993](https://www.wikidata.org/entity/Q4976993) | Parish | [boundary=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:boundary=civil_parish) |
[Q194203](https://www.wikidata.org/entity/Q194203) | Arrondissements of France | |
[Q91028](https://www.wikidata.org/entity/Q91028) | Arrondissements of Belgium | |
[Q3623867](https://www.wikidata.org/entity/Q3623867) | Arrondissements of Benin | |
[Q2311958](https://www.wikidata.org/entity/Q2311958) | Canton (country subdivision) | [political_division=canton](https://wiki.openstreetmap.org/wiki/FR:Cantons_in_France) |
[Q643589](https://www.wikidata.org/entity/Q643589) | Department | |
[Q202216](https://www.wikidata.org/entity/Q202216) | Overseas department and region | |
[Q149621](https://www.wikidata.org/entity/Q149621) | District | [place=district](https://wiki.openstreetmap.org/wiki/Tag:place=district) |
[Q15243209](https://www.wikidata.org/wiki/Q15243209) | Historic district | |
[Q5144960](https://www.wikidata.org/entity/Q5144960) | Microregion | |
[Q15284](https://www.wikidata.org/entity/Q15284) | Municipality | |
[Q515716](https://www.wikidata.org/entity/Q515716) | Prefecture | |
[Q34876](https://www.wikidata.org/entity/Q34876) | Province | |
[Q3191695](https://www.wikidata.org/entity/Q3191695) | Regency (Indonesia) | |
[Q1970725](https://www.wikidata.org/entity/Q1970725) | Natural region | |
[Q486972](https://www.wikidata.org/entity/Q486972) | Human settlement | |
[Q515](https://www.wikidata.org/entity/Q515) | City | [place=city](https://wiki.openstreetmap.org/wiki/Tag:place=city) |
[Q5119](https://www.wikidata.org/entity/Q5119) | Capital city | [capital=yes](https://wiki.openstreetmap.org/wiki/Key:capital) |
[Q4286337](https://www.wikidata.org/entity/Q4286337) | City district | |
[Q1394476](https://www.wikidata.org/entity/Q1394476) | Civil township | |
[Q1115575](https://www.wikidata.org/entity/Q1115575) | Civil parish | [designation=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:designation=civil_parish) |
[Q5153984](https://www.wikidata.org/entity/Q5153984) | Commune-level subdivisions | |
[Q123705](https://www.wikidata.org/entity/Q123705) | Neighbourhood | [place=neighbourhood](https://wiki.openstreetmap.org/wiki/Tag:place=neighbourhood) |
[Q1500350](https://www.wikidata.org/entity/Q1500350) | Townships of China | |
[Q17343829](https://www.wikidata.org/entity/Q17343829) | Unincorporated Community | |
[Q3957](https://www.wikidata.org/entity/Q3957) | Town | [place=town](https://wiki.openstreetmap.org/wiki/Tag:place=town) |
[Q532](https://www.wikidata.org/entity/Q532) | Village | [place=village](https://wiki.openstreetmap.org/wiki/Tag:place=village) |
[Q5084](https://www.wikidata.org/entity/Q5084) | Hamlet | [place=hamlet](https://wiki.openstreetmap.org/wiki/Tag:place=hamlet) |
[Q7275](https://www.wikidata.org/entity/Q7275) | State | |
[Q79007](https://www.wikidata.org/entity/Q79007) | Street | |
[Q473972](https://www.wikidata.org/entity/Q473972) | Protected area | [boundary=protected_area](https://wiki.openstreetmap.org/wiki/Tag:boundary=protected_area) |
[Q1377575](https://www.wikidata.org/entity/Q1377575) | Wildlife refuge | |
[Q1410668](https://www.wikidata.org/entity/Q1410668) | National Wildlife Refuge | protection_title=National Wildlife Refuge, [ownership=national](https://wiki.openstreetmap.org/wiki/Tag:ownership=national) |
[Q9259](https://www.wikidata.org/entity/Q9259) | World Heritage Site | |
---
### Future Work
The Wikidata improvements to Nominatim can be further enhanced by:
- continuing to add new Wikidata links to OSM objects
- increasing the number of place types accounted for in the wikipedia_articles table
- working to use place types in the wikipedia_article matching process

File diff suppressed because it is too large

View File

@@ -0,0 +1,16 @@
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET check_function_bodies = false;
SET client_min_messages = warning;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
CREATE TABLE us_postcode (
postcode text,
x double precision,
y double precision
);

View File

@@ -18,6 +18,7 @@ ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/gb-postcodes/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/GB-Postcodes.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Country-Grid.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/mexico.quad.png ${CMAKE_CURRENT_BINARY_DIR}/data-sources/mexico.quad.png
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/wikipedia-wikidata/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Wikipedia-Wikidata.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md

View File

@@ -24,14 +24,14 @@ If the reported rank is 26 or higher, you can also safely add `--index-noanalyse
### PHP "open_basedir restriction in effect" warnings
`PHP Warning: file_get_contents(): open_basedir restriction in effect.`
PHP Warning: file_get_contents(): open_basedir restriction in effect.
You need to adjust the [open_basedir](http://www.php.net/manual/en/ini.core.php#ini.open-basedir) setting
You need to adjust the [open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir) setting
in your PHP configuration (`php.ini file`). By default this setting may look like this:
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
Either add reported directories to the list or disable this setting temporarily by
Either add reported directories to the list or disable this setting temporarily by
adding ";" at the beginning of the line. Don't forget to enable this setting again
once you are done with the PHP command line operations.
@@ -44,9 +44,9 @@ The Apache log may contain lots of PHP warnings like this:
You should set the default time zone as instructed in the warning in
your `php.ini` file. Find the entry about timezone and set it to
something like this:
; Defines the default timezone used by the date functions
; http://php.net/date.timezone
; https://php.net/date.timezone
date.timezone = 'America/Denver'
Or
@@ -66,6 +66,14 @@ server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
and recompile (`cmake .. && make`).
### I see the error "ERROR: permission denied for language c"
`nominatim.so`, written in C, is required to be installed on the database
server. Some managed database (cloud) services like Amazon RDS do not allow
this. There is currently no work-around other than installing a database
on a non-managed machine.
### I see the error: "function transliteration(text) does not exist"
Reinstall the Nominatim functions with `setup.php --create-functions`
@@ -81,6 +89,14 @@ If you are using a flatnode file, then it may also be that the underlying
filesystem does not fully support 'mmap'. A notable candidate is virtualbox's
vboxfs.
### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) during the initial import of the database. It's
something internal to PostgreSQL that Nominatim doesn't control. PostgreSQL forums
suggest it is threading-related, but it is definitely some kind of process crash.
Users reported that rebooting the server, switching to different hardware or simply
retrying the import made it work.
### The website shows: "Could not get word tokens"
The server cannot access your database. Add `&debug=1` to your URL
@@ -104,11 +120,8 @@ However, you can solve this the quick and dirty way by commenting out that line
### "must be an array or an object that implements Countable" warning in /usr/share/pear/DB.php
As reported starting PHP 7.2. This external DB library is no longer maintained and will be replaced in future Nominatim versions. In the meantime you'd have to manually change the line near 774 from
`if (!count($dsn)) {` to `if (!$dsn && !count($dsn))`. [More details](https://github.com/openstreetmap/Nominatim/issues/1184)
The warning started with PHP 7.2. Make sure you have at least [version 1.9.3 of PEAR DB](https://github.com/pear/DB/releases)
installed.
### Website reports "DB Error: insufficient permissions"
@@ -135,7 +148,7 @@ Example error message
CONTEXT: PL/pgSQL function make_standard_name(text) line 5 at assignment]
```
The Postgresql database, i.e. user postgres, needs to have access to that file.
The PostgreSQL database, i.e. user `postgres`, needs to have access to that file.
The permissions need to be readable & executable by everybody, e.g.
@@ -150,7 +163,7 @@ When running SELinux, make sure that the
### Setup.php fails with "DB Error: extension not found"
Make sure you have the Postgres extensions hstore and postgis installed.
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
See the installation instruction for a full list of required packages.
@@ -160,7 +173,7 @@ See the installation instruction for a full list of required packages.
The message is a bit misleading as PHP needs to load the file `DB.php` and
instead re-loads Nominatim's `db.php`. To solve this make sure you
have the [Pear module 'DB'](http://pear.php.net/package/DB/) installed.
have the [Pear module 'DB'](https://pear.php.net/package/DB/) installed.
sudo pear install DB
@@ -185,7 +198,7 @@ For updates you need to download the change files for each country
once per day and apply them **separately** using
./utils/update.php --import-diff <filename> --index
See [this issue](https://github.com/openstreetmap/Nominatim/issues/60#issuecomment-18679446)
for a script that runs the updates using osmosis.

View File

@@ -36,7 +36,7 @@ the directory exists. There should be at least 40GB of free space.
### Wikipedia rankings
Wikipedia can be used as an optional auxiliary data source to help indicate
the importance of osm features. Nominatim will work without this information
the importance of OSM features. Nominatim will work without this information
but it will improve the quality of the results if this is installed.
This data is available as a binary download:
@@ -45,18 +45,20 @@ This data is available as a binary download:
wget https://www.nominatim.org/data/wikipedia_redirect.sql.bin
Combined the 2 files are around 1.5GB and add around 30GB to the install
size of nominatim. They also increase the install time by an hour or so.
size of Nominatim. They also increase the install time by an hour or so.
*NOTE:* you'll need to download the Wikipedia rankings before performing
the initial import of the data if you want the rankings applied to the
loaded data.
### UK postcodes
### Great Britain, USA postcodes
Nominatim can use postcodes from an external source to improve searches that involve a UK postcode. This data can be optionally downloaded:
Nominatim can use postcodes from an external source to improve searches that
involve a GB or US postcode. This data can be optionally downloaded:
cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
wget https://www.nominatim.org/data/us_postcode_data.sql.gz
## Choosing the Data to Import
@@ -77,7 +79,7 @@ below. There are also
Please be aware that some extracts are not cut exactly along the country
boundaries. As a result some parts of the boundary may be missing which means
that cannot compute the areas for some administrative areas.
that Nominatim cannot compute the areas for some administrative areas.
### Dropping Data Required for Dynamic Updates
@@ -99,7 +101,7 @@ database or reuse the space later.
If you only want to use the Nominatim database for reverse lookups or
if you plan to use the installation only for exports to a
[photon](http://photon.komoot.de/) database, then you can set up a database
[photon](https://photon.komoot.de/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
@@ -121,7 +123,7 @@ import styles available which only read selected data:
The style can be changed with the configuration `CONST_Import_Style`.
To give you an idea of the impact of using the different style, the table
To give you an idea of the impact of using the different styles, the table
below gives rough estimates of the final database size after import of a
2018 planet and after using the `--drop` option. It also shows the time
needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. Note that
@@ -136,7 +138,7 @@ address | 59h | 500 GB | 260 GB
full | 80h | 575 GB | 300 GB
You can also customize the styles further. For a description of the
style format see [the developement section](../develop/Import.md).
style format see [the development section](../develop/Import.md).
## Initial import of the data
@@ -157,7 +159,7 @@ about the same size as the file you are importing but never more than
2/3 of RAM available. If your machine starts swapping reduce the size.
Computing word frequency for search terms can improve the performance of
forward geocoding in particular under high load as it helps Postgres' query
forward geocoding in particular under high load as it helps PostgreSQL's query
planner to make the right decisions. To recompute word counts run:
```sh
@@ -186,16 +188,16 @@ address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER 2018 data and unpack it into the
1. Get preprocessed TIGER 2019 data and unpack it into the
data directory in your Nominatim sources:
cd Nominatim/data
wget https://nominatim.org/data/tiger2018-nominatim-preprocessed.tar.gz
tar xf tiger2018-nominatim-preprocessed.tar.gz
wget https://nominatim.org/data/tiger2019-nominatim-preprocessed.tar.gz
tar xf tiger2019-nominatim-preprocessed.tar.gz
`data-source/us-tiger/README.md` explains how the data got preprocessed.
2. Import the data into your Nominatim database:
2. Import the data into your Nominatim database:
./utils/setup.php --import-tiger-data
@@ -212,20 +214,20 @@ entire US adds about 10GB to your database.
## Updates
There are many different possibilities to update your Nominatim database.
There are many different ways to update your Nominatim database.
The following section describes how to keep it up-to-date with Pyosmium.
For a list of other methods see the output of `./utils/update.php --help`.
#### Installing the newest version of Pyosmium
It is recommended to install Pyosmium via pip. Run (as the same user who
will later run the updates):
It is recommended to install Pyosmium via pip. Make sure to use python3.
Run (as the same user who will later run the updates):
```sh
pip install --user osmium
pip3 install --user osmium
```
Nominatim needs a tool called `pyosmium-get-updates`, which comes with
Nominatim needs a tool called `pyosmium-get-updates` which comes with
Pyosmium. You need to tell Nominatim where to find it. Add the
following line to your `settings/local.php`:
@@ -241,7 +243,7 @@ to update using the global minutely diffs.
If you want a different update source you will need to add some settings
to `settings/local.php`. For example, to use the daily country extracts
diffs for Ireland from geofabrik add the following:
diffs for Ireland from Geofabrik add the following:
// base URL of the replication service
@define('CONST_Replication_Url', 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates');
@@ -257,7 +259,7 @@ To set up the update process now run the following command:
It outputs the date where updates will start. Recheck that this date is
what you expect.
The --init-updates command needs to be rerun whenever the replication service
The `--init-updates` command needs to be rerun whenever the replication service
is changed.
#### Updating Nominatim

View File

@@ -34,28 +34,28 @@ osm2pgsql README for additional dependencies required for compiling osm2pgsql.
For running tests:
* [behave](http://pythonhosted.org/behave/)
* [Psycopg2](http://initd.org/psycopg)
* [Psycopg2](https://initd.org/psycopg)
* [nose](https://nose.readthedocs.io)
* [phpunit](https://phpunit.de)
For running Nominatim:
* [PostgreSQL](http://www.postgresql.org) (9.1 or later)
* [PostGIS](http://postgis.refractions.net) (2.0 or later)
* [PHP](http://php.net) (5.4 or later)
* [PostgreSQL](https://www.postgresql.org) (9.3 or later)
* [PostGIS](https://postgis.org) (2.2 or later)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
* [PEAR::DB](http://pear.php.net/package/DB)
* [PEAR::DB](https://pear.php.net/package/DB)
* a webserver (apache or nginx are recommended)
For running continuous updates:
* [pyosmium](http://osmcode.org/pyosmium/)
* [pyosmium](https://osmcode.org/pyosmium/) (with Python 3)
### Hardware
A minimum of 2GB of RAM is required or installation will fail. For a full
planet import 32GB of RAM or more strongly are recommended.
planet import 32GB of RAM or more are strongly recommended.
For a full planet install you will need at least 700GB of hard disk space
(take into account that the OSM database is growing fast). SSD disks

View File

@@ -3,11 +3,39 @@
This page describes database migrations necessary to update existing databases
to newer versions of Nominatim.
SQL statements should be executed from the postgres commandline. Execute
SQL statements should be executed from the PostgreSQL commandline. Execute
`psql nominatim` to enter command line mode.
## 3.3.0 -> 3.4.0
## 3.2.0 -> master
### Reorganisation of location_area_country table
The table `location_area_country` has been optimized. You need to switch to the
new format when you run updates. While updates are disabled, run the following
SQL commands:
```sql
CREATE TABLE location_area_country_new AS
SELECT place_id, country_code, geometry FROM location_area_country;
DROP TABLE location_area_country;
ALTER TABLE location_area_country_new RENAME TO location_area_country;
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
```
## 3.2.0 -> 3.3.0
### New database connection string (DSN) format
Previously database connection setting (`CONST_Database_DSN` in `settings/*.php`) had the format
* (simple) `pgsql://@/nominatim`
* (complex) `pgsql://johndoe:secret@machine1.domain.com:1234/db1`
The new format is
* (simple) `pgsql:dbname=nominatim`
* (complex) `pgsql:dbname=db1;host=machine1.domain.com;port=1234;user=johndoe;password=secret`
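For illustration, a corresponding override in `settings/local.php` could look like the following sketch (the host, user and password are the made-up values from the example above):

```php
<?php
// settings/local.php (sketch only; credentials are the example values above)

// old 3.2.0-style value:
// @define('CONST_Database_DSN', 'pgsql://johndoe:secret@machine1.domain.com:1234/db1');

// new 3.3.0-style value:
@define('CONST_Database_DSN', 'pgsql:dbname=db1;host=machine1.domain.com;port=1234;user=johndoe;password=secret');
```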
### Natural Earth country boundaries no longer needed as fallback

View File

@@ -24,7 +24,7 @@ but the `class` parameter is left out, then one of the places will be chosen
at random and displayed.
```
https://nominatim.openstreetmap.org/details?placeid=<value>
https://nominatim.openstreetmap.org/details?place_id=<value>
```
Placeids are assigned sequentially during Nominatim data import. The id for a place is different between Nominatim installations (servers) and changes when data gets reimported. Therefore it can't be used as a permanent id and shouldn't be used in bug reports.
@@ -40,7 +40,7 @@ See [Place Output Formats](Output.md) for details on each format. (Default: html
* `json_callback=<string>`
Wrap json output in a callback function (JSONP) i.e. `<string>(<json>)`.
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
* `pretty=[0|1]`

View File

@@ -7,11 +7,11 @@
Nominatim computes the address from two sources in the OpenStreetMap data:
from administrative boundaries and from place nodes. Boundaries are the more
useful source. They precisely describe an area. So it is very clear for
Nominatim if a point belongs to an area of not. Place nodes are more complicated.
These are only points without any precise extend. So Nominatim has to take a
guess and assume that an address belongs to the closest place nose it can find.
Nominatim if a point belongs to an area or not. Place nodes are more complicated.
These are only points without any precise extent. So Nominatim has to take a
guess and assume that an address belongs to the closest place node it can find.
In an ideal world, Nominatim would not need the place nodes but there are
many places on earth where there are not precise boundaries available for
many places on earth where there are no precise boundaries available for
all parts that make up an address. This is in particular true for the more
local address parts, like villages and suburbs. Therefore it is not possible
to completely dismiss place nodes. And sometimes they sneak in where they
@@ -21,7 +21,7 @@ As an OpenStreetMap mapper, you can improve the situation in two ways: if you
see a place node for which already an administrative area exists, then you
should _link_ the two by adding the node with a 'label' role to the boundary
relation. If there is no administrative area, you can add the approximate
extend of the place and tag it place=<something> as well.
extent of the place and tag it place=<something> as well.
#### 2. When doing reverse search, the address details have parts that don't contain the point I was looking up.
@@ -30,7 +30,7 @@ Reverse does not give you the address of the point you asked for. Reverse
returns the closest object to the point you asked for and then returns the
address of that object. Now, if you are close to a border, then the closest
object may be across that border. When Nominatim then returns the address,
contains the county/state/country across the border.
it contains the county/state/country across the border.
#### 3. I get different counties/states/countries when I change the zoom parameter in the reverse query. How is that possible?
@@ -45,12 +45,12 @@ sometimes the other for the closest point.
Nominatim assigns each map feature one country. Those outside any administrative
boundaries are assigned a special no-country. Continents or other super-national
administrations (e.g. European Union, NATO, Custom unions) are not supported,
administrations (e.g. European Union, NATO, Custom unions) are not supported,
see also [Administrative Boundary](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative#Super-national_administrations).
#### 5. Can you return the timezone?
See this separate OpenStreetMap-based project [Timezone Boundary Builder](https://github.com/evansiroky/timezone-boundary-builder)
See this separate OpenStreetMap-based project [Timezone Boundary Builder](https://github.com/evansiroky/timezone-boundary-builder).
#### 6. I want to download a list of streets/restaurants of a city/region

View File

@@ -19,13 +19,13 @@ Additional optional parameters are explained below.
### Output format
* `format=[html|xml|json|jsonv2|geojson|geocodejson]`
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
* `json_callback=<string>`
Wrap json output in a callback function (JSONP) i.e. `<string>(<json>)`.
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
### Output details

View File

@@ -1,6 +1,6 @@
# Place Output
The [\reverse](Reverse.md), [\search](Search.md) and [\lookup](Lookup.md)
The [/reverse](Reverse.md), [/search](Search.md) and [/lookup](Lookup.md)
API calls produce very similar output which is explained in this section.
There is one section for each format which is selectable via the `format`
parameter.
@@ -70,7 +70,7 @@ This is the same as the JSON format with two changes:
### GeoJSON
This format follows the [RFC7946](http://geojson.org). Every feature includes
This format follows the [RFC7946](https://geojson.org). Every feature includes
a bounding box (`bbox`).
The feature list has the following fields:
@@ -83,7 +83,7 @@ The feature list has the following fields:
* `importance` - computed importance rank
* `icon` - link to class icon (if available)
* `address` - dictionary of address details (only with `addressdetails=1`)
* `extratags` - dictionary with additional useful tags like website or maxspeed
* `extratags` - dictionary with additional useful tags like `website` or `maxspeed`
(only with `extratags=1`)
* `namedetails` - dictionary with full list of available names including ref etc.
@@ -120,7 +120,7 @@ formats depending on the API call.
```
<reversegeocode timestamp="Sat, 11 Aug 18 11:53:21 +0000"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
querystring="lat=48.400381&lon=11.745876&zoom=5&format=xml">
<result place_id="179509537" osm_type="relation" osm_id="2145268" ref="BY"
lat="48.9467562" lon="11.4038717"
@@ -154,7 +154,7 @@ The place information can be found in the `result` element. The attributes of th
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates
The full address address of the result can be found in the content of the
The full address of the result can be found in the content of the
`result` element as a comma-separated list.
Additional information requested with `addressdetails=1`, `extratags=1` and
@@ -164,12 +164,12 @@ Additional information requested with `addressdetails=1`, `extratags=1` and
```
<searchresults timestamp="Sat, 11 Aug 18 11:55:35 +0000"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
querystring="london" polygon="false" exclude_place_ids="100149"
more_url="https://nominatim.openstreetmap.org/search.php?q=london&addressdetails=1&extratags=1&exclude_place_ids=100149&format=xml&accept-language=en-US%2Cen%3Bq%3D0.7%2Cde%3Bq%3D0.3">
<place place_id="100149" osm_type="node" osm_id="107775" place_rank="15"
boundingbox="51.3473219,51.6673219,-0.2876474,0.0323526" lat="51.5073219" lon="-0.1276474"
display_name="London, Greater London, England, SW1A 2DU, United Kingdom"
display_name="London, Greater London, England, SW1A 2DU, United Kingdom"
class="place" type="city" importance="0.9654895765402"
icon="https://nominatim.openstreetmap.org/images/mapicons/poi_place_city.p.20.png">
<extratags>
@@ -232,7 +232,7 @@ permanent for later use.
The combination `osm_type`+`osm_id` is slightly better, but remember that in
OpenStreetMap mappers can delete, split, recreate places (and those
get a new `osm_id`), there is no link between those old and new id.
get a new `osm_id`), there is no link between those old and new ids.
Places can also change their meaning without changing their `osm_id`,
e.g. when a restaurant is retagged as supermarket. For a more in-depth
discussion see [Permanent ID](https://wiki.openstreetmap.org/wiki/Permanent_ID).

View File

@@ -22,7 +22,7 @@ There are two ways how the requested location can be specified:
A specific OSM node(N), way(W) or relation(R) to return an address for.
In both cases exactly one object is returned. The two input paramters cannot
In both cases exactly one object is returned. The two input parameters cannot
be used at the same time. Both accept the additional optional parameters listed
below.
@@ -34,7 +34,7 @@ See [Place Output Formats](Output.md) for details on each format. (Default: html
* `json_callback=<string>`
Wrap json output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
### Output details
@@ -80,7 +80,8 @@ In terms of address details the zoom levels are as follows:
8 | county
10 | city
14 | suburb
16 | street
16 | major streets
17 | major and minor streets
18 | building
@@ -134,7 +135,7 @@ This overrides the specified machine readable format. (Default: 0)
<postcode>B72</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</addressparts>
</addressparts>
</reversegeocode>
```
@@ -145,7 +146,7 @@ This overrides the specified machine readable format. (Default: 0)
```json
{
"place_id":"134140761",
"licence":"Data © OpenStreetMap contributors, ODbL 1.0. http:\/\/www.openstreetmap.org\/copyright",
"licence":"Data © OpenStreetMap contributors, ODbL 1.0. https:\/\/www.openstreetmap.org\/copyright",
"osm_type":"way",
"osm_id":"280940520",
"lat":"-34.4391708",

View File

@@ -1,6 +1,6 @@
# Search queries
The search API allows to look up a location from a textual description.
The search API allows you to look up a location from a textual description.
Nominatim supports structured as well as free-form search queries.
The search query may also contain
@@ -46,7 +46,7 @@ In this form, the query may be given through two different sets of parameters:
Structured requests are faster but are less robust against alternative
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
All three query forms accept the additional paramters listed below.
All three query forms accept the additional parameters listed below.
### Output format
@@ -56,7 +56,7 @@ See [Place Output Formats](Output.md) for details on each format. (Default: html
* `json_callback=<string>`
Wrap json output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
### Output details
@@ -112,7 +112,8 @@ Limit the number of returned results. (Default: 10, Maximum: 50)
* `viewbox=<x1>,<y1>,<x2>,<y2>`
The preferred area to find search results. Any two corner points of the box
are accepted in any order as long as they span a real box.
are accepted in any order as long as they span a real box. `x` is longitude,
`y` is latitude.
* `bounded=[0|1]`
@@ -175,12 +176,12 @@ This overrides the specified machine readable format. (Default: 0)
```xml
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
polygonpoints="[['-1.81592098644987','52.5487429714954'],['-1.81592290792183','52.5487234624632'],...]"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
polygonpoints="[['-1.81592098644987','52.5487429714954'],['-1.81592290792183','52.5487234624632'],...]"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
class="place" type="house">
<house_number>135</house_number>
<road>Pilkington Avenue</road>
@@ -237,7 +238,7 @@ This overrides the specified machine readable format. (Default: 0)
##### JSON with address details
[https://nominatim.openstreetmap.org/?format=json&addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?format=json&addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
```json
{

View File

@@ -0,0 +1,36 @@
# Documentation Pages
The [Nominatim documentation](https://nominatim.org/release-docs/develop/) is built using the [MkDocs](https://www.mkdocs.org/) static site generation framework. The master branch is automatically deployed every night under [https://nominatim.org/release-docs/develop/](https://nominatim.org/release-docs/develop/).
To preview local changes:
1. Install MkDocs
```
pip3 install --user mkdocs
```
2. In build directory run
```
make doc
INFO - Cleaning site directory
INFO - Building documentation to directory: /home/vagrant/build/site-html
```
This runs `mkdocs build` plus extra transformation of some files and adds symlinks (see `CMakeLists.txt` for the exact steps).
3. Start webserver for local testing
```
mkdocs serve
[server:296] Serving on http://127.0.0.1:8000
[handlers:62] Start watching changes
```
If you develop inside a Vagrant virtual machine:
* add port forwarding to your Vagrantfile, e.g. `config.vm.network "forwarded_port", guest: 8000, host: 8000`
* use `mkdocs serve --dev-addr 0.0.0.0:8000` because the default localhost
IP does not get forwarded.

View File

@@ -24,7 +24,7 @@ with multiple tags that may constitute a principal tag. Take for example a
motorway bridge. In OSM, this would be a way which is tagged with
`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
once with `class` of `highway` and once with a `class` of `bridge`. Thus the
*uique key* for `place` is (`osm_type`, `osm_id`, `class`).
*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
## Configuring the Import
@@ -55,8 +55,8 @@ suffix match can be defined similarly with a string that starts with a `*`. Any
other string constitutes an exact match.
The second part of the rules defines a list of values and the properties that
apply to a successful match. Value strings may be either empty, which again
means that thy match against any value, or describe an exact match. Prefix
apply to a successful match. Value strings may be either empty, which
means that they match any value, or describe an exact match. Prefix
or suffix matching of values is not possible.
For a rule to match, it has to find a valid combination of keys and values. The
@@ -66,7 +66,7 @@ The rules in a configuration file are processed sequentially and the first
match for each tag wins.
A rule where key and value are the empty string is special. This defines the
fallback when none of the rules matches. The fallback is always used as a last
fallback when none of the rules match. The fallback is always used as a last
resort when nothing else matches, no matter where the rule appears in the file.
Defining multiple fallback rules is not allowed. What happens in this case
is undefined.
@@ -121,17 +121,17 @@ One or more of the following properties may be given for each tag:
* `address`
At tag to the list of address tags. If the tag starts with `addr:` or
Add tag to the list of address tags. If the tag starts with `addr:` or
`is_in:`, then this prefix is cut off before adding it to the list.
* `postcode`
At the value as a postcode to the address tags. If multiple tags are
Add the value as a postcode to the address tags. If multiple tags are
candidate for postcodes, one wins out and the others are dropped.
* `country`
At the value as a country code to the address tags. The value must be a
Add the value as a country code to the address tags. The value must be a
two letter country code, otherwise it is ignored. If there are multiple
tags that match, then one wins out and the others are dropped.

View File

@@ -21,7 +21,7 @@ Usually only administrative boundaries and place nodes and areas are
eligible to be part of an address. All other objects have an address rank
of 0.
Note that the search rank of a place place a role in the address computation
Note that the search rank of a place plays a role in the address computation
as well. When collecting the places that should make up the address parts
then only places are taken into account that have a lower address rank than
the search rank of the base object.
@@ -37,7 +37,7 @@ into the database. There are a few hard-coded rules for the assignment:
* highway nodes
* landuse that is not an area
Other than that, the ranks can be freely assigned via the json file
Other than that, the ranks can be freely assigned via the JSON file
defined with `CONST_Address_Level_Config` according to their type and
the country they are in.
@@ -78,12 +78,13 @@ definition is used as a fallback, when nothing more specific for a given
country exists.
`tags` contains the ranks for key/value pairs. The ranks can be either a
single number, in which case they are to search and address rank, or a tuple
single number, in which case they are the search and address rank, or an array
of search and address rank (in that order). The value may be left empty.
Then the rank is used when no more specific value is found for the given
key.
Countries and key/value combination may appear in multiple defintions. Just
Countries and key/value combinations may appear in multiple definitions. Just
make sure that each combination of country/key/value appears only once per
file. Otherwise the import will fail with a UNIQUE INDEX constraint violation
on import.
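To make that concrete, here is a small hypothetical sketch of a definition in the JSON file referenced by `CONST_Address_Level_Config`, following the description above (the tag names, country code and rank numbers are purely illustrative):

```json
[
  {
    "tags": {
      "place": {
        "city": [16, 16],
        "village": 16,
        "": 22
      },
      "boundary": {
        "administrative": 10
      }
    }
  },
  {
    "countries": ["de"],
    "tags": {
      "place": {
        "municipality": [14, 14]
      }
    }
  }
]
```

The first definition serves as the fallback; the second one overrides the ranks for a single country, as described above.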

View File

@@ -1,6 +1,6 @@
# Basic Architecture
Nominatim provides geocoding based on OpenStreetMap data. It uses a Postgresql
Nominatim provides geocoding based on OpenStreetMap data. It uses a PostgreSQL
database as a backend for storing the data.
There are three basic parts to Nominatim's architecture: the data import,
@@ -15,10 +15,10 @@ the import can be found in the database table `place`.
The __address computation__ or __indexing__ stage takes the data from `place`
and adds additional information needed for geocoding. It ranks the places by
importance, links objects that belong together and computes addresses and
the search index. Most of this work is done in Pl/pqSQL via database triggers
the search index. Most of this work is done in PL/pgSQL via database triggers
and can be found in the file `sql/functions.sql`.
The __search frontend__ implements the actual API. It takes queries for
search and reverse geocoding queries from the user, looks up the data and
The __search frontend__ implements the actual API. It takes search
and reverse geocoding queries from the user, looks up the data and
returns the results in the requested format. This part is written in PHP
and can be found in the `lib/` and `website/` directories.

View File

@@ -22,11 +22,13 @@ pages:
- 'Overview' : 'develop/overview.md'
- 'OSM Data Import' : 'develop/Import.md'
- 'Place Ranking' : 'develop/Ranking.md'
- 'Documentation' : 'develop/Documentation.md'
- 'External Data Sources':
- 'Overview' : 'data-sources/overview.md'
- 'US Census (Tiger)': 'data-sources/US-Tiger.md'
- 'GB Postcodes': 'data-sources/GB-Postcodes.md'
- 'Country Grid': 'data-sources/Country-Grid.md'
- 'Wikipedia & Wikidata': 'data-sources/Wikipedia-Wikidata.md'
- 'Appendix':
- 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
- 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'

View File

@@ -14,10 +14,10 @@ class AddressDetails
public function __construct(&$oDB, $iPlaceID, $sHousenumber, $mLangPref)
{
if (is_array($mLangPref)) {
$mLangPref = 'ARRAY['.join(',', array_map('getDBQuoted', $mLangPref)).']';
$mLangPref = $oDB->getArraySQL($oDB->getDBQuotedList($mLangPref));
}
if (!$sHousenumber) {
if (!isset($sHousenumber)) {
$sHousenumber = -1;
}
@@ -26,12 +26,12 @@ class AddressDetails
$sSQL .= ' FROM get_addressdata('.$iPlaceID.','.$sHousenumber.')';
$sSQL .= ' ORDER BY rank_address DESC, isaddress DESC';
$this->aAddressLines = chksql($oDB->getAll($sSQL));
$this->aAddressLines = $oDB->getAll($sSQL);
}
private static function isAddress($aLine)
{
return $aLine['isaddress'] == 't' || $aLine['type'] == 'country_code';
return $aLine['isaddress'] || $aLine['type'] == 'country_code';
}
public function getAddressDetails($bAll = false)
@@ -49,7 +49,7 @@ class AddressDetails
$sPrevResult = '';
foreach ($this->aAddressLines as $aLine) {
if ($aLine['isaddress'] == 't' && $sPrevResult != $aLine['localname']) {
if ($aLine['isaddress'] && $sPrevResult != $aLine['localname']) {
$sPrevResult = $aLine['localname'];
$aParts[] = $sPrevResult;
}
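For orientation, a minimal sketch of how the reworked constructor is meant to be called (not part of the patch; the place id, housenumber and language list are made up):

```php
<?php
// Sketch only: assumes the usual Nominatim bootstrap has defined the
// CONST_* settings and loaded lib/DB.php and lib/AddressDetails.php.
$oDB = new \Nominatim\DB();
$oDB->connect();

// 12345 is an illustrative place id; a null housenumber falls back to -1.
$oDetails = new \Nominatim\AddressDetails($oDB, 12345, null, array('name:en', 'name'));
$aAddressParts = $oDetails->getAddressDetails();
```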

lib/DB.php (new file, 298 lines)
View File

@@ -0,0 +1,298 @@
<?php
namespace Nominatim;
require_once(CONST_BasePath.'/lib/DatabaseError.php');
/**
* Uses PDO to access the database specified in the CONST_Database_DSN
* setting.
*/
class DB
{
protected $connection;
public function __construct($sDSN = CONST_Database_DSN)
{
$this->sDSN = $sDSN;
}
public function connect($bNew = false, $bPersistent = true)
{
if (isset($this->connection) && !$bNew) {
return true;
}
$aConnOptions = array(
\PDO::ATTR_ERRMODE => \PDO::ERRMODE_EXCEPTION,
\PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC,
\PDO::ATTR_PERSISTENT => $bPersistent
);
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
try {
$conn = new \PDO($this->sDSN, null, null, $aConnOptions);
} catch (\PDOException $e) {
$sMsg = 'Failed to establish database connection:' . $e->getMessage();
throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
}
$conn->exec("SET DateStyle TO 'sql,european'");
$conn->exec("SET client_encoding TO 'utf-8'");
$iMaxExecution = ini_get('max_execution_time');
if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
$this->connection = $conn;
return true;
}
// returns the number of rows that were modified or deleted by the SQL
// statement. If no rows were affected returns 0.
public function exec($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
$val = null;
try {
if (isset($aInputVars)) {
$stmt = $this->connection->prepare($sSQL);
$stmt->execute($aInputVars);
} else {
$val = $this->connection->exec($sSQL);
}
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $val;
}
/**
* Executes query. Returns first row as array.
* Returns false if no result found.
*
* @param string $sSQL
*
* @return array[]
*/
public function getRow($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
$row = $stmt->fetch();
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $row;
}
/**
* Executes query. Returns first value of first result.
* Returns false if no results found.
*
* @param string $sSQL
*
* @return array[]
*/
public function getOne($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
$row = $stmt->fetch(\PDO::FETCH_NUM);
if ($row === false) return false;
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $row[0];
}
/**
* Executes query. Returns array of results (arrays).
* Returns empty array if no results found.
*
* @param string $sSQL
*
* @return array[]
*/
public function getAll($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
$rows = $stmt->fetchAll();
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $rows;
}
/**
* Executes query. Returns array of the first value of each result.
* Returns empty array if no results found.
*
* @param string $sSQL
*
* @return array[]
*/
public function getCol($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
$aVals = array();
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
while ($val = $stmt->fetchColumn(0)) { // returns first column or false
$aVals[] = $val;
}
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $aVals;
}
/**
* Executes query. Returns associative array mapping first value to second value of each result.
* Returns empty array if no results found.
*
* @param string $sSQL
*
* @return array[]
*/
public function getAssoc($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
$aList = array();
while ($aRow = $stmt->fetch(\PDO::FETCH_NUM)) {
$aList[$aRow[0]] = $aRow[1];
}
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $aList;
}
/**
* Executes query. Returns a PDO statement to iterate over.
*
* @param string $sSQL
*
* @return PDOStatement
*/
public function getQueryStatement($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
{
try {
if (isset($aInputVars)) {
$stmt = $this->connection->prepare($sSQL);
$stmt->execute($aInputVars);
} else {
$stmt = $this->connection->query($sSQL);
}
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
return $stmt;
}
/**
* St. John's Way => 'St. John\'s Way'
*
* @param string $sVal Text to be quoted.
*
* @return string
*/
public function getDBQuoted($sVal)
{
return $this->connection->quote($sVal);
}
/**
* Like getDBQuoted, but takes an array.
*
* @param array $aVals List of text to be quoted.
*
* @return array[]
*/
public function getDBQuotedList($aVals)
{
return array_map(function ($sVal) {
return $this->getDBQuoted($sVal);
}, $aVals);
}
/**
* [1,2,'b'] => 'ARRAY[1,2,'b']''
*
* @param array $aVals List of text to be quoted.
*
* @return string
*/
public function getArraySQL($a)
{
return 'ARRAY['.join(',', $a).']';
}
/**
* Check if a table exists in the database. Returns true if it does.
*
* @param string $sTableName
*
* @return boolean
*/
public function tableExists($sTableName)
{
$sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = :tablename';
return ($this->getOne($sSQL, array(':tablename' => $sTableName)) == 1);
}
/**
* Since the DSN includes the database name, checks if the connection works.
*
* @return boolean
*/
public function databaseExists()
{
$bExists = true;
try {
$this->connect(true);
} catch (\Nominatim\DatabaseError $e) {
$bExists = false;
}
return $bExists;
}
/**
* e.g. 9.6, 10, 11.2
*
* @return float
*/
public function getPostgresVersion()
{
$sVersionString = $this->getOne('SHOW server_version_num');
preg_match('#([0-9]?[0-9])([0-9][0-9])[0-9][0-9]#', $sVersionString, $aMatches);
return (float) ($aMatches[1].'.'.$aMatches[2]);
}
/**
* e.g. 2, 2.2
*
* @return float
*/
public function getPostgisVersion()
{
$sVersionString = $this->getOne('select postgis_lib_version()');
preg_match('#^([0-9]+)[.]([0-9]+)[.]#', $sVersionString, $aMatches);
return (float) ($aMatches[1].'.'.$aMatches[2]);
}
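/**
 * Splits a PDO-style DSN into its components,
 * e.g. 'pgsql:dbname=nominatim;host=localhost;port=5433' (illustrative value)
 * => array('database' => 'nominatim', 'hostspec' => 'localhost', 'port' => '5433')
 *
 * @param string $sDSN Database connection string.
 *
 * @return array
 */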
public static function parseDSN($sDSN)
{
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
$aInfo = array();
if (preg_match('/^pgsql:(.+)/', $sDSN, $aMatches)) {
foreach (explode(';', $aMatches[1]) as $sKeyVal) {
list($sKey, $sVal) = explode('=', $sKeyVal, 2);
if ($sKey == 'host') $sKey = 'hostspec';
if ($sKey == 'dbname') $sKey = 'database';
if ($sKey == 'user') $sKey = 'username';
$aInfo[$sKey] = $sVal;
}
}
return $aInfo;
}
}
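The class above replaces the old PEAR DB handle and the chksql() wrapper that the diffs further down remove. A minimal usage sketch, with query text and error message made up for illustration:

$oDB = new \Nominatim\DB();
$oDB->connect();

// Single value with bound parameters; returns false when no row matches.
$iOsmId = $oDB->getOne(
    'SELECT osm_id FROM placex WHERE place_id = :id',
    array(':id' => 1234),
    'Could not fetch place'
);

// All result rows.
$aRows = $oDB->getAll('SELECT place_id, class, type FROM placex LIMIT 10');

// Guard optional tables before querying them (cf. dbReverseOnly() below).
if (!$oDB->tableExists('search_name')) {
    echo "Reverse-only installation, search tables missing\n";
}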

View File

@@ -5,10 +5,12 @@ namespace Nominatim;
class DatabaseError extends \Exception
{
public function __construct($message, $code = 500, Exception $previous = null, $oSql)
public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null)
{
parent::__construct($message, $code, $previous);
$this->oSql = $oSql;
// https://secure.php.net/manual/en/class.pdoexception.php
$this->oPDOErr = $oPDOErr;
$this->sSql = $sSql;
}
public function __toString()
@@ -18,15 +20,15 @@ class DatabaseError extends \Exception
public function getSqlError()
{
return $this->oSql->getMessage();
return $this->oPDOErr->getMessage();
}
public function getSqlDebugDump()
{
if (CONST_Debug) {
return var_export($this->oSql, true);
return var_export($this->oPDOErr, true);
} else {
return $this->oSql->getUserInfo();
return $this->sSql;
}
}
}
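With the constructor now taking the underlying PDOException and the failing SQL separately, the two getters behave as sketched below for a hypothetical call site (the query is a placeholder):

try {
    $oDB->getOne('SELECT count(*) FROM placex');
} catch (\Nominatim\DatabaseError $e) {
    $e->getSqlError();     // message of the wrapped PDOException
    $e->getSqlDebugDump(); // var_export() of the PDOException when CONST_Debug is set,
                           // otherwise just the SQL string that failed
}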

View File

@@ -348,10 +348,7 @@ class Geocode
$aNewPhraseSearches = array();
$sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) {
// Too many permutations - too expensive
if ($iWordSet > 120) break;
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
// Add all words from this wordset
@@ -527,8 +524,8 @@ class Geocode
$sNormQuery = $this->normTerm($this->sQuery);
Debug::printVar('Normalized query', $sNormQuery);
$sLanguagePrefArraySQL = getArraySQL(
array_map('getDBQuoted', $this->aLangPrefOrder)
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
);
$sQuery = $this->sQuery;
@@ -581,8 +578,9 @@ class Geocode
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
$sSpecialTerm = pg_escape_string($sSpecialTerm);
$sToken = chksql(
$this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"),
$sToken = $this->oDB->getOne(
'SELECT make_standard_name(:term)',
array(':term' => $sSpecialTerm),
'Cannot decode query. Wrong encoding?'
);
$sSQL = 'SELECT class, type FROM word ';
@@ -590,7 +588,7 @@ class Geocode
$sSQL .= ' AND class is not null AND class not in (\'place\')';
Debug::printSQL($sSQL);
$aSearchWords = chksql($this->oDB->getAll($sSQL));
$aSearchWords = $this->oDB->getAll($sSQL);
$aNewSearches = array();
foreach ($aSearches as $oSearch) {
foreach ($aSearchWords as $aSearchTerm) {
@@ -628,8 +626,9 @@ class Geocode
$aTokens = array();
$aPhrases = array();
foreach ($aInPhrases as $iPhrase => $sPhrase) {
$sPhrase = chksql(
$this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'),
$sPhrase = $this->oDB->getOne(
'SELECT make_standard_name(:phrase)',
array(':phrase' => $sPhrase),
'Cannot normalize query string (is it a UTF-8 string?)'
);
if (trim($sPhrase)) {
@@ -639,7 +638,6 @@ class Geocode
}
}
Debug::printDebugTable('Phrases', $aPhrases);
Debug::printVar('Tokens', $aTokens);
$oValidTokens = new TokenList();
@@ -647,7 +645,7 @@ class Geocode
if (!empty($aTokens)) {
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
$sSQL .= ' FROM word ';
$sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
$sSQL .= ' WHERE word_token in ('.join(',', $this->oDB->getDBQuotedList($aTokens)).')';
Debug::printSQL($sSQL);
@@ -684,6 +682,11 @@ class Geocode
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
foreach ($aPhrases as $oPhrase) {
$oPhrase->computeWordSets($oValidTokens);
}
Debug::printDebugTable('Phrases', $aPhrases);
Debug::newSection('Search candidates');
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
@@ -830,7 +833,7 @@ class Geocode
if ($aFilterSql) {
$sSQL = join(' UNION ', $aFilterSql);
Debug::printSQL($sSQL);
$aFilteredIDs = chksql($this->oDB->getCol($sSQL));
$aFilteredIDs = $this->oDB->getCol($sSQL);
}
$tempIDs = array();

View File

@@ -9,7 +9,8 @@ namespace Nominatim;
*/
class Phrase
{
const MAX_DEPTH = 7;
const MAX_WORDSET_LEN = 20;
const MAX_WORDSETS = 100;
// Complete phrase as a string.
private $sPhrase;
@@ -20,13 +21,24 @@ class Phrase
// Possible segmentations of the phrase.
private $aWordSets;
public static function cmpByArraylen($aA, $aB)
{
$iALen = count($aA);
$iBLen = count($aB);
if ($iALen == $iBLen) {
return 0;
}
return ($iALen < $iBLen) ? -1 : 1;
}
public function __construct($sPhrase, $sPhraseType)
{
$this->sPhrase = trim($sPhrase);
$this->sPhraseType = $sPhraseType;
$this->aWords = explode(' ', $this->sPhrase);
$this->aWordSets = $this->createWordSets($this->aWords, 0);
}
/**
@@ -60,10 +72,17 @@ class Phrase
*/
public function addTokens(&$aTokens)
{
foreach ($this->aWordSets as $aSet) {
foreach ($aSet as $sWord) {
$aTokens[' '.$sWord] = ' '.$sWord;
$aTokens[$sWord] = $sWord;
$iNumWords = count($this->aWords);
for ($i = 0; $i < $iNumWords; $i++) {
$sPhrase = $this->aWords[$i];
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
$aTokens[$sPhrase] = $sPhrase;
for ($j = $i + 1; $j < $iNumWords; $j++) {
$sPhrase .= ' '.$this->aWords[$j];
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
$aTokens[$sPhrase] = $sPhrase;
}
}
}
@@ -75,44 +94,59 @@ class Phrase
*/
public function invertWordSets()
{
$this->aWordSets = $this->createInverseWordSets($this->aWords, 0);
foreach ($this->aWordSets as $i => $aSet) {
$this->aWordSets[$i] = array_reverse($aSet);
}
}
private function createWordSets($aWords, $iDepth)
public function computeWordSets($oTokens)
{
$aResult = array(array(join(' ', $aWords)));
$sFirstToken = '';
if ($iDepth < Phrase::MAX_DEPTH) {
while (count($aWords) > 1) {
$sWord = array_shift($aWords);
$sFirstToken .= ($sFirstToken?' ':'').$sWord;
$aRest = $this->createWordSets($aWords, $iDepth + 1);
foreach ($aRest as $aSet) {
$aResult[] = array_merge(array($sFirstToken), $aSet);
$iNumWords = count($this->aWords);
// Caches the word set for the partial phrase up to word i.
$aSetCache = array_fill(0, $iNumWords, array());
// Initialise the first element of the cache. The only candidate set is the first word itself.
if ($oTokens->containsAny($this->aWords[0])) {
$aSetCache[0][] = array($this->aWords[0]);
}
// Now do the next elements using what we already have.
for ($i = 1; $i < $iNumWords; $i++) {
for ($j = $i; $j > 0; $j--) {
$sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
$aPartial = array($sPartial);
foreach ($aSetCache[$j - 1] as $aSet) {
if (count($aSet) < Phrase::MAX_WORDSET_LEN) {
$aSetCache[$i][] = array_merge($aSet, $aPartial);
}
}
if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) {
usort(
$aSetCache[$i],
array('\Nominatim\Phrase', 'cmpByArraylen')
);
$aSetCache[$i] = array_slice(
$aSetCache[$i],
0,
Phrase::MAX_WORDSETS
);
}
}
}
// finally the current full phrase
$sPartial = $this->aWords[0].' '.$sPartial;
if ($oTokens->containsAny($sPartial)) {
$aSetCache[$i][] = array($sPartial);
}
}
return $aResult;
$this->aWordSets = $aSetCache[$iNumWords - 1];
usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
$this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS);
}
private function createInverseWordSets($aWords, $iDepth)
{
$aResult = array(array(join(' ', $aWords)));
$sFirstToken = '';
if ($iDepth < Phrase::MAX_DEPTH) {
while (count($aWords) > 1) {
$sWord = array_pop($aWords);
$sFirstToken = $sWord.($sFirstToken?' ':'').$sFirstToken;
$aRest = $this->createInverseWordSets($aWords, $iDepth + 1);
foreach ($aRest as $aSet) {
$aResult[] = array_merge(array($sFirstToken), $aSet);
}
}
}
return $aResult;
}
public function debugInfo()
{

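computeWordSets() now builds segmentations prefix by prefix: a word set for the first i+1 words is kept only when its last segment is a known token (TokenList::containsAny()) and the remainder is a valid set for the shorter prefix; sets are capped at MAX_WORDSET_LEN segments and pruned to the MAX_WORDSETS shortest ones. A stand-alone sketch of the idea, with a made-up token list standing in for TokenList:

<?php
$aWords = array('new', 'york', 'main', 'st');
// Plays the role of TokenList::containsAny().
$aKnown = array('new' => true, 'york' => true, 'main' => true, 'st' => true,
                'new york' => true, 'main st' => true);

$aCache = array_fill(0, count($aWords), array());
if (isset($aKnown[$aWords[0]])) {
    $aCache[0][] = array($aWords[0]);
}
for ($i = 1; $i < count($aWords); $i++) {
    $sPartial = '';
    for ($j = $i; $j > 0; $j--) {
        // Segment covering words j..i.
        $sPartial = ($j == $i) ? $aWords[$j] : $aWords[$j].' '.$sPartial;
        if (!empty($aCache[$j - 1]) && isset($aKnown[$sPartial])) {
            foreach ($aCache[$j - 1] as $aSet) {
                $aCache[$i][] = array_merge($aSet, array($sPartial));
            }
        }
    }
    // The whole prefix as a single segment.
    $sFull = $aWords[0].' '.$sPartial;
    if (isset($aKnown[$sFull])) {
        $aCache[$i][] = array($sFull);
    }
}
print_r($aCache[3]);
// Contains ['new york', 'main st'], ['new', 'york', 'main st'], ... ;
// the class then keeps only the MAX_WORDSETS sets with the fewest segments.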
View File

@@ -52,7 +52,7 @@ class PlaceLookup
{
$aLangs = $oParams->getPreferredLanguages();
$this->aLangPrefOrderSql =
'ARRAY['.join(',', array_map('getDBQuoted', $aLangs)).']';
'ARRAY['.join(',', $this->oDB->getDBQuotedList($aLangs)).']';
$this->bExtraTags = $oParams->getBool('extratags', false);
$this->bNameDetails = $oParams->getBool('namedetails', false);
@@ -132,8 +132,9 @@ class PlaceLookup
public function setLanguagePreference($aLangPrefOrder)
{
$this->aLangPrefOrderSql =
'ARRAY['.join(',', array_map('getDBQuoted', $aLangPrefOrder)).']';
$this->aLangPrefOrderSql = $this->oDB->getArraySQL(
$this->oDB->getDBQuotedList($aLangPrefOrder)
);
}
private function addressImportanceSql($sGeometry, $sPlaceId)
@@ -162,8 +163,8 @@ class PlaceLookup
public function lookupOSMID($sType, $iID)
{
$sSQL = "select place_id from placex where osm_type = '".$sType."' and osm_id = ".$iID;
$iPlaceID = chksql($this->oDB->getOne($sSQL));
$sSQL = 'select place_id from placex where osm_type = :type and osm_id = :id';
$iPlaceID = $this->oDB->getOne($sSQL, array(':type' => $sType, ':id' => $iID));
if (!$iPlaceID) {
return null;
@@ -424,9 +425,10 @@ class PlaceLookup
$sSQL = join(' UNION ', $aSubSelects);
Debug::printSQL($sSQL);
$aPlaces = chksql($this->oDB->getAll($sSQL), 'Could not lookup place');
$aPlaces = $this->oDB->getAll($sSQL, null, 'Could not lookup place');
foreach ($aPlaces as &$aPlace) {
$aPlace['importance'] = (float) $aPlace['importance'];
if ($this->bAddressDetails) {
// to get addressdetails for tiger data, the housenumber is needed
$aPlace['address'] = new AddressDetails(
@@ -513,9 +515,9 @@ class PlaceLookup
$sSQL .= $sFrom;
}
$aPointPolygon = chksql($this->oDB->getRow($sSQL), 'Could not get outline');
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
if ($aPointPolygon['place_id']) {
if ($aPointPolygon && $aPointPolygon['place_id']) {
if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null) {
$aOutlineResult['lat'] = $aPointPolygon['centrelat'];
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];

View File

@@ -36,8 +36,8 @@ class ReverseGeocode
13 => 18,
14 => 22, // Suburb
15 => 22,
16 => 26, // Street, TODO: major street?
17 => 26,
16 => 26, // major street
17 => 27, // minor street
18 => 30, // or >, Building
19 => 30, // or >, Building
);
@@ -63,8 +63,9 @@ class ReverseGeocode
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' ORDER BY distance ASC limit 1';
return chksql(
$this->oDB->getRow($sSQL),
return $this->oDB->getRow(
$sSQL,
null,
'Could not determine closest housenumber on an osm interpolation line.'
);
}
@@ -92,8 +93,9 @@ class ReverseGeocode
$sSQL = 'SELECT country_code FROM country_osm_grid';
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.') LIMIT 1';
$sCountryCode = chksql(
$this->oDB->getOne($sSQL),
$sCountryCode = $this->oDB->getOne(
$sSQL,
null,
'Could not determine country polygon containing the point.'
);
if ($sCountryCode) {
@@ -115,10 +117,7 @@ class ReverseGeocode
$sSQL .= ' LIMIT 1';
if (CONST_Debug) var_dump($sSQL);
$aPlace = chksql(
$this->oDB->getRow($sSQL),
'Could not determine place node.'
);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
if ($aPlace) {
return new Result($aPlace['place_id']);
}
@@ -134,10 +133,7 @@ class ReverseGeocode
$sSQL .= ' ORDER BY distance ASC';
if (CONST_Debug) var_dump($sSQL);
$aPlace = chksql(
$this->oDB->getRow($sSQL),
'Could not determine place node.'
);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
if ($aPlace) {
return new Result($aPlace['place_id']);
}
@@ -178,10 +174,8 @@ class ReverseGeocode
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 1';
$aPoly = chksql(
$this->oDB->getRow($sSQL),
'Could not determine polygon containing the point.'
);
$aPoly = $this->oDB->getRow($sSQL, null, 'Could not determine polygon containing the point.');
if ($aPoly) {
// if a polygon is found, search for placenodes begins ...
$iParentPlaceID = $aPoly['parent_place_id'];
@@ -213,10 +207,7 @@ class ReverseGeocode
$sSQL .= ' LIMIT 1';
if (CONST_Debug) var_dump($sSQL);
$aPlacNode = chksql(
$this->oDB->getRow($sSQL),
'Could not determine place node.'
);
$aPlacNode = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
if ($aPlacNode) {
return $aPlacNode;
}
@@ -255,12 +246,7 @@ class ReverseGeocode
$sSQL .= ' placex';
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')';
$sSQL .= ' AND';
// only streets
if ($iMaxRank == 26) {
$sSQL .= ' rank_address = 26';
} else {
$sSQL .= ' rank_address between 26 and '.$iMaxRank;
}
$sSQL .= ' rank_address between 26 and '.$iMaxRank;
$sSQL .= ' and (name is not null or housenumber is not null';
$sSQL .= ' or rank_address between 26 and 27)';
$sSQL .= ' and (rank_address between 26 and 27';
@@ -271,10 +257,7 @@ class ReverseGeocode
$sSQL .= ' OR ST_DWithin('.$sPointSQL.', centroid, '.$fSearchDiam.'))';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
$aPlace = chksql(
$this->oDB->getRow($sSQL),
'Could not determine closest place.'
);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine closest place.');
if (CONST_Debug) var_dump($aPlace);
if ($aPlace) {
@@ -316,17 +299,14 @@ class ReverseGeocode
// radius ?
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', geometry, 0.001)';
$sSQL .= ' AND parent_place_id = '.$iPlaceID;
$sSQL .= ' and rank_address != 28';
$sSQL .= ' and rank_address > 28';
$sSQL .= ' and ST_GeometryType(geometry) != \'ST_LineString\'';
$sSQL .= ' and (name is not null or housenumber is not null)';
$sSQL .= ' and class not in (\'boundary\')';
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
$aStreet = chksql(
$this->oDB->getRow($sSQL),
'Could not determine closest place.'
);
$aStreet = $this->oDB->getRow($sSQL, null, 'Could not determine closest place.');
if ($aStreet) {
if (CONST_Debug) var_dump($aStreet);
$oResult = new Result($aStreet['place_id']);
@@ -347,10 +327,7 @@ class ReverseGeocode
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, 0.001)';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
$aPlaceTiger = chksql(
$this->oDB->getRow($sSQL),
'Could not determine closest Tiger place.'
);
$aPlaceTiger = $this->oDB->getRow($sSQL, null, 'Could not determine closest Tiger place.');
if ($aPlaceTiger) {
if (CONST_Debug) var_dump('found Tiger housenumber', $aPlaceTiger);
$oResult = new Result($aPlaceTiger['place_id'], Result::TABLE_TIGER);

View File

@@ -126,7 +126,7 @@ class SearchContext
* The viewbox may be bounded which means that no search results
* must be outside the viewbox.
*
* @param object $oDB DB connection to use for computing the box.
* @param object $oDB Nominatim::DB instance to use for computing the box.
* @param string[] $aRoutePoints List of x,y coordinates along a route.
* @param float $fRouteWidth Buffer around the route to use.
* @param bool $bBounded True if the viewbox bounded.
@@ -146,11 +146,11 @@ class SearchContext
$this->sqlViewboxCentre .= ")'::geometry,4326)";
$sSQL = 'ST_BUFFER('.$this->sqlViewboxCentre.','.($fRouteWidth/69).')';
$sGeom = chksql($oDB->getOne('select '.$sSQL), 'Could not get small viewbox');
$sGeom = $oDB->getOne('select '.$sSQL, null, 'Could not get small viewbox');
$this->sqlViewboxSmall = "'".$sGeom."'::geometry";
$sSQL = 'ST_BUFFER('.$this->sqlViewboxCentre.','.($fRouteWidth/30).')';
$sGeom = chksql($oDB->getOne('select '.$sSQL), 'Could not get large viewbox');
$sGeom = $oDB->getOne('select '.$sSQL, null, 'Could not get large viewbox');
$this->sqlViewboxLarge = "'".$sGeom."'::geometry";
}

View File

@@ -404,7 +404,7 @@ class SearchDescription
/**
* Query database for places that match this search.
*
* @param object $oDB Database connection to use.
* @param object $oDB Nominatim::DB instance to use.
* @param integer $iMinRank Minimum address rank to restrict search to.
* @param integer $iMaxRank Maximum address rank to restrict search to.
* @param integer $iLimit Maximum number of results.
@@ -479,7 +479,7 @@ class SearchDescription
$sSQL .= ' WHERE place_id in ('.$sPlaceIds.')';
$sSQL .= " AND postcode != '".$this->sPostcode."'";
Debug::printSQL($sSQL);
$aFilteredPlaceIDs = chksql($oDB->getCol($sSQL));
$aFilteredPlaceIDs = $oDB->getCol($sSQL);
if ($aFilteredPlaceIDs) {
foreach ($aFilteredPlaceIDs as $iPlaceId) {
$aResults[$iPlaceId]->iResultRank++;
@@ -504,8 +504,10 @@ class SearchDescription
Debug::printSQL($sSQL);
$iPlaceId = $oDB->getOne($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
if ($iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
@@ -521,8 +523,7 @@ class SearchDescription
$aDBResults = array();
$sPoiTable = $this->poiTable();
$sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'";
if (chksql($oDB->getOne($sSQL))) {
if ($oDB->tableExists($sPoiTable)) {
$sSQL = 'SELECT place_id FROM '.$sPoiTable.' ct';
if ($this->oContext->sqlCountryList) {
$sSQL .= ' JOIN placex USING (place_id)';
@@ -542,14 +543,14 @@ class SearchDescription
} elseif ($this->oContext->hasNearPoint()) {
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC';
}
$sSQL .= " limit $iLimit";
$sSQL .= " LIMIT $iLimit";
Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
$aDBResults = $oDB->getCol($sSQL);
}
if ($this->oContext->hasNearPoint()) {
$sSQL = 'SELECT place_id FROM placex WHERE ';
$sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'";
$sSQL .= 'class = :class and type = :type';
$sSQL .= ' AND '.$this->oContext->withinSQL('geometry');
$sSQL .= ' AND linked_place_id is null';
if ($this->oContext->sqlCountryList) {
@@ -558,7 +559,10 @@ class SearchDescription
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid').' ASC';
$sSQL .= " LIMIT $iLimit";
Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
$aDBResults = $oDB->getCol(
$sSQL,
array(':class' => $this->sClass, ':type' => $this->sType)
);
}
$aResults = array();
@@ -577,20 +581,23 @@ class SearchDescription
$sSQL .= ', search_name s ';
$sSQL .= 'WHERE s.place_id = p.parent_place_id ';
$sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)';
$sSQL .= ' @> '.getArraySQL($this->aAddress).' AND ';
$sSQL .= ' @> '.$oDB->getArraySQL($this->aAddress).' AND ';
} else {
$sSQL .= 'WHERE ';
}
$sSQL .= "p.postcode = '".reset($this->aName)."'";
$sSQL .= $this->countryCodeSQL(' AND p.country_code');
if ($this->oContext->bViewboxBounded) {
$sSQL .= ' AND ST_Intersects('.$this->oContext->sqlViewboxSmall.', geometry)';
}
$sSQL .= $this->oContext->excludeSQL(' AND p.place_id');
$sSQL .= " LIMIT $iLimit";
Debug::printSQL($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_POSTCODE);
}
@@ -633,14 +640,14 @@ class SearchDescription
}
if (!empty($this->aName)) {
$aTerms[] = 'name_vector @> '.getArraySQL($this->aName);
$aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName);
}
if (!empty($this->aAddress)) {
// For infrequent name terms disable index usage for address
if ($this->bRareName) {
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.$oDB->getArraySQL($this->aAddress);
} else {
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
$aTerms[] = 'nameaddress_vector @> '.$oDB->getArraySQL($this->aAddress);
}
}
@@ -695,7 +702,7 @@ class SearchDescription
if (!empty($this->aFullNameAddress)) {
$sExactMatchSQL = ' ( ';
$sExactMatchSQL .= ' SELECT count(*) FROM ( ';
$sExactMatchSQL .= ' SELECT unnest('.getArraySQL($this->aFullNameAddress).')';
$sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')';
$sExactMatchSQL .= ' INTERSECT ';
$sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)';
$sExactMatchSQL .= ' ) s';
@@ -720,10 +727,7 @@ class SearchDescription
Debug::printSQL($sSQL);
$aDBResults = chksql(
$oDB->getAll($sSQL),
'Could not get places for search terms.'
);
$aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
@@ -753,7 +757,7 @@ class SearchDescription
Debug::printSQL($sSQL);
// XXX should inherit the exactMatches from its parent
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
@@ -779,7 +783,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
@@ -795,7 +799,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
}
}
@@ -816,7 +820,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_TIGER);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
@@ -850,7 +854,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
}
@@ -858,12 +862,11 @@ class SearchDescription
// NEAR and IN are handled the same
if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) {
$sClassTable = $this->poiTable();
$sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'";
$bCacheTable = (bool) chksql($oDB->getOne($sSQL));
$bCacheTable = $oDB->tableExists($sClassTable);
$sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)";
Debug::printSQL($sSQL);
$iMaxRank = (int)chksql($oDB->getOne($sSQL));
$iMaxRank = (int) $oDB->getOne($sSQL);
// For state / country level searches the normal radius search doesn't work very well
$sPlaceGeom = false;
@@ -876,7 +879,7 @@ class SearchDescription
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);
$sPlaceGeom = chksql($oDB->getOne($sSQL));
$sPlaceGeom = $oDB->getOne($sSQL);
}
if ($sPlaceGeom) {
@@ -886,7 +889,7 @@ class SearchDescription
$sSQL = 'SELECT place_id FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
Debug::printSQL($sSQL);
$aPlaceIDs = chksql($oDB->getCol($sSQL));
$aPlaceIDs = $oDB->getCol($sSQL);
$sPlaceIDs = join(',', $aPlaceIDs);
}
@@ -932,7 +935,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
} else {
@@ -964,7 +967,7 @@ class SearchDescription
Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
}

View File

@@ -3,7 +3,6 @@
namespace Nominatim;
use Exception;
use PEAR;
class Status
{
@@ -16,12 +15,18 @@ class Status
public function status()
{
if (!$this->oDB || PEAR::isError($this->oDB)) {
if (!$this->oDB) {
throw new Exception('No database', 700);
}
try {
$this->oDB->connect();
} catch (\Nominatim\DatabaseError $e) {
throw new Exception('Database connection failed', 700);
}
$sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
if (PEAR::isError($sStandardWord)) {
if ($sStandardWord === false) {
throw new Exception('Module failed', 701);
}
@@ -32,7 +37,7 @@ class Status
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
$sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
$iWordID = $this->oDB->getOne($sSQL);
if (PEAR::isError($iWordID)) {
if ($iWordID === false) {
throw new Exception('Query failed', 703);
}
if (!$iWordID) {
@@ -45,7 +50,7 @@ class Status
$sSQL = 'SELECT EXTRACT(EPOCH FROM lastimportdate) FROM import_status LIMIT 1';
$iDataDateEpoch = $this->oDB->getOne($sSQL);
if (PEAR::isError($iDataDateEpoch)) {
if ($iDataDateEpoch === false) {
throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
}

View File

@@ -55,6 +55,18 @@ class TokenList
return isset($this->aTokens[$sWord]);
}
/**
* Check if there are partial or full tokens for the given word.
*
* @param string $sWord Token word to look for.
*
* @return bool True if there is one or more token for the token word.
*/
public function containsAny($sWord)
{
return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
}
/**
* Get the list of tokens for the given token word.
*
@@ -71,7 +83,7 @@ class TokenList
/**
* Add token information from the word table in the database.
*
* @param object $oDB Database connection.
* @param object $oDB Nominatim::DB instance.
* @param string[] $aTokens List of tokens to look up in the database.
* @param string[] $aCountryCodes List of country restrictions.
* @param string $sNormQuery Normalized query string.
@@ -85,11 +97,11 @@ class TokenList
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
$sSQL .= ' operator, coalesce(search_name_count, 0) as count';
$sSQL .= ' FROM word WHERE word_token in (';
$sSQL .= join(',', array_map('getDBQuoted', $aTokens)).')';
$sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
Debug::printSQL($sSQL);
$aDBWords = chksql($oDB->getAll($sSQL), 'Could not get word tokens.');
$aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
foreach ($aDBWords as $aWord) {
$oToken = null;

View File

@@ -120,15 +120,6 @@ function showUsage($aSpec, $bExit = false, $sError = false)
exit;
}
function chksql($oSql, $sMsg = false)
{
if (PEAR::isError($oSql)) {
fail($sMsg || $oSql->getMessage(), $oSql->userinfo);
}
return $oSql;
}
function info($sMsg)
{
echo date('Y-m-d H:i:s == ').$sMsg."\n";
@@ -155,7 +146,7 @@ function repeatWarnings()
function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false)
{
// Convert database DSN to psql parameters
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
$aDSNInfo = \Nominatim\DB::parseDSN(CONST_Database_DSN);
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
$sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {

View File

@@ -1,43 +0,0 @@
<?php
require_once('DB.php');
function &getDB($bNew = false, $bPersistent = false)
{
// Get the database object
$oDB = chksql(
DB::connect(CONST_Database_DSN.($bNew?'?new_link=true':''), $bPersistent),
'Failed to establish database connection'
);
$oDB->setFetchMode(DB_FETCHMODE_ASSOC);
$oDB->query("SET DateStyle TO 'sql,european'");
$oDB->query("SET client_encoding TO 'utf-8'");
$iMaxExecution = ini_get('max_execution_time') * 1000;
if ($iMaxExecution > 0) $oDB->query("SET statement_timeout TO $iMaxExecution");
return $oDB;
}
function getDBQuoted($s)
{
return "'".pg_escape_string($s)."'";
}
function getArraySQL($a)
{
return 'ARRAY['.join(',', $a).']';
}
function getPostgresVersion(&$oDB)
{
$sVersionString = $oDB->getOne('SHOW server_version_num');
preg_match('#([0-9]?[0-9])([0-9][0-9])[0-9][0-9]#', $sVersionString, $aMatches);
return (float) ($aMatches[1].'.'.$aMatches[2]);
}
function getPostgisVersion(&$oDB)
{
$sVersionString = $oDB->getOne('select postgis_lib_version()');
preg_match('#^([0-9]+)[.]([0-9]+)[.]#', $sVersionString, $aMatches);
return (float) ($aMatches[1].'.'.$aMatches[2]);
}

View File

@@ -2,7 +2,6 @@
require_once('init.php');
require_once('ParameterParser.php');
require_once('DatabaseError.php');
require_once(CONST_Debug ? 'DebugHtml.php' : 'DebugNone.php');
/***************************************************************************
@@ -11,15 +10,6 @@ require_once(CONST_Debug ? 'DebugHtml.php' : 'DebugNone.php');
*
*/
function chksql($oSql, $sMsg = 'Database request failed')
{
if (!PEAR::isError($oSql)) return $oSql;
throw new Nominatim\DatabaseError($sMsg, 500, null, $oSql);
}
function userError($sMsg)
{
throw new Exception($sMsg, 400);
@@ -31,6 +21,7 @@ function exception_handler_html($exception)
http_response_code($exception->getCode());
header('Content-type: text/html; charset=UTF-8');
include(CONST_BasePath.'/lib/template/error-html.php');
exit();
}
function exception_handler_json($exception)
@@ -38,6 +29,7 @@ function exception_handler_json($exception)
http_response_code($exception->getCode());
header('Content-type: application/json; charset=utf-8');
include(CONST_BasePath.'/lib/template/error-json.php');
exit();
}
function exception_handler_xml($exception)
@@ -46,17 +38,51 @@ function exception_handler_xml($exception)
header('Content-type: text/xml; charset=utf-8');
echo '<?xml version="1.0" encoding="UTF-8" ?>'."\n";
include(CONST_BasePath.'/lib/template/error-xml.php');
exit();
}
function shutdown_exception_handler_html()
{
$error = error_get_last();
if ($error !== null && $error['type'] === E_ERROR) {
exception_handler_html(new Exception($error['message'], 500));
}
}
function shutdown_exception_handler_xml()
{
$error = error_get_last();
if ($error !== null && $error['type'] === E_ERROR) {
exception_handler_xml(new Exception($error['message'], 500));
}
}
function shutdown_exception_handler_json()
{
$error = error_get_last();
if ($error !== null && $error['type'] === E_ERROR) {
exception_handler_json(new Exception($error['message'], 500));
}
}
function set_exception_handler_by_format($sFormat = 'html')
function set_exception_handler_by_format($sFormat = null)
{
if ($sFormat == 'html') {
// Multiple calls to register_shutdown_function() register multiple callbacks,
// all of which get executed; we only want the one for the requested format.
// Thus we don't register a shutdown handler by default without an explicit $sFormat.
if (!isset($sFormat)) {
set_exception_handler('exception_handler_html');
} elseif ($sFormat == 'html') {
set_exception_handler('exception_handler_html');
register_shutdown_function('shutdown_exception_handler_html');
} elseif ($sFormat == 'xml') {
set_exception_handler('exception_handler_xml');
register_shutdown_function('shutdown_exception_handler_xml');
} else {
set_exception_handler('exception_handler_json');
register_shutdown_function('shutdown_exception_handler_json');
}
}
// set a default

View File

@@ -1,7 +1,7 @@
<?php
require_once(CONST_BasePath.'/lib/lib.php');
require_once(CONST_BasePath.'/lib/db.php');
require_once(CONST_BasePath.'/lib/DB.php');
if (get_magic_quotes_gpc()) {
echo "Please disable magic quotes in your php.ini configuration\n";

View File

@@ -4,7 +4,7 @@ function fail($sError, $sUserError = false)
{
if (!$sUserError) $sUserError = $sError;
error_log('ERROR: '.$sError);
echo $sUserError."\n";
var_dump($sUserError)."\n";
exit(-1);
}

View File

@@ -36,9 +36,19 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
$sUserAgent = $_SERVER['HTTP_USER_AGENT'];
else $sUserAgent = '';
$sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
$sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
$sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',', $aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')';
$oDB->query($sSQL);
$sSQL .= ' values (';
$sSQL .= join(',', $oDB->getDBQuotedList(array(
$sType,
$hLog[0],
$hLog[2],
$hLog[1],
$sUserAgent,
join(',', $aLanguageList),
$sOutputFormat,
$hLog[3]
)));
$sSQL .= ')';
$oDB->exec($sSQL);
}
return $hLog;
@@ -53,11 +63,11 @@ function logEnd(&$oDB, $hLog, $iNumResults)
if (!$aEndTime[1]) $aEndTime[1] = '0';
$sEndTime = date('Y-m-d H:i:s', $aEndTime[0]).'.'.$aEndTime[1];
$sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
$sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
$sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
$sSQL .= ' and query = '.getDBQuoted($hLog[2]);
$oDB->query($sSQL);
$sSQL = 'update new_query_log set endtime = '.$oDB->getDBQuoted($sEndTime).', results = '.$iNumResults;
$sSQL .= ' where starttime = '.$oDB->getDBQuoted($hLog[0]);
$sSQL .= ' and ipaddress = '.$oDB->getDBQuoted($hLog[1]);
$sSQL .= ' and query = '.$oDB->getDBQuoted($hLog[2]);
$oDB->exec($sSQL);
}
if (CONST_Log_File) {

View File

@@ -53,21 +53,21 @@ class AddressLevelParser
*/
public function createTable($oDB, $sTable)
{
chksql($oDB->query('DROP TABLE IF EXISTS '.$sTable));
$oDB->exec('DROP TABLE IF EXISTS '.$sTable);
$sSql = 'CREATE TABLE '.$sTable;
$sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
$sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
chksql($oDB->query($sSql));
$oDB->exec($sSql);
$sSql = 'CREATE UNIQUE INDEX ON '.$sTable.'(country_code, class, type)';
chksql($oDB->query($sSql));
$sSql = 'CREATE UNIQUE INDEX ON '.$sTable.' (country_code, class, type)';
$oDB->exec($sSql);
$sSql = 'INSERT INTO '.$sTable.' VALUES ';
foreach ($this->aLevels as $aLevel) {
$aCountries = array();
if (isset($aLevel['countries'])) {
foreach ($aLevel['countries'] as $sCountry) {
$aCountries[$sCountry] = getDBQuoted($sCountry);
$aCountries[$sCountry] = $oDB->getDBQuoted($sCountry);
}
} else {
$aCountries['NULL'] = 'NULL';
@@ -75,8 +75,8 @@ class AddressLevelParser
foreach ($aLevel['tags'] as $sKey => $aValues) {
foreach ($aValues as $sValue => $mRanks) {
$aFields = array(
getDBQuoted($sKey),
$sValue ? getDBQuoted($sValue) : 'NULL'
$oDB->getDBQuoted($sKey),
$sValue ? $oDB->getDBQuoted($sValue) : 'NULL'
);
if (is_array($mRanks)) {
$aFields[] = (string) $mRanks[0];
@@ -93,6 +93,6 @@ class AddressLevelParser
}
}
}
chksql($oDB->query(rtrim($sSql, ',')));
$oDB->exec(rtrim($sSql, ','));
}
}

View File

@@ -40,7 +40,7 @@ class SetupFunctions
info('module path: ' . $this->sModulePath);
// parse database string
$this->aDSNInfo = array_filter(\DB::parseDSN(CONST_Database_DSN));
$this->aDSNInfo = \Nominatim\DB::parseDSN(CONST_Database_DSN);
if (!isset($this->aDSNInfo['port'])) {
$this->aDSNInfo['port'] = 5432;
}
@@ -74,8 +74,9 @@ class SetupFunctions
public function createDB()
{
info('Create DB');
$sDB = \DB::connect(CONST_Database_DSN, false);
if (!\PEAR::isError($sDB)) {
$oDB = new \Nominatim\DB;
if ($oDB->databaseExists()) {
fail('database already exists ('.CONST_Database_DSN.')');
}
@@ -94,47 +95,33 @@ class SetupFunctions
public function connect()
{
$this->oDB =& getDB();
$this->oDB = new \Nominatim\DB();
$this->oDB->connect();
}
public function setupDB()
{
info('Setup DB');
$fPostgresVersion = getPostgresVersion($this->oDB);
$fPostgresVersion = $this->oDB->getPostgresVersion();
echo 'Postgres version found: '.$fPostgresVersion."\n";
if ($fPostgresVersion < 9.01) {
fail('Minimum supported version of Postgresql is 9.1.');
if ($fPostgresVersion < 9.03) {
fail('Minimum supported version of Postgresql is 9.3.');
}
$this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
$this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');
// For extratags and namedetails the hstore_to_json converter is
// needed which is only available from Postgresql 9.3+. For older
// versions add a dummy function that returns nothing.
$iNumFunc = chksql($this->oDB->getOne("select count(*) from pg_proc where proname = 'hstore_to_json'"));
if ($iNumFunc == 0) {
$this->pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable");
warn('Postgresql is too old. extratags and namedetails API not available.');
}
$fPostgisVersion = getPostgisVersion($this->oDB);
$fPostgisVersion = $this->oDB->getPostgisVersion();
echo 'Postgis version found: '.$fPostgisVersion."\n";
if ($fPostgisVersion < 2.1) {
// Functions were renamed in 2.1 and throw an annoying deprecation warning
$this->pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
$this->pgsqlRunScript('ALTER FUNCTION ST_Line_Locate_Point(geometry, geometry) RENAME TO ST_LineLocatePoint');
}
if ($fPostgisVersion < 2.2) {
$this->pgsqlRunScript('ALTER FUNCTION ST_Distance_Spheroid(geometry, geometry, spheroid) RENAME TO ST_DistanceSpheroid');
echo "Minimum required Postgis version 2.2\n";
exit(1);
}
$i = chksql($this->oDB->getOne("select count(*) from pg_user where usename = '".CONST_Database_Web_User."'"));
$i = $this->oDB->getOne("select count(*) from pg_user where usename = '".CONST_Database_Web_User."'");
if ($i == 0) {
echo "\nERROR: Web user '".CONST_Database_Web_User."' does not exist. Create it with:\n";
echo "\n createuser ".CONST_Database_Web_User."\n\n";
@@ -142,9 +129,7 @@ class SetupFunctions
}
// Try accessing the C module, so we know early if something is wrong
if (!checkModulePresence()) {
fail('error loading nominatim.so module');
}
checkModulePresence(); // raises exception on failure
if (!file_exists(CONST_ExtraDataPath.'/country_osm_grid.sql.gz')) {
echo 'Error: you need to download the country_osm_grid first:';
@@ -154,16 +139,20 @@ class SetupFunctions
$this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
$this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql.gz');
$this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
$this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode_table.sql');
$sPostcodeFilename = CONST_BasePath.'/data/gb_postcode_data.sql.gz';
if (file_exists($sPostcodeFilename)) {
$this->pgsqlRunScriptFile($sPostcodeFilename);
} else {
warn('optional external UK postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
}
if (CONST_Use_Extra_US_Postcodes) {
$this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
$sPostcodeFilename = CONST_BasePath.'/data/us_postcode_data.sql.gz';
if (file_exists($sPostcodeFilename)) {
$this->pgsqlRunScriptFile($sPostcodeFilename);
} else {
warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
}
if ($this->bNoPartitions) {
@@ -216,7 +205,7 @@ class SetupFunctions
$this->runWithPgEnv($osm2pgsql);
if (!$this->sIgnoreErrors && !chksql($this->oDB->getRow('select * from place limit 1'))) {
if (!$this->sIgnoreErrors && !$this->oDB->getRow('select * from place limit 1')) {
fail('No Data');
}
}
@@ -225,11 +214,9 @@ class SetupFunctions
{
info('Create Functions');
// Try accessing the C module, so we know eif something is wrong
// update.php calls this function
if (!checkModulePresence()) {
fail('error loading nominatim.so module');
}
// Try accessing the C module, so we know early if something is wrong
checkModulePresence(); // raises exception on failure
$this->createSqlFunctions();
}
@@ -378,7 +365,8 @@ class SetupFunctions
echo '.';
$sSQL = 'select distinct partition from country_name';
$aPartitions = chksql($this->oDB->getCol($sSQL));
$aPartitions = $this->oDB->getCol($sSQL);
if (!$this->bNoPartitions) $aPartitions[] = 0;
foreach ($aPartitions as $sPartition) {
$this->pgExec('TRUNCATE location_road_'.$sPartition);
@@ -399,34 +387,48 @@ class SetupFunctions
info('Load Data');
$sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry';
$aDBInstances = array();
$iLoadThreads = max(1, $this->iInstances - 1);
for ($i = 0; $i < $iLoadThreads; $i++) {
$aDBInstances[$i] =& getDB(true);
// https://secure.php.net/manual/en/function.pg-connect.php
$DSN = CONST_Database_DSN;
$DSN = preg_replace('/^pgsql:/', '', $DSN);
$DSN = preg_replace('/;/', ' ', $DSN);
$aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
pg_ping($aDBInstances[$i]);
}
for ($i = 0; $i < $iLoadThreads; $i++) {
$sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i";
$sSQL .= " and not (class='place' and type='houses' and osm_type='W'";
$sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')";
$sSQL .= ' and ST_IsValid(geometry)';
if ($this->bVerbose) echo "$sSQL\n";
if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) {
fail(pg_last_error($aDBInstances[$i]->connection));
if (!pg_send_query($aDBInstances[$i], $sSQL)) {
fail(pg_last_error($aDBInstances[$i]));
}
}
// last thread for interpolation lines
$aDBInstances[$iLoadThreads] =& getDB(true);
// https://secure.php.net/manual/en/function.pg-connect.php
$DSN = CONST_Database_DSN;
$DSN = preg_replace('/^pgsql:/', '', $DSN);
$DSN = preg_replace('/;/', ' ', $DSN);
$aDBInstances[$iLoadThreads] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW);
pg_ping($aDBInstances[$iLoadThreads]);
$sSQL = 'insert into location_property_osmline';
$sSQL .= ' (osm_id, address, linegeo)';
$sSQL .= ' SELECT osm_id, address, geometry from place where ';
$sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'";
if ($this->bVerbose) echo "$sSQL\n";
if (!pg_send_query($aDBInstances[$iLoadThreads]->connection, $sSQL)) {
fail(pg_last_error($aDBInstances[$iLoadThreads]->connection));
if (!pg_send_query($aDBInstances[$iLoadThreads], $sSQL)) {
fail(pg_last_error($aDBInstances[$iLoadThreads]));
}
$bFailed = false;
for ($i = 0; $i <= $iLoadThreads; $i++) {
while (($hPGresult = pg_get_result($aDBInstances[$i]->connection)) !== false) {
while (($hPGresult = pg_get_result($aDBInstances[$i])) !== false) {
$resultStatus = pg_result_status($hPGresult);
// PGSQL_EMPTY_QUERY, PGSQL_COMMAND_OK, PGSQL_TUPLES_OK,
// PGSQL_COPY_OUT, PGSQL_COPY_IN, PGSQL_BAD_RESPONSE,
@@ -442,17 +444,22 @@ class SetupFunctions
if ($bFailed) {
fail('SQL errors loading placex and/or location_property_osmline tables');
}
for ($i = 0; $i < $this->iInstances; $i++) {
pg_close($aDBInstances[$i]);
}
echo "\n";
info('Reanalysing database');
$this->pgsqlRunScript('ANALYSE');
$sDatabaseDate = getDatabaseDate($this->oDB);
pg_query($this->oDB->connection, 'TRUNCATE import_status');
if ($sDatabaseDate === false) {
$this->oDB->exec('TRUNCATE import_status');
if (!$sDatabaseDate) {
warn('could not determine database date.');
} else {
$sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')";
pg_query($this->oDB->connection, $sSQL);
$this->oDB->exec($sSQL);
echo "Latest data imported from $sDatabaseDate.\n";
}
}
@@ -477,7 +484,12 @@ class SetupFunctions
$aDBInstances = array();
for ($i = 0; $i < $this->iInstances; $i++) {
$aDBInstances[$i] =& getDB(true);
// https://secure.php.net/manual/en/function.pg-connect.php
$DSN = CONST_Database_DSN;
$DSN = preg_replace('/^pgsql:/', '', $DSN);
$DSN = preg_replace('/;/', ' ', $DSN);
$aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW | PGSQL_CONNECT_ASYNC);
pg_ping($aDBInstances[$i]);
}
foreach (glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile) {
@@ -487,11 +499,11 @@ class SetupFunctions
$iLines = 0;
while (true) {
for ($i = 0; $i < $this->iInstances; $i++) {
if (!pg_connection_busy($aDBInstances[$i]->connection)) {
while (pg_get_result($aDBInstances[$i]->connection));
if (!pg_connection_busy($aDBInstances[$i])) {
while (pg_get_result($aDBInstances[$i]));
$sSQL = fgets($hFile, 100000);
if (!$sSQL) break 2;
if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($this->oDB->connection));
if (!pg_send_query($aDBInstances[$i], $sSQL)) fail(pg_last_error($aDBInstances[$i]));
$iLines++;
if ($iLines == 1000) {
echo '.';
@@ -507,13 +519,17 @@ class SetupFunctions
while ($bAnyBusy) {
$bAnyBusy = false;
for ($i = 0; $i < $this->iInstances; $i++) {
if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
if (pg_connection_busy($aDBInstances[$i])) $bAnyBusy = true;
}
usleep(10);
}
echo "\n";
}
for ($i = 0; $i < $this->iInstances; $i++) {
pg_close($aDBInstances[$i]);
}
info('Creating indexes on Tiger data');
$sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql');
$sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
@@ -546,17 +562,15 @@ class SetupFunctions
$sSQL .= ' GROUP BY country_code, pc';
$this->pgExec($sSQL);
if (CONST_Use_Extra_US_Postcodes) {
// only add postcodes that are not yet available in OSM
$sSQL = 'INSERT INTO location_postcode';
$sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
$sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
$sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)';
$sSQL .= ' FROM us_postcode WHERE postcode NOT IN';
$sSQL .= ' (SELECT postcode FROM location_postcode';
$sSQL .= " WHERE country_code = 'us')";
$this->pgExec($sSQL);
}
// only add postcodes that are not yet available in OSM
$sSQL = 'INSERT INTO location_postcode';
$sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
$sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
$sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)';
$sSQL .= ' FROM us_postcode WHERE postcode NOT IN';
$sSQL .= ' (SELECT postcode FROM location_postcode';
$sSQL .= " WHERE country_code = 'us')";
$this->pgExec($sSQL);
// add missing postcodes for GB (if available)
$sSQL = 'INSERT INTO location_postcode';
@@ -697,7 +711,7 @@ class SetupFunctions
);
$aDropTables = array();
$aHaveTables = chksql($this->oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'"));
$aHaveTables = $this->oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'");
foreach ($aHaveTables as $sTable) {
$bFound = false;
@@ -711,9 +725,7 @@ class SetupFunctions
}
foreach ($aDropTables as $sDrop) {
if ($this->bVerbose) echo "Dropping table $sDrop\n";
@pg_query($this->oDB->connection, "DROP TABLE $sDrop CASCADE");
// ignore warnings/errors as they might be caused by a table having
// been deleted already by CASCADE
$this->oDB->exec("DROP TABLE IF EXISTS $sDrop CASCADE");
}
if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
@@ -726,7 +738,10 @@ class SetupFunctions
private function pgsqlRunDropAndRestore($sDumpFile)
{
$sCMD = 'pg_restore -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'].' -Fc --clean '.$sDumpFile;
$sCMD = 'pg_restore -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'].' --no-owner -Fc --clean '.$sDumpFile;
if ($this->oDB->getPostgresVersion() >= 9.04) {
$sCMD .= ' --if-exists';
}
if (isset($this->aDSNInfo['hostspec'])) {
$sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
}
@@ -776,7 +791,7 @@ class SetupFunctions
private function pgsqlRunPartitionScript($sTemplate)
{
$sSQL = 'select distinct partition from country_name';
$aPartitions = chksql($this->oDB->getCol($sSQL));
$aPartitions = $this->oDB->getCol($sSQL);
if (!$this->bNoPartitions) $aPartitions[] = 0;
preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
@@ -883,9 +898,7 @@ class SetupFunctions
*/
private function pgExec($sSQL)
{
if (!pg_query($this->oDB->connection, $sSQL)) {
fail(pg_last_error($this->oDB->connection));
}
$this->oDB->exec($sSQL);
}
/**
@@ -895,7 +908,6 @@ class SetupFunctions
*/
private function dbReverseOnly()
{
$sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = 'search_name'";
return !(chksql($this->oDB->getOne($sSQL)));
return !($this->oDB->tableExists('search_name'));
}
}
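Several steps in the class above (loadData(), importTigerData()) stop going through the removed getDB() helper and instead open native libpq connections, converting the PDO-style DSN and sending their statements asynchronously. A reduced sketch of that pattern; the SQL is a placeholder and the real code inspects more result states:

// Strip the 'pgsql:' prefix and turn ';' separators into spaces for pg_connect().
$sLibpqDSN = preg_replace(array('/^pgsql:/', '/;/'), array('', ' '), CONST_Database_DSN);

$iLoadThreads = 2; // number of parallel workers
$aWorkers = array();
for ($i = 0; $i < $iLoadThreads; $i++) {
    $aWorkers[$i] = pg_connect($sLibpqDSN, PGSQL_CONNECT_FORCE_NEW);
    // Placeholder statement; each worker runs its share of the work asynchronously.
    if (!pg_send_query($aWorkers[$i], 'SELECT pg_sleep(1)')) {
        fail(pg_last_error($aWorkers[$i]));
    }
}

foreach ($aWorkers as $hConn) {
    while (($hResult = pg_get_result($hConn)) !== false) {
        if (pg_result_status($hResult) == PGSQL_FATAL_ERROR) {
            fail(pg_last_error($hConn));
        }
    }
    pg_close($hConn);
}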

View File

@@ -17,22 +17,15 @@ function checkInFile($sOSMFile)
function checkModulePresence()
{
// Try accessing the C module, so we know early if something is wrong
// and can simply error out.
// Try accessing the C module, so we know early if something is wrong.
// Raises Nominatim\DatabaseError on failure
$sModulePath = CONST_Database_Module_Path;
$sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '";
$sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT";
$sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);';
$oDB = &getDB();
$oResult = $oDB->query($sSQL);
$bResult = true;
if (PEAR::isError($oResult)) {
echo "\nERROR: Failed to load nominatim module. Reason:\n";
echo $oResult->userinfo . "\n\n";
$bResult = false;
}
return $bResult;
$oDB = new \Nominatim\DB();
$oDB->connect();
$oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module');
}

View File

@@ -61,7 +61,7 @@
function _one_row($aAddressLine){
$bNotUsed = (isset($aAddressLine['isaddress']) && $aAddressLine['isaddress'] == 'f');
$bNotUsed = isset($aAddressLine['isaddress']) && !$aAddressLine['isaddress'];
echo '<tr class="' . ($bNotUsed?'notused':'') . '">'."\n";
echo ' <td class="name">'.(trim($aAddressLine['localname'])?$aAddressLine['localname']:'<span class="noname">No Name</span>')."</td>\n";
@@ -119,7 +119,7 @@
if ($aPointDetails['calculated_importance']) {
kv('Importance' , $aPointDetails['calculated_importance'].($aPointDetails['importance']?'':' (estimated)') );
}
kv('Coverage' , ($aPointDetails['isarea']=='t'?'Polygon':'Point') );
kv('Coverage' , ($aPointDetails['isarea']?'Polygon':'Point') );
kv('Centre Point' , $aPointDetails['lat'].','.$aPointDetails['lon'] );
kv('OSM' , osmLink($aPointDetails) );
if ($aPointDetails['wikipedia'])

View File

@@ -33,7 +33,7 @@ if ($aPointDetails['icon']) {
$aPlaceDetails['rank_address'] = (int) $aPointDetails['rank_address'];
$aPlaceDetails['rank_search'] = (int) $aPointDetails['rank_search'];
$aPlaceDetails['isarea'] = ($aPointDetails['isarea'] == 't');
$aPlaceDetails['isarea'] = $aPointDetails['isarea'];
$aPlaceDetails['centroid'] = array(
'type' => 'Point',
'coordinates' => array( (float) $aPointDetails['lon'], (float) $aPointDetails['lat'] )

View File

@@ -3,8 +3,8 @@
"place" : {
"sea" : [2, 0],
"continent" : [2, 0],
"country" : [4, 4],
"state" : [8, 8],
"country" : [4, 0],
"state" : [8, 0],
"region" : [18, 0],
"county" : 12,
"city" : 16,
@@ -25,6 +25,8 @@
"islet" : [20, 0],
"mountain_pass" : [20, 0],
"neighbourhood" : 22,
"quarter" : 22,
"city_block" : 22,
"houses" : [28, 0]
},
"boundary" : {
@@ -78,6 +80,26 @@
},
"mountain_pass" : {
"" : [20, 0]
},
"historic" : {
"neighbourhood" : [30, 0]
}
}
},
{ "countries" : [ "de" ],
"tags" : {
"place" : {
"county" : [12, 0]
},
"boundary" : {
"administrative5" : [10, 0]
}
}
},
{ "countries" : [ "be" ],
"tags" : {
"boundary" : {
"administrative7" : [14, 0]
}
}
}

View File

@@ -7,7 +7,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
// General settings
@define('CONST_Debug', false);
@define('CONST_Database_DSN', 'pgsql://@/nominatim'); // <driver>://<username>:<password>@<host>:<port>/<database>
@define('CONST_Database_DSN', 'pgsql:dbname=nominatim'); // or add ;host=...;port=...;user=...;password=...
@define('CONST_Database_Web_User', 'www-data');
@define('CONST_Database_Module_Path', CONST_InstallPath.'/module');
@define('CONST_Max_Word_Frequency', '50000');
@@ -23,8 +23,6 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
// term. Spaces are kept but collapsed to one standard space.
@define('CONST_Term_Normalization_Rules', ":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
// Set to false to avoid importing extra postcodes for the US.
@define('CONST_Use_Extra_US_Postcodes', true);
/* Set to true after importing Tiger house number data for the US.
Note: The tables must already exist or queries will throw errors.
After changing this setting run ./utils/setup --create-functions

View File

@@ -73,7 +73,7 @@
}
},
{
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country",
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in:country",
"addr:country", "addr:country", "addr:country_code"],
"values" : {
"" : "country"

View File

@@ -42,7 +42,7 @@
}
},
{
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country",
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in:country",
"addr:country", "addr:country", "addr:country_code"],
"values" : {
"" : "country"

View File

@@ -186,7 +186,7 @@
}
},
{
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country",
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in:country",
"addr:country", "addr:country", "addr:country_code"],
"values" : {
"" : "country"

View File

@@ -42,7 +42,7 @@
}
},
{
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in_country",
"keys" : ["country_code", "ISO3166-1", "is_in:country_code", "is_in:country",
"addr:country", "addr:country", "addr:country_code"],
"values" : {
"" : "country"

View File

@@ -547,7 +547,7 @@ BEGIN
-- RAISE WARNING 'get_country_code, start: %', ST_AsText(place_centre);
-- Try for a OSM polygon
FOR nearcountry IN select country_code from location_area_country where country_code is not null and not isguess and st_covers(geometry, place_centre) limit 1
FOR nearcountry IN select country_code from location_area_country where country_code is not null and st_covers(geometry, place_centre) limit 1
LOOP
RETURN nearcountry.country_code;
END LOOP;
@@ -768,6 +768,28 @@ END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION osmline_reinsert(node_id BIGINT, geom GEOMETRY)
RETURNS BOOLEAN
AS $$
DECLARE
existingline RECORD;
BEGIN
SELECT w.id FROM planet_osm_ways w, location_property_osmline p
WHERE p.linegeo && geom and p.osm_id = w.id and p.indexed_status = 0
and node_id = any(w.nodes) INTO existingline;
IF existingline.id is not NULL THEN
DELETE FROM location_property_osmline WHERE osm_id = existingline.id;
INSERT INTO location_property_osmline (osm_id, address, linegeo)
SELECT osm_id, address, geometry FROM place
WHERE osm_type = 'W' and osm_id = existingline.id;
END IF;
RETURN true;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION osmline_insert() RETURNS TRIGGER
AS $$
@@ -887,11 +909,7 @@ BEGIN
END IF;
-- some postcorrections
IF NEW.class = 'place' THEN
IF NEW.type in ('continent', 'sea', 'country', 'state') AND NEW.osm_type = 'N' THEN
NEW.rank_address := 0;
END IF;
ELSEIF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN
IF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN
-- Slightly promote waterway relations so that they are processed
-- before their members.
NEW.rank_search := NEW.rank_search - 1;
@@ -908,16 +926,14 @@ BEGIN
NEW.country_code := NULL;
END IF;
-- Block import below rank 22
-- IF NEW.rank_search > 22 THEN
-- RETURN NULL;
-- END IF;
--DEBUG: RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
RETURN NEW; -- %DIFFUPDATES% The following is not needed until doing diff updates, and slows the main index process down
IF NEW.rank_address > 0 THEN
IF NEW.osm_type = 'N' and NEW.rank_search > 28 THEN
-- might be part of an interpolation
result := osmline_reinsert(NEW.osm_id, NEW.geometry);
ELSEIF NEW.rank_address > 0 THEN
IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN
-- Performance: We just can't handle re-indexing for country level changes
IF st_area(NEW.geometry) < 1 THEN
@@ -1162,6 +1178,7 @@ TRIGGER
DECLARE
place_centroid GEOMETRY;
near_centroid GEOMETRY;
search_maxdistance FLOAT[];
search_mindistance FLOAT[];
@@ -1238,6 +1255,8 @@ BEGIN
END IF;
--DEBUG: RAISE WARNING 'Copy over address tags';
-- housenumber is a computed field, so start with an empty value
NEW.housenumber := NULL;
IF NEW.address is not NULL THEN
IF NEW.address ? 'conscriptionnumber' THEN
i := getorcreate_housenumber_id(make_standard_name(NEW.address->'conscriptionnumber'));
@@ -1266,6 +1285,8 @@ BEGIN
-- Speed up searches - just use the centroid of the feature
-- cheaper but less accurate
place_centroid := ST_PointOnSurface(NEW.geometry);
-- For searching near features rather use the centroid
near_centroid := ST_Envelope(NEW.geometry);
NEW.centroid := null;
NEW.postcode := null;
--DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(place_centroid);
@@ -1396,7 +1417,7 @@ BEGIN
IF NEW.parent_place_id IS NULL AND addr_street IS NOT NULL THEN
address_street_word_ids := get_name_ids(make_standard_name(addr_street));
IF address_street_word_ids IS NOT NULL THEN
SELECT place_id from getNearestNamedRoadFeature(NEW.partition, place_centroid, address_street_word_ids) INTO NEW.parent_place_id;
SELECT place_id from getNearestNamedRoadFeature(NEW.partition, near_centroid, address_street_word_ids) INTO NEW.parent_place_id;
END IF;
END IF;
--DEBUG: RAISE WARNING 'Checked for addr:street (%)', NEW.parent_place_id;
@@ -1404,7 +1425,7 @@ BEGIN
IF NEW.parent_place_id IS NULL AND addr_place IS NOT NULL THEN
address_street_word_ids := get_name_ids(make_standard_name(addr_place));
IF address_street_word_ids IS NOT NULL THEN
SELECT place_id from getNearestNamedPlaceFeature(NEW.partition, place_centroid, address_street_word_ids) INTO NEW.parent_place_id;
SELECT place_id from getNearestNamedPlaceFeature(NEW.partition, near_centroid, address_street_word_ids) INTO NEW.parent_place_id;
END IF;
END IF;
--DEBUG: RAISE WARNING 'Checked for addr:place (%)', NEW.parent_place_id;
@@ -1439,7 +1460,7 @@ BEGIN
IF location.address ? 'street' THEN
address_street_word_ids := get_name_ids(make_standard_name(location.address->'street'));
IF address_street_word_ids IS NOT NULL THEN
SELECT place_id from getNearestNamedRoadFeature(NEW.partition, place_centroid, address_street_word_ids) INTO NEW.parent_place_id;
SELECT place_id from getNearestNamedRoadFeature(NEW.partition, near_centroid, address_street_word_ids) INTO NEW.parent_place_id;
EXIT WHEN NEW.parent_place_id is not NULL;
END IF;
END IF;
@@ -1448,7 +1469,7 @@ BEGIN
IF location.address ? 'place' THEN
address_street_word_ids := get_name_ids(make_standard_name(location.address->'place'));
IF address_street_word_ids IS NOT NULL THEN
SELECT place_id from getNearestNamedPlaceFeature(NEW.partition, place_centroid, address_street_word_ids) INTO NEW.parent_place_id;
SELECT place_id from getNearestNamedPlaceFeature(NEW.partition, near_centroid, address_street_word_ids) INTO NEW.parent_place_id;
EXIT WHEN NEW.parent_place_id is not NULL;
END IF;
END IF;
@@ -1477,7 +1498,7 @@ BEGIN
-- Still nothing, just use the nearest road
IF NEW.parent_place_id IS NULL THEN
SELECT place_id FROM getNearestRoadFeature(NEW.partition, place_centroid) INTO NEW.parent_place_id;
SELECT place_id FROM getNearestRoadFeature(NEW.partition, near_centroid) INTO NEW.parent_place_id;
END IF;
--DEBUG: RAISE WARNING 'Checked for nearest way (%)', NEW.parent_place_id;
@@ -1500,7 +1521,7 @@ BEGIN
NEW.postcode := location.postcode;
END IF;
IF NEW.postcode is null THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, place_centroid);
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
END IF;
@@ -1803,7 +1824,7 @@ BEGIN
-- RAISE WARNING '% isaddress: %', location.place_id, location_isaddress;
-- Add it to the list of search terms
IF NOT %REVERSE-ONLY% AND location.rank_search > 4 THEN
IF NOT %REVERSE-ONLY% THEN
nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]);
END IF;
INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address)
@@ -2204,12 +2225,13 @@ BEGIN
indexed_status = 2,
geometry = NEW.geometry
where place_id = existingplacex.place_id;
-- if a node (=> house) that is part of an interpolation line changes (e.g. the street attribute) => mark this line for reparenting
-- (already here, because interpolation lines are reindexed before nodes, so in the second call it would be too late)
IF NEW.osm_type='N' and NEW.class='place' and NEW.type='house' THEN
-- Is this node part of an interpolation line? search for it in location_property_osmline and mark the interpolation line for reparenting
update location_property_osmline p set indexed_status = 2 from planet_osm_ways w where p.linegeo && NEW.geometry and p.osm_id = w.id and NEW.osm_id = any(w.nodes);
IF NEW.osm_type='N'
and (coalesce(existing.address, ''::hstore) != coalesce(NEW.address, ''::hstore)
or existing.geometry::text != NEW.geometry::text)
THEN
result:= osmline_reinsert(NEW.osm_id, NEW.geometry);
END IF;
-- linked places should get potential new naming and addresses
@@ -2318,6 +2340,7 @@ DECLARE
searchhousename HSTORE;
searchrankaddress INTEGER;
searchpostcode TEXT;
postcode_isaddress BOOL;
searchclass TEXT;
searchtype TEXT;
countryname HSTORE;
@@ -2325,6 +2348,8 @@ BEGIN
-- The place in question might not have a direct entry in place_addressline.
-- In that case look for the parent of such places and save it in for_place_id.
postcode_isaddress := true;
-- first query osmline (interpolation lines)
IF in_housenumber >= 0 THEN
SELECT parent_place_id, country_code, in_housenumber::text, 30, postcode,
@@ -2361,7 +2386,7 @@ BEGIN
-- postcode table
IF for_place_id IS NULL THEN
SELECT parent_place_id, country_code, rank_address, postcode, 'place', 'postcode'
SELECT parent_place_id, country_code, rank_search, postcode, 'place', 'postcode'
FROM location_postcode
WHERE place_id = in_place_id
INTO for_place_id, searchcountrycode, searchrankaddress, searchpostcode,
@@ -2441,7 +2466,10 @@ BEGIN
searchcountrycode := location.country_code;
END IF;
IF location.type in ('postcode', 'postal_code') THEN
location.isaddress := FALSE;
postcode_isaddress := false;
IF location.osm_type != 'R' THEN
location.isaddress := FALSE;
END IF;
END IF;
countrylocation := ROW(location.place_id, location.osm_type, location.osm_id,
location.name, location.class, location.type,
@@ -2485,7 +2513,7 @@ BEGIN
IF searchpostcode IS NOT NULL THEN
location := ROW(null, null, null, hstore('ref', searchpostcode), 'place',
'postcode', null, true, true, 5, 0)::addressline;
'postcode', null, false, postcode_isaddress, 5, 0)::addressline;
RETURN NEXT location;
END IF;
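With the new postcode_isaddress flag, the synthesized postcode row emitted at the end of get_addressdata() is only flagged as an address part when no postcode area from place_addressline has already taken that role. A minimal sketch for inspecting the result for one place (the place_id is made up):

    SELECT rank_address, class, type, isaddress, name->'ref' AS ref
      FROM get_addressdata(12345, -1)
     ORDER BY rank_address DESC;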

View File

@@ -1,27 +1,27 @@
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
CREATE INDEX idx_word_word_id on word USING BTREE (word_id) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_word_word_id on word USING BTREE (word_id) {ts:search-index};
CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index};
DROP INDEX IF EXISTS idx_placex_rank_search;
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index};
CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index};
CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) {ts:address-index} where indexed_status > 0;
CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL;
DROP INDEX CONCURRENTLY IF EXISTS idx_placex_rank_search;
CREATE INDEX CONCURRENTLY idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) {ts:address-index} where indexed_status > 0;
CREATE INDEX CONCURRENTLY idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL;
CREATE INDEX idx_placex_geometry_reverse_lookupPoint
CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_lookupPoint
ON placex USING gist (geometry) {ts:search-index}
WHERE (name is not null or housenumber is not null or rank_address between 26 and 27)
AND class not in ('railway','tunnel','bridge','man_made')
AND rank_address >= 26 AND indexed_status = 0 AND linked_place_id is null;
CREATE INDEX idx_placex_geometry_reverse_lookupPolygon
CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {ts:search-index}
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
CREATE INDEX idx_placex_geometry_reverse_placeNode
CREATE INDEX CONCURRENTLY idx_placex_geometry_reverse_placeNode
ON placex USING gist (geometry) {ts:search-index}
WHERE osm_type = 'N' AND rank_search between 5 and 25
AND class = 'place' AND type != 'postcode'
@@ -29,14 +29,14 @@ CREATE INDEX idx_placex_geometry_reverse_placeNode
GRANT SELECT ON table country_osm_grid to "{www-user}";
CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index};
CREATE INDEX CONCURRENTLY idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index};
CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
CREATE INDEX idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index};
DROP INDEX IF EXISTS place_id_idx;
CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
DROP INDEX CONCURRENTLY IF EXISTS place_id_idx;
CREATE UNIQUE INDEX CONCURRENTLY idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index};
CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index};
CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index};
CREATE UNIQUE INDEX CONCURRENTLY idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index};
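The CONCURRENTLY variants above build and drop these indexes without blocking concurrent writes to the live tables. One operational caveat: PostgreSQL does not allow CREATE INDEX CONCURRENTLY or DROP INDEX CONCURRENTLY inside a transaction block, so each statement must be issued in autocommit mode. A minimal sketch against a hypothetical table (table and index names are illustrative):

    -- must NOT be wrapped in BEGIN ... COMMIT
    CREATE INDEX CONCURRENTLY idx_demo_place_id ON demo_places USING BTREE (place_id);
    DROP INDEX CONCURRENTLY IF EXISTS idx_demo_place_id;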

View File

@@ -1,6 +1,6 @@
-- Indices used for /search API.
-- These indices are created only after the indexing process is done.
CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index};
CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index};
CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index};
CREATE INDEX CONCURRENTLY idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index};

View File

@@ -6,11 +6,9 @@ BEGIN
-- start
IF in_partition = -partition- THEN
FOR r IN
SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid FROM (
SELECT * FROM location_area_large_-partition- WHERE ST_Intersects(geometry, feature) and rank_search < maxrank
UNION ALL
SELECT * FROM location_area_country WHERE ST_Intersects(geometry, feature) and rank_search < maxrank
) as location_area
SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid
FROM location_area_large_-partition-
WHERE ST_Intersects(geometry, feature) and rank_search < maxrank
GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid
ORDER BY rank_address, isin_tokens && keywords desc, isguess asc,
ST_Distance(feature, centroid) *
@@ -64,9 +62,9 @@ BEGIN
RETURN TRUE;
END IF;
IF in_rank_search <= 4 THEN
INSERT INTO location_area_country (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, centroid, geometry)
values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, in_centroid, in_geometry);
IF in_rank_search <= 4 and not in_estimate THEN
INSERT INTO location_area_country (place_id, country_code, geometry)
values (in_place_id, in_country_code, in_geometry);
RETURN TRUE;
END IF;
@@ -97,7 +95,7 @@ BEGIN
ST_Distance(centroid, point) as distance, null as isguess
FROM search_name_-partition-
WHERE name_vector && isin_token
AND ST_DWithin(centroid, point, 0.015)
AND centroid && ST_Expand(point, 0.015)
AND search_rank between 26 and 27
ORDER BY distance ASC limit 1
LOOP
@@ -125,7 +123,7 @@ BEGIN
ST_Distance(centroid, point) as distance, null as isguess
FROM search_name_-partition-
WHERE name_vector && isin_token
AND ST_DWithin(centroid, point, 0.04)
AND centroid && ST_Expand(point, 0.04)
AND search_rank between 16 and 22
ORDER BY distance ASC limit 1
LOOP
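Replacing ST_DWithin with a plain bounding-box overlap (centroid && ST_Expand(point, r)) makes the filter cheaper per candidate row; points slightly outside the exact radius may now pass the filter, but the ORDER BY ST_Distance ... LIMIT 1 still returns the nearest match. A standalone sketch of the pattern, assuming the default partition 0 exists and using a made-up search point:

    SELECT place_id,
           ST_Distance(centroid, pt) AS distance
      FROM search_name_0,
           (SELECT ST_SetSRID(ST_MakePoint(9.52, 47.14), 4326) AS pt) AS p
     WHERE centroid && ST_Expand(pt, 0.04)
       AND search_rank BETWEEN 16 AND 22
     ORDER BY distance ASC
     LIMIT 1;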

View File

@@ -35,9 +35,6 @@ CREATE TABLE search_name_blank (
);
CREATE TABLE location_area_country () INHERITS (location_area_large) {ts:address-data};
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {ts:address-index};
-- start
CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large) {ts:address-data};
CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) {ts:address-index};
@@ -45,8 +42,8 @@ CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large
CREATE TABLE search_name_-partition- () INHERITS (search_name_blank) {ts:address-data};
CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) {ts:address-index};
CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) {ts:address-index};
CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) {ts:address-index};
CREATE INDEX idx_search_name_-partition-_centroid_street ON search_name_-partition- USING GIST (centroid) {ts:address-index} where search_rank between 26 and 27;
CREATE INDEX idx_search_name_-partition-_centroid_place ON search_name_-partition- USING GIST (centroid) {ts:address-index} where search_rank between 2 and 25;
DROP TABLE IF EXISTS location_road_-partition-;
CREATE TABLE location_road_-partition- (

View File

@@ -36,6 +36,7 @@ GRANT SELECT ON new_query_log TO "{www-user}" ;
GRANT SELECT ON TABLE country_name TO "{www-user}";
GRANT SELECT ON TABLE gb_postcode TO "{www-user}";
GRANT SELECT ON TABLE us_postcode TO "{www-user}";
drop table IF EXISTS word;
CREATE TABLE word (
@@ -69,6 +70,15 @@ CREATE TABLE location_area (
CREATE TABLE location_area_large () INHERITS (location_area);
DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
place_id BIGINT,
country_code varchar(2),
geometry GEOMETRY(Geometry, 4326)
) {ts:address-data};
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {ts:address-index};
drop table IF EXISTS location_property CASCADE;
CREATE TABLE location_property (
place_id BIGINT,

View File

@@ -26,6 +26,11 @@ BEGIN
endnumber = in_startnumber;
END IF;
IF startnumber < 0 THEN
RAISE WARNING 'Negative house number range (% to %) on %, %', startnumber, endnumber, in_street, in_isin;
RETURN 0;
END IF;
numberrange := endnumber - startnumber;
IF (interpolationtype = 'odd' AND startnumber%2 = 0) OR (interpolationtype = 'even' AND startnumber%2 = 1) THEN

View File

@@ -6,7 +6,7 @@ SELECT country_code,
ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid
FROM placex
WHERE address ? 'postcode'
AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
AND address->'postcode' NOT SIMILAR TO '%(,|;|:)%'
AND geometry IS NOT null
GROUP BY country_code, pc;
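The extended pattern above now also drops postcode values that use a colon as a list separator, alongside the comma and semicolon cases handled before. A small self-contained check of what the filter keeps (sample values are made up):

    SELECT pc
      FROM (VALUES ('80331'), ('80331;80333'), ('80331,80333'), ('80331:80333')) AS v(pc)
     WHERE pc NOT SIMILAR TO '%(,|;|:)%';
    -- returns only '80331'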

View File

@@ -34,3 +34,10 @@ Feature: Object details
| 1 |
Then the result is valid html
# ticket #1343
Scenario: Details of a country with keywords
When sending details query for R287072
| keywords |
| 1 |
Then the result is valid html

View File

@@ -4,15 +4,30 @@ Feature: Places by osm_type and osm_id Tests
Scenario Outline: address lookup for existing node, way, relation
When sending <format> lookup query for N3284625766,W6065798,,R123924,X99,N0
Then the result is valid <format>
Then the result is valid <outformat>
And exactly 3 results are returned
Examples:
| format |
| xml |
| json |
| geojson |
| format | outformat |
| xml | xml |
| json | json |
| jsonv2 | json |
| geojson | geojson |
| geocodejson | geocodejson |
Scenario: address lookup for non-existing or invalid node, way, relation
When sending xml lookup query for X99,,N0,nN158845944,ABC,,W9
Then exactly 0 results are returned
Scenario Outline: Boundingbox is returned
When sending <format> lookup query for N3284625766,W6065798
Then exactly 2 results are returned
And result 0 has bounding box in -32.812,-32.811,-56.509,-56.508
And result 1 has bounding box in 47.14,47.15,9.51,9.53
Examples:
| format |
| json |
| jsonv2 |
| geojson |
| xml |

View File

@@ -26,6 +26,18 @@ Feature: Searches with postcodes
| country_code |
| li |
Scenario: Postcode search with bounded viewbox restriction
When sending json search query "9486" with address
| bounded | viewbox |
| 1 | 9.55,47.20,9.58,47.22 |
Then result addresses contain
| postcode |
| 9486 |
When sending json search query "9486" with address
| bounded | viewbox |
| 1 | 5.00,20.00,6.00,21.00 |
Then exactly 0 results are returned
Scenario: Postcode search with structured query
When sending json search query "" with address
| postalcode | country |

View File

@@ -5,7 +5,7 @@ Feature: Status queries against unknown database
Scenario: Failed status as text
When sending text status query
Then a HTTP 500 is returned
And the page contents equals "ERROR: No database"
And the page contents equals "ERROR: Database connection failed"
Scenario: Failed status as json
When sending json status query
@@ -13,5 +13,5 @@ Feature: Status queries against unknown database
And the result is valid json
And results contain
| status | message |
| 700 | No database |
| 700 | Database connection failed |
And result has not attributes data_updated

View File

@@ -344,3 +344,23 @@ Feature: Import of address interpolations
When importing
Then W1 expands to no interpolation
Scenario: Two point interpolation starting at 0
Given the places
| osm | class | type | housenr | geometry |
| N1 | place | house | 0 | 1 1 |
| N2 | place | house | 2 | 1 1.001 |
And the places
| osm | class | type | addr+interpolation | geometry |
| W1 | place | houses | even | 1 1, 1 1.001 |
And the ways
| id | nodes |
| 1 | 1,2 |
When importing
Then W1 expands to interpolation
| start | end | geometry |
| 0 | 2 | 1 1, 1 1.001 |
When sending jsonv2 reverse coordinates 1,1
Then results contain
| ID | osm_type | osm_id | type | display_name |
| 0 | way | 1 | house | 0 |

View File

@@ -224,7 +224,7 @@ Feature: Parenting of objects
When importing
Then placex contains
| object | parent_place_id |
| W1 | W3 |
| W1 | W2 |
Scenario: Building with addr:street tags
Given the scene building-on-street-corner

View File

@@ -220,8 +220,8 @@ Feature: Import into placex
| R21 | 30 | 30 |
| R22 | 30 | 30 |
| R23 | 30 | 30 |
| R40 | 4 | 4 |
| R41 | 8 | 8 |
| R40 | 4 | 0 |
| R41 | 8 | 0 |
Scenario: search and address ranks for highways correctly assigned
Given the scene roads-with-pois

View File

@@ -275,7 +275,6 @@ Feature: Update of address interpolations
| parent_place_id | start | end |
| W2 | 2 | 6 |
@Fail
Scenario: housenumber added in middle of interpolation
Given the grid
| 1 | | | | | 2 |
@@ -334,7 +333,6 @@ Feature: Update of address interpolations
| parent_place_id | start | end | geometry |
| W1 | 2 | 10 | 3,4,5 |
@Fail
Scenario: Change the start housenumber
Given the grid
| 1 | | 2 |

View File

@@ -0,0 +1,38 @@
@DB
Feature: Update parenting of objects
Scenario: POI inside building inherits addr:street change
Given the scene building-on-street-corner
And the named places
| osm | class | type | geometry |
| N1 | amenity | bank | :n-inner |
| N2 | shop | bakery | :n-edge-NS |
| N3 | shop | supermarket| :n-edge-WE |
And the places
| osm | class | type | addr_place | housenr | geometry |
| W1 | building | yes | nowhere | 3 | :w-building |
And the places
| osm | class | type | name | geometry |
| W2 | highway | primary | bar | :w-WE |
| W3 | highway | residential | foo | :w-NS |
When importing
Then placex contains
| object | parent_place_id | housenumber |
| W1 | W2 | 3 |
| N1 | W3 | 3 |
| N2 | W3 | 3 |
| N3 | W2 | 3 |
When updating places
| osm | class | type | street | addr_place | housenr | geometry |
| W1 | building | yes | foo | nowhere | 3 | :w-building |
And updating places
| osm | class | type | name | geometry |
| N3 | shop | supermarket| well | :n-edge-WE |
Then placex contains
| object | parent_place_id | housenumber |
| W1 | W3 | 3 |
| N1 | W3 | 3 |
| N2 | W3 | 3 |
| N3 | W3 | 3 |

View File

@@ -73,12 +73,14 @@ class NominatimEnvironment(object):
def write_nominatim_config(self, dbname):
f = open(self.local_settings_file, 'w')
f.write("<?php\n @define('CONST_Database_DSN', 'pgsql://%s:%s@%s%s/%s');\n" %
(self.db_user if self.db_user else '',
self.db_pass if self.db_pass else '',
self.db_host if self.db_host else '',
(':' + self.db_port) if self.db_port else '',
dbname))
# https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
f.write("<?php\n @define('CONST_Database_DSN', 'pgsql:dbname=%s%s%s%s%s');\n" %
(dbname,
(';host=' + self.db_host) if self.db_host else '',
(';port=' + self.db_port) if self.db_port else '',
(';user=' + self.db_user) if self.db_user else '',
(';password=' + self.db_pass) if self.db_pass else ''
))
f.write("@define('CONST_Osm2pgsql_Flatnode_File', null);\n")
f.close()
@@ -193,6 +195,7 @@ class NominatimEnvironment(object):
proc = subprocess.Popen(cmd, cwd=self.build_dir,
stdout=subprocess.PIPE, stderr=subprocess.PIPE)
(outp, outerr) = proc.communicate()
outerr = outerr.decode('utf-8').replace('\\n', '\n')
logger.debug("run_nominatim_script: %s\n%s\n%s" % (cmd, outp, outerr))
assert (proc.returncode == 0), "Script '%s' failed:\n%s\n%s\n" % (script, outp, outerr)

View File

@@ -96,9 +96,9 @@ Feature: Tag evaluation
Then place contains
| object | name |
| N1 | 'name: de' : 'Foo', 'name' : 'real1' |
| N2 | 'name: de' : 'Foo', 'name' : 'real2' |
| N3 | 'name: de' : 'Foo', 'name:\\\\' : 'real3' |
| N4 | 'name: de' : 'Foo', 'name' : 'rea\\l3' |
| N2 | 'name:\nde' : 'Foo', 'name' : 'real2' |
| N3 | 'name:\tde' : 'Foo', 'name:\\\\' : 'real3' |
| N4 | 'name:\tde' : 'Foo', 'name' : 'rea\\l3' |
Scenario: Unprintable characters in address tags are maintained
When loading osm data

View File

@@ -24,3 +24,38 @@ Feature: Update of simple objects by osm2pgsql
| object | class | type | name |
| N1:tourism | tourism | hotel | 'name' : 'foo' |
Scenario: Downgrading a highway to one that is dropped without name
When loading osm data
"""
n100 x0 y0
n101 x0.0001 y0.0001
w1 Thighway=residential Nn100,n101
"""
Then place contains
| object |
| W1:highway |
When updating osm data
"""
w1 Thighway=service Nn100,n101
"""
Then place has no entry for W1
Scenario: Downgrading a highway when a second tag is present
When loading osm data
"""
n100 x0 y0
n101 x0.0001 y0.0001
w1 Thighway=residential,tourism=hotel Nn100,n101
"""
Then place contains
| object |
| W1:highway |
| W1:tourism |
When updating osm data
"""
w1 Thighway=service,tourism=hotel Nn100,n101
"""
Then place has no entry for W1:highway
And place contains
| object |
| W1:tourism |

View File

@@ -71,7 +71,7 @@ class GenericResponse(object):
pass
elif h == 'osm':
assert_equal(res['osm_type'], row[h][0])
assert_equal(res['osm_id'], row[h][1:])
assert_equal(res['osm_id'], int(row[h][1:]))
elif h == 'centroid':
x, y = row[h].split(' ')
assert_almost_equal(float(y), float(res['lat']))
@@ -455,8 +455,12 @@ def website_lookup_request(context, fmt, query):
if fmt == 'json ':
outfmt = 'json'
elif fmt == 'jsonv2 ':
outfmt = 'json'
elif fmt == 'geojson ':
outfmt = 'geojson'
elif fmt == 'geocodejson ':
outfmt = 'geocodejson'
else:
outfmt = 'xml'

View File

@@ -0,0 +1,116 @@
<?php
namespace Nominatim;
require_once(CONST_BasePath.'/lib/lib.php');
require_once(CONST_BasePath.'/lib/DB.php');
// subclassing so we can set the protected connection variable
class NominatimSubClassedDB extends \Nominatim\DB
{
public function setConnection($oConnection)
{
$this->connection = $oConnection;
}
}
// phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
class DBTest extends \PHPUnit\Framework\TestCase
{
public function testReusingConnection()
{
$oDB = new NominatimSubClassedDB('');
$oDB->setConnection('anything');
$this->assertTrue($oDB->connect());
}
public function testDatabaseExists()
{
$oDB = new \Nominatim\DB('');
$this->assertFalse($oDB->databaseExists());
}
public function testErrorHandling()
{
$this->expectException(DatabaseError::class);
$this->expectExceptionMessage('Failed to establish database connection');
$oDB = new \Nominatim\DB('pgsql:dbname=abc');
$oDB->connect();
}
public function testErrorHandling2()
{
$this->expectException(DatabaseError::class);
$this->expectExceptionMessage('Database query failed');
$oPDOStub = $this->getMockBuilder(PDO::class)
->setMethods(array('query', 'quote'))
->getMock();
$oPDOStub->method('query')
->will($this->returnCallback(function ($sVal) {
return "'$sVal'";
}));
$oPDOStub->method('query')
->will($this->returnCallback(function () {
throw new \PDOException('ERROR: syntax error at or near "FROM"');
}));
$oDB = new NominatimSubClassedDB('');
$oDB->setConnection($oPDOStub);
$oDB->getOne('SELECT name FROM');
}
public function testGetPostgresVersion()
{
$oDBStub = $this->getMockBuilder(\Nominatim\DB::class)
->disableOriginalConstructor()
->setMethods(array('getOne'))
->getMock();
$oDBStub->method('getOne')
->willReturn('100006');
$this->assertEquals(10, $oDBStub->getPostgresVersion());
}
public function testGetPostgisVersion()
{
$oDBStub = $this->getMockBuilder(\Nominatim\DB::class)
->disableOriginalConstructor()
->setMethods(array('getOne'))
->getMock();
$oDBStub->method('getOne')
->willReturn('2.4.4');
$this->assertEquals(2.4, $oDBStub->getPostgisVersion());
}
public function testParseDSN()
{
$this->assertEquals(
array(),
\Nominatim\DB::parseDSN('')
);
$this->assertEquals(
array(
'database' => 'db1',
'hostspec' => 'machine1'
),
\Nominatim\DB::parseDSN('pgsql:dbname=db1;host=machine1')
);
$this->assertEquals(
array(
'database' => 'db1',
'hostspec' => 'machine1',
'port' => '1234',
'username' => 'john',
'password' => 'secret'
),
\Nominatim\DB::parseDSN('pgsql:dbname=db1;host=machine1;port=1234;user=john;password=secret')
);
}
}

View File

@@ -10,7 +10,7 @@ class DatabaseErrorTest extends \PHPUnit\Framework\TestCase
public function testSqlMessage()
{
$oSqlStub = $this->getMockBuilder(\DB_Error::class)
$oSqlStub = $this->getMockBuilder(PDOException::class)
->setMethods(array('getMessage'))
->getMock();
@@ -21,9 +21,6 @@ class DatabaseErrorTest extends \PHPUnit\Framework\TestCase
$this->assertEquals('Sql error', $oErr->getMessage());
$this->assertEquals(123, $oErr->getCode());
$this->assertEquals('Unknown table.', $oErr->getSqlError());
// causes a circular reference warning during dump
// $this->assertRegExp('/Mock_DB_Error/', $oErr->getSqlDebugDump());
}
public function testSqlObjectDump()
@@ -31,14 +28,4 @@ class DatabaseErrorTest extends \PHPUnit\Framework\TestCase
$oErr = new DatabaseError('Sql error', 123, null, array('one' => 'two'));
$this->assertRegExp('/two/', $oErr->getSqlDebugDump());
}
public function testChksqlThrows()
{
$this->expectException(DatabaseError::class);
$this->expectExceptionMessage('My custom error message');
$this->expectExceptionCode(500);
$oDB = new \DB_Error;
$this->assertEquals(false, chksql($oDB, 'My custom error message'));
}
}

View File

@@ -4,6 +4,29 @@ namespace Nominatim;
require_once(CONST_BasePath.'/lib/Phrase.php');
class TokensFullSet
{
public function containsAny($sTerm)
{
return true;
}
}
// phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
class TokensPartialSet
{
public function __construct($aTokens)
{
$this->aTokens = array_flip($aTokens);
}
public function containsAny($sTerm)
{
return isset($this->aTokens[$sTerm]);
}
}
// phpcs:ignore PSR1.Classes.ClassDeclaration.MultipleClasses
class PhraseTest extends \PHPUnit\Framework\TestCase
{
@@ -21,6 +44,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
public function testEmptyPhrase()
{
$oPhrase = new Phrase('', '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(
array(array('')),
@@ -32,6 +56,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
public function testSingleWordPhrase()
{
$oPhrase = new Phrase('a', '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(
'(a)',
@@ -43,20 +68,23 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
public function testMultiWordPhrase()
{
$oPhrase = new Phrase('a b', '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(
'(a b),(a|b)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c', '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(
'(a b c),(a|b c),(a|b|c),(a b|c)',
'(a b c),(a|b c),(a b|c),(a|b|c)',
$this->serializeSets($oPhrase->getWordSets())
);
$oPhrase = new Phrase('a b c d', '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(
'(a b c d),(a|b c d),(a|b|c d),(a|b|c|d),(a|b c|d),(a b|c d),(a b|c|d),(a b c|d)',
'(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
$this->serializeSets($oPhrase->getWordSets())
);
}
@@ -65,25 +93,47 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
public function testInverseWordSets()
{
$oPhrase = new Phrase('a b c', '');
$oPhrase->computeWordSets(new TokensFullSet());
$oPhrase->invertWordSets();
$this->assertEquals(
'(a b c),(c|a b),(c|b|a),(b c|a)',
'(a b c),(b c|a),(c|a b),(c|b|a)',
$this->serializeSets($oPhrase->getWordSets())
);
}
public function testMaxDepth()
public function testMaxWordSets()
{
$oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(8, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(8, count($oPhrase->getWordSets()));
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
$this->assertEquals(41226, count($oPhrase->getWordSets()));
$oPhrase->computeWordSets(new TokensFullSet());
$this->assertEquals(100, count($oPhrase->getWordSets()));
$oPhrase->invertWordSets();
$this->assertEquals(41226, count($oPhrase->getWordSets()));
$this->assertEquals(100, count($oPhrase->getWordSets()));
}
public function testPartialTokensShortTerm()
{
$oPhrase = new Phrase('a b c d', '');
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
$this->assertEquals(
'(a|b c d),(a|b c|d)',
$this->serializeSets($oPhrase->getWordSets())
);
}
public function testPartialTokensLongTerm()
{
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
$this->assertEquals(80, count($oPhrase->getWordSets()));
}
}

View File

@@ -2,7 +2,7 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/db.php');
require_once(CONST_BasePath.'/lib/DB.php');
require_once(CONST_BasePath.'/lib/Status.php');
@@ -23,19 +23,20 @@ class StatusTest extends \PHPUnit\Framework\TestCase
public function testNoDatabaseConnectionFail()
{
$this->expectException(\Exception::class);
$this->expectExceptionMessage('No database');
$this->expectExceptionMessage('Database connection failed');
$this->expectExceptionCode(700);
// causes 'Non-static method should not be called statically, assuming $this from incompatible context'
// failure on travis
// $oDB = \DB::connect('', false); // returns a DB_Error instance
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('connect'))
->getMock();
$oDB = new \DB_Error;
$oStatus = new Status($oDB);
$this->assertEquals('No database', $oStatus->status());
$oDbStub->method('connect')
->will($this->returnCallback(function () {
throw new \Nominatim\DatabaseError('psql connection problem', 500, null, 'unknown database');
}));
$oDB = null;
$oStatus = new Status($oDB);
$oStatus = new Status($oDbStub);
$this->assertEquals('No database', $oStatus->status());
}
@@ -47,8 +48,8 @@ class StatusTest extends \PHPUnit\Framework\TestCase
$this->expectExceptionCode(702);
// stub has getOne method but doesn't return anything
$oDbStub = $this->getMockBuilder(\DB::class)
->setMethods(array('getOne'))
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('connect', 'getOne'))
->getMock();
$oStatus = new Status($oDbStub);
@@ -62,8 +63,8 @@ class StatusTest extends \PHPUnit\Framework\TestCase
$this->expectExceptionMessage('No value');
$this->expectExceptionCode(704);
$oDbStub = $this->getMockBuilder(\DB::class)
->setMethods(array('getOne'))
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('connect', 'getOne'))
->getMock();
// return no word_id
@@ -80,8 +81,8 @@ class StatusTest extends \PHPUnit\Framework\TestCase
public function testOK()
{
$oDbStub = $this->getMockBuilder(\DB::class)
->setMethods(array('getOne'))
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('connect', 'getOne'))
->getMock();
$oDbStub->method('getOne')
@@ -96,7 +97,7 @@ class StatusTest extends \PHPUnit\Framework\TestCase
public function testDataDate()
{
$oDbStub = $this->getMockBuilder(\DB::class)
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('getOne'))
->getMock();

View File

@@ -2,8 +2,6 @@
namespace Nominatim;
// require_once(CONST_BasePath.'/lib/db.php');
// require_once(CONST_BasePath.'/lib/cmd.php');
require_once(CONST_BasePath.'/lib/TokenList.php');
@@ -56,9 +54,18 @@ class TokenTest extends \PHPUnit\Framework\TestCase
{
$this->expectOutputRegex('/<p><tt>/');
$oDbStub = $this->getMockBuilder(\DB::class)
->setMethods(array('getAll'))
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
->setMethods(array('getAll', 'getDBQuotedList'))
->getMock();
$oDbStub->method('getDBQuotedList')
->will($this->returnCallback(function ($aVals) {
return array_map(function ($sVal) {
return "'".$sVal."'";
}, $aVals);
}));
$oDbStub->method('getAll')
->will($this->returnCallback(function ($sql) {
$aResults = array();

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3
import sys
from osmium.replication import server

View File

@@ -41,7 +41,8 @@
'path' => 27
);
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
if (isset($aCMDResult['output-type'])) {
if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']);
@@ -55,7 +56,7 @@
$oParams = new Nominatim\ParameterParser();
if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx';
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
$sLanguagePrefArraySQL = 'ARRAY['.join(',', array_map('getDBQuoted', $aLangPrefOrder)).']';
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
// output formatting: build up a lookup table that maps address ranks to columns
$aColumnMapping = array();
@@ -95,7 +96,7 @@
$sPlacexSQL .= ' and rank_address = '.$iOutputRank;
if (isset($aCMDResult['restrict-to-country'])) {
$sPlacexSQL .= ' and country_code = '.getDBQuoted($aCMDResult['restrict-to-country']);
$sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
}
// restriction to parent place id
@@ -115,11 +116,8 @@
$sOsmId = $aCMDResult['restrict-to-osm-relation'];
}
if ($sOsmType) {
$sSQL = 'select place_id from placex where';
$sSQL .= ' osm_type = '.getDBQuoted($sOsmType);
$sSQL .= ' and osm_id = '.$sOsmId;
$sParentId = $oDB->getOne($sSQL);
if (PEAR::isError($sParentId)) fail(pg_last_error($oDB->connection));
$sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
$sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId);
}
if ($sParentId) {
@@ -131,18 +129,15 @@
// Iterate over placeids
// to get further hierarchical information
//var_dump($sPlacexSQL);
$aRes =& $oDB->query($sPlacexSQL);
if (PEAR::isError($aRes)) fail(pg_last_error($oDB->connection));
$oResults = $oDB->getQueryStatement($sPlacexSQL);
$fOutstream = fopen('php://output', 'w');
while ($aRes->fetchInto($aRow)) {
//var_dump($aRow);
while ($aRow = $oResults->fetch()) {
//var_dump($aRow);
$iPlaceID = $aRow['place_id'];
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, -1)";
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
$sSQL .= ' WHERE isaddress';
$sSQL .= ' order by rank_address desc,isaddress desc';
$aAddressLines = $oDB->getAll($sSQL);
if (PEAR::IsError($aAddressLines)) fail(pg_last_error($oDB->connection));
$aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
$aOutput = array_fill(0, $iNumCol, '');
// output address parts
@@ -157,10 +152,10 @@
$sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
$sSQL .= 'on px.place_id = pa.address_place_id ';
$sSQL .= 'where pa.cached_rank_address in (5,11) ';
$sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in ('.substr($aRow['place_ids'], 1, -1).')) ';
$sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
$sSQL .= 'group by postcode order by count(*) desc limit 1';
$sRes = $oDB->getOne($sSQL);
if (PEAR::IsError($sRes)) fail(pg_last_error($oDB->connection));
$sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
$aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
} else {
$aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];

View File

@@ -48,7 +48,8 @@ exit;
$a = array();
$a[] = 'test';
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
if ($aCMDResult['drop-tables'])
{
@@ -304,7 +305,9 @@ function _templatesToProperties($aTemplates)
}
if (isset($aCMDResult['parse-wikipedia'])) {
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
$sSQL = 'select page_title from content where page_namespace = 0 and page_id %10 = ';
$sSQL .= $aCMDResult['parse-wikipedia'];
$sSQL .= ' and (page_content ilike \'%{{Coord%\' or (page_content ilike \'%lat%\' and page_content ilike \'%lon%\'))';
@@ -366,7 +369,9 @@ function nominatimXMLEnd($hParser, $sName)
if (isset($aCMDResult['link'])) {
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
$aWikiArticles = $oDB->getAll("select * from wikipedia_article where language = 'en' and lat is not null and osm_type is null and totalcount < 31 order by importance desc limit 200000");
// If you point this script at production OSM you will be blocked

View File

@@ -1,53 +0,0 @@
#!/bin/bash
psqlcmd="psql wikipedia2013"
mysql2pgsqlcmd="./mysql2pgsql.perl /dev/stdin /dev/stdout"
language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
# wikipedia pages and links
echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double, lon double );" | $psqlcmd
echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );" | $psqlcmd
for i in "${language[@]}"
do
wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
wget http://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
done
for i in "${language[@]}"
do
gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | $mysql2pgsqlcmd | $psqlcmd
gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | $mysql2pgsqlcmd | $psqlcmd
gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | $mysql2pgsqlcmd | $psqlcmd
gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | $mysql2pgsqlcmd | $psqlcmd
done
for i in "${language[@]}"
do
echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | $psqlcmd
echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | $psqlcmd
echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | $psqlcmd
echo "alter table ${i}pagelinkcount add column othercount integer;" | $psqlcmd
echo "update ${i}pagelinkcount set othercount = 0;" | $psqlcmd
for j in "${language[@]}"
do
echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | $psqlcmd
done
echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | $psqlcmd
done
echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | $psqlcmd
# precalculated lat,lon from dbpedia
wget http://downloads.dbpedia.org/current/en/geo_coordinates_en.nq.bz2
bzip2 -dc geo_coordinates_en.nq.bz2 | grep http://www.georss.org/georss/point | sed 's|<http://dbpedia.org/resource/[^>]*> *<http://www.georss.org/georss/point> "\(-\?[-0-9.E]\+\) \(-\?[-0-9.E]\+\)"@en <http://\([a-z][a-z]\).wikipedia.org/wiki/\([^#]\+\)#> .|update pagelinks set lat=\1, lon=\2 where language = '"'"'\3'"'"' and title = decode_url_part('"'"'\4'"'"');|g' | $psqlcmd
# media wiki dumper
wget https://github.com/bcollier/mwdumper/blob/master/build/mwdumper.jar
# latest english wikipedia articles
wget http://dumps.wikimedia.org/enwiki/latest/enwiki-latest-pages-articles.xml.bz2
java -jar mwdumper.jar --format=sql:1.5 enwiki-latest-pages-articles.xml.bz2 | ./mysql2pgsql.perl /dev/stdin /dev/stdout | sed 's/"text (/text ("/g' | sed 's/"old_flags)"/"old_flags")/g' | sed 's/"revision (/revision ("/g' | sed 's/"rev_deleted)"/"rev_deleted")/g' | sed 's/"page (/page ("/g' | sed 's/"page_len)"/"page_len")/g' | sed "s/DATE_ADD(E'1970-01-01', INTERVAL UNIX_TIMESTAMP() SECOND)[+]//g" | sed 's/RAND()/0/g' | $psqlcmd

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python
#!/usr/bin/env python3
import osmium
import sys

View File

@@ -25,7 +25,9 @@ $aCMDOptions
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
$oDB =& getDB();
$oDB = new Nominatim\DB;
$oDB->connect();
$oParams = new Nominatim\ParameterParser($aCMDResult);
if ($oParams->getBool('search')) {

View File

@@ -84,9 +84,7 @@ if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
}
// Try accessing the C module, so we know early if something is wrong
if (!checkModulePresence()) {
fail('error loading nominatim.so module');
}
checkModulePresence(); // raises exception on failure
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
$bDidSomething = true;

View File

@@ -27,77 +27,127 @@ if ($aCMDResult['wiki-import']) {
foreach (explode(',', $sLanguageIn) as $sLanguage) {
$sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
$sWikiPageXML = file_get_contents($sURL);
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
if ($oNormalizer !== null) {
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
} else {
$sTrans = null;
}
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
// quotes into the wiki
$sType = preg_replace('/&quot;/', '', $sType);
// sanity check, in case somebody added garbage in the wiki
if (preg_match('/^\\w+$/', $sClass) < 1
|| preg_match('/^\\w+$/', $sType) < 1
) {
trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
exit;
}
// blacklisting: disallow certain class/type combinations
if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
// fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
continue;
}
// whitelisting: if class is in whitelist, allow only tags in the list
if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
// fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
continue;
}
$aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
switch (trim($aMatch[4])) {
case 'near':
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'near');\n";
break;
case 'in':
echo "select getorcreate_amenityoperator(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType', 'in');\n";
break;
default:
echo "select getorcreate_amenity(make_standard_name('".pg_escape_string($sLabel)."'), '$sTrans', '$sClass', '$sType');\n";
break;
}
if (!preg_match_all(
'#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
$sWikiPageXML,
$aMatches,
PREG_SET_ORDER
)) {
continue;
}
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
if ($oNormalizer !== null) {
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
} else {
$sTrans = null;
}
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with
// quotes into the wiki
$sType = preg_replace('/(&quot;|")/', '', $sType);
// sanity check, in case somebody added garbage in the wiki
if (preg_match('/^\\w+$/', $sClass) < 1
|| preg_match('/^\\w+$/', $sType) < 1
) {
trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
exit;
}
// blacklisting: disallow certain class/type combinations
if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
// fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
continue;
}
// whitelisting: if class is in whitelist, allow only tags in the list
if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
// fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
continue;
}
$aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
switch (trim($aMatch[4])) {
case 'near':
printf(
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
case 'in':
printf(
"SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
default:
printf(
"SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
pg_escape_string($sLabel),
$sTrans,
$sClass,
$sType
);
break;
}
}
}
echo 'create index idx_placex_classtype on placex (class, type);';
echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
foreach ($aPairs as $aPair) {
echo 'create table place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]);
if (CONST_Tablespace_Aux_Data)
echo ' tablespace '.CONST_Tablespace_Aux_Data;
echo ' as select place_id as place_id,st_centroid(geometry) as centroid from placex where ';
echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."'";
echo ";\n";
$sql_tablespace = CONST_Tablespace_Aux_Data ? ' TABLESPACE '.CONST_Tablespace_Aux_Data : '';
echo 'CREATE INDEX idx_place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]).'_centroid ';
echo 'ON place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]).' USING GIST (centroid)';
if (CONST_Tablespace_Aux_Index)
echo ' tablespace '.CONST_Tablespace_Aux_Index;
echo ";\n";
printf(
'CREATE TABLE place_classtype_%s_%s'
. $sql_tablespace
. ' AS'
. ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
. " WHERE class = '%s' AND type = '%s'"
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
echo 'CREATE INDEX idx_place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]).'_place_id ';
echo 'ON place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]).' USING btree(place_id)';
if (CONST_Tablespace_Aux_Index)
echo ' tablespace '.CONST_Tablespace_Aux_Index;
echo ";\n";
printf(
'CREATE INDEX idx_place_classtype_%s_%s_centroid'
. ' ON place_classtype_%s_%s USING GIST (centroid)'
. $sql_tablespace
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
echo 'GRANT SELECT ON place_classtype_'.pg_escape_string($aPair[0]).'_'.pg_escape_string($aPair[1]).' TO "'.CONST_Database_Web_User."\";\n";
printf(
'CREATE INDEX idx_place_classtype_%s_%s_place_id'
. ' ON place_classtype_%s_%s USING btree(place_id)'
. $sql_tablespace
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1])
);
printf(
'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
. ";\n",
pg_escape_string($aPair[0]),
pg_escape_string($aPair[1]),
CONST_Database_Web_User
);
}
echo 'drop index idx_placex_classtype;';
echo 'DROP INDEX idx_placex_classtype;';
}

View File

@@ -52,9 +52,10 @@ if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
date_default_timezone_set('Etc/UTC');
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
$aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
// cache memory to be used by osm2pgsql, should not be more than the available memory
@@ -115,7 +116,7 @@ if ($aResult['init-updates']) {
}
$sDatabaseDate = getDatabaseDate($oDB);
if ($sDatabaseDate === false) {
if (!$sDatabaseDate) {
fail('Cannot determine date of database.');
}
$sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
@@ -128,10 +129,13 @@ if ($aResult['init-updates']) {
fail('Error running pyosmium tools');
}
pg_query($oDB->connection, 'TRUNCATE import_status');
$oDB->exec('TRUNCATE import_status');
$sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
$sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
if (!pg_query($oDB->connection, $sSQL)) {
try {
$oDB->exec($sSQL);
} catch (\Nominatim\DatabaseError $e) {
fail('Could not enter sequence into database.');
}
@@ -139,7 +143,7 @@ if ($aResult['init-updates']) {
}
if ($aResult['check-for-updates']) {
$aLastState = chksql($oDB->getRow('SELECT sequence_id FROM import_status'));
$aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
if (!$aLastState['sequence_id']) {
fail('Updates not set up. Please run ./utils/update.php --init-updates.');
@@ -219,20 +223,21 @@ if ($bHaveDiff) {
}
if ($aResult['deduplicate']) {
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
if (getPostgresVersion($oDB) < 9.3) {
if ($oDB->getPostgresVersion() < 9.3) {
fail('ERROR: deduplicate is only currently supported in postgresql 9.3');
}
$sSQL = 'select partition from country_name order by country_code';
$aPartitions = chksql($oDB->getCol($sSQL));
$aPartitions = $oDB->getCol($sSQL);
$aPartitions[] = 0;
// we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
foreach ($aPartitions as $i => $sPartition) {
$sSQL = 'select count(*) from search_name_'.$sPartition;
$nEntries = chksql($oDB->getOne($sSQL));
$nEntries = $oDB->getOne($sSQL);
if ($nEntries == 0) {
unset($aPartitions[$i]);
}
@@ -241,7 +246,7 @@ if ($aResult['deduplicate']) {
$sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
$sSQL .= ' and class is null and type is null and country_code is null';
$sSQL .= ' group by word_token having count(*) > 1 order by word_token';
$aDuplicateTokens = chksql($oDB->getAll($sSQL));
$aDuplicateTokens = $oDB->getAll($sSQL);
foreach ($aDuplicateTokens as $aToken) {
if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
echo 'Deduping '.$aToken['word_token']."\n";
@@ -249,7 +254,7 @@ if ($aResult['deduplicate']) {
$sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num';
$sSQL .= " from word where word_token = '".$aToken['word_token'];
$sSQL .= "' and class is null and type is null and country_code is null order by num desc";
$aTokenSet = chksql($oDB->getAll($sSQL));
$aTokenSet = $oDB->getAll($sSQL);
$aKeep = array_shift($aTokenSet);
$iKeepID = $aKeep['word_id'];
@@ -259,32 +264,32 @@ if ($aResult['deduplicate']) {
$sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.'),';
$sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
$sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
$sSQL = 'update search_name set';
$sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
$sSQL .= ' where nameaddress_vector @> ARRAY['.$aRemove['word_id'].']';
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
$sSQL = 'update location_area_country set';
$sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
$sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
foreach ($aPartitions as $sPartition) {
$sSQL = 'update search_name_'.$sPartition.' set';
$sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.')';
$sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
$sSQL = 'update location_area_country set';
$sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
$sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
}
$sSQL = 'delete from word where word_id = '.$aRemove['word_id'];
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
}
}
}
@@ -306,7 +311,7 @@ if ($aResult['index']) {
runWithEnv($sCmd, $aProcEnv);
$oDB->query('update import_status set indexed = true');
$oDB->exec('update import_status set indexed = true');
}
if ($aResult['update-address-levels']) {
@@ -335,7 +340,7 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
while (true) {
$fStartTime = time();
$aLastState = chksql($oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status'));
$aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
if (!$aLastState['sequence_id']) {
echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
@@ -347,7 +352,7 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
$sBatchEnd = $aLastState['lastimportdate'];
$iEndSequence = $aLastState['sequence_id'];
if ($aLastState['indexed'] == 't') {
if ($aLastState['indexed']) {
// Sleep if the update interval has not yet been reached.
$fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
if ($fNextUpdate > $fStartTime) {
@@ -413,12 +418,12 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
$sSQL .= date('Y-m-d H:i:s')."','import')";
var_Dump($sSQL);
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
// update the status
$sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
var_Dump($sSQL);
chksql($oDB->query($sSQL));
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
}
@@ -440,11 +445,11 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
$sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
$sSQL .= date('Y-m-d H:i:s')."','index')";
var_Dump($sSQL);
$oDB->query($sSQL);
$oDB->exec($sSQL);
echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
$sSQL = 'update import_status set indexed = true';
$oDB->query($sSQL);
$oDB->exec($sSQL);
} else {
if ($aResult['import-osmosis-all']) {
echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";

View File

@@ -18,7 +18,8 @@ require_once(CONST_BasePath.'/lib/Geocode.php');
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
$oDB =& getDB();
$oDB = new Nominatim\DB();
$oDB->connect();
$bVerbose = $aResult['verbose'];

View File

@@ -23,7 +23,7 @@
sudo yum install -y postgresql-server postgresql-contrib postgresql-devel \
postgis postgis-utils \
wget git cmake make gcc gcc-c++ libtool policycoreutils-python \
php-pgsql php php-pear php-pear-DB php-intl libpqxx-devel \
php-pgsql php php-intl libpqxx-devel \
proj-epsg bzip2-devel proj-devel libxml2-devel boost-devel \
expat-devel zlib-devel
@@ -34,7 +34,9 @@
sudo yum install -y python34-pip python34-setuptools python34-devel \
php-phpunit-PHPUnit
pip3 install --user behave nose pytidylib psycopg2
sudo pear install PHP_CodeSniffer
composer global require "squizlabs/php_codesniffer=*"
sudo ln -s ~/.config/composer/vendor/bin/phpcs /usr/bin/
#
# System Configuration

View File

@@ -29,7 +29,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-9.5 postgresql-9.5-postgis-2.2 \
postgresql-contrib-9.5 \
apache2 php php-pgsql libapache2-mod-php php-pear php-db \
apache2 php php-pgsql libapache2-mod-php \
php-intl git
# If you want to run the test suite, you need to install the following
@@ -39,7 +39,9 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
python3-psycopg2 python3-tidylib phpunit php-cgi
pip3 install --user behave nose
sudo pear install PHP_CodeSniffer
composer global require "squizlabs/php_codesniffer=*"
sudo ln -s ~/.config/composer/vendor/bin/phpcs /usr/bin/
#
# System Configuration

Some files were not shown because too many files have changed in this diff.