Compare commits

...

1031 Commits

Author SHA1 Message Date
Sarah Hoffmann
b51efd876f prepare release 3.7.2 2021-05-30 16:15:22 +02:00
Sarah Hoffmann
405279dae3 commit changes to replication log table
Fixes #2350.
2021-05-30 15:09:36 +02:00
mogita
3dd223dfa3 fix: add the missing question mark 2021-05-30 15:08:33 +02:00
Frederik Ramm
7e6125ada5 Add array_key_last function for PHP <7.3
This patch adds an array_key_last function if it doesn't yet exist, fixes #2316. It is tested on PHP 7.2.24 but not PHP 7.3.
2021-05-30 15:06:49 +02:00
Sarah Hoffmann
93b72e4e9b correctly catch the exception when import date is missing 2021-05-30 15:05:55 +02:00
Sarah Hoffmann
3bf57e6d2a do not check for extra housenumber index for reverse-only
Also adds a database check for reverse only import to the CI.
2021-05-30 15:02:31 +02:00
Sarah Hoffmann
e9ce5714a7 prepare release 3.7.1 2021-04-26 11:23:14 +02:00
Sarah Hoffmann
ff1a49a0f8 rebuild manpage 2021-04-26 11:23:14 +02:00
Sarah Hoffmann
ec2873fbf6 avoid Path in subprocess parameters
Not supported by Python 3.5.
2021-04-26 11:23:14 +02:00
Sarah Hoffmann
5dee3cf7f7 add warming to CI import test 2021-04-26 11:23:14 +02:00
AntoJvlt
341cb55690 Only log a warning if a wrong input is detected on the wiki while importing special phrases 2021-04-26 09:31:08 +02:00
Sarah Hoffmann
055b5a46ba docs: update deployment to use project directory
Fixes #2295.
2021-04-24 16:24:14 +02:00
Sarah Hoffmann
6e8d4c0dac actions: add import on ubuntu 18.04
This uses oldest possible dependencies where possible.
2021-04-24 15:14:35 +02:00
Sarah Hoffmann
baf684195b use group() for regex matches
Needed for compatibility with Python 3.5.
2021-04-24 15:13:56 +02:00
Sarah Hoffmann
b0832669a3 use pathlib version of open 2021-04-24 15:13:47 +02:00
Sarah Hoffmann
243394b1c2 subprocess needs string argument
Compatibility change for Python 3.5.
2021-04-24 15:13:38 +02:00
Sarah Hoffmann
0f14851c98 check for existance of custom .env before opening 2021-04-24 15:13:14 +02:00
Sarah Hoffmann
b15a687564 use more generic ImportError to check for module
ModuleNotFoundError was only introduced in Python 3.6.
2021-04-24 15:13:06 +02:00
Sarah Hoffmann
0289d125da replace usages of fromisoformat() with strptime()
fromisoformat was only introduced with Python 3.7 while we
still support Python 3.5.

Fixes #2292.
2021-04-24 15:12:57 +02:00
Sarah Hoffmann
76ee6959c1 remove argparse dependency for vagrant scripts
Users don't need to recreate the manpage.
2021-04-24 15:12:49 +02:00
RhinoDevel
74c74dde07 Replace "nominatim-update" with "nominatim".
If I am not mistaken, the correct command to index imported data via commandline is "nominatim index".
2021-04-24 15:12:22 +02:00
AntoJvlt
f7e4bfa980 Fix occurence regex 2021-04-24 15:11:02 +02:00
Channgping Chen
4a6e9ba187 fix index on location_property_tiger (parent_place_id)
Looks like 2af82975cd
accidentally renamed an index. Because of the added "if not
exists" clause, the index doesn't get created. This
significantly slows down reverse queries because they now
require full scans on location_property_tiger.

Without this fix, reverse queries can take 8s on a full
planet install on an r5.8xlarge instance in EC2.
2021-04-24 15:10:46 +02:00
Sarah Hoffmann
10d99893f9 add support index when continuing import at index phase
Indexing scans the placex table sequentially during indexing
on the initial import. That is okay because we know that all
rows need to be processed anywhere. When continuing the import,
however, a large part might already be indexed, so that the
process spends a lot of time going through rows that are no
longer of interest. Create a supporting index for all unindexed
rows to speed up the scan. This is the same index as used later
for updates.
2021-04-24 15:09:55 +02:00
Sarah Hoffmann
b7a5a8b5f7 github actions: reintroduce postgresql repo 2021-04-24 15:07:59 +02:00
Darkshredder
6ed495bfc2 Fix: Removed error if endstatement is wrong and improved tests 2021-04-24 15:07:37 +02:00
AntoJvlt
cd7c841f5b Fix default languages loading 2021-04-24 15:06:22 +02:00
Sarah Hoffmann
8f9c10c762 split LANGUAGES parameter before use
The user supplies the languages as a comma-separated list.
2021-04-24 15:06:02 +02:00
Sarah Hoffmann
492186716f prepare 3.7.0 release 2021-04-06 21:23:29 +02:00
Sarah Hoffmann
07fda48cee docs: minor spelling corrections 2021-04-06 16:09:53 +02:00
Sarah Hoffmann
4b31be5203 docs: unpacking tiger data is no longer necessary 2021-04-06 15:56:08 +02:00
Sarah Hoffmann
5d69c7ade1 Merge pull request #2250 from lonvia/save-transliterated-housenumbers
Switch to saving transliterated housenumbers in placex
2021-04-05 15:48:22 +02:00
Sarah Hoffmann
96b0699621 add migration for transliterated housenumbers 2021-04-04 15:26:47 +02:00
Sarah Hoffmann
6cbef84cad use new transliteration in initial housenumber word computation
The new create_housenumber_id() function splits housenumber
lists correctly. Otherwise there is no difference.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
55fcc44c8c correctly handle housenumber lists
Lists are now standardised to use a semicolon separator.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
16a66b5326 move transliteration of housenumbers into indexing
Housenumbers are now saved in transliterated form in the housenumber
column. This saves the transliteration step during lookup.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
3590e76a1c tests for finding non-ascii housenumbers 2021-04-04 15:26:47 +02:00
Sarah Hoffmann
0ec3fdd3ba return housenumbers always from address field
This means that we can use normalized versions of the
housenumber in the housenumber field as it is no longer
a user visible field.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
c0f0b66509 Merge pull request #2248 from darkshredder/special-term-test
Added Test for TokenSpecialTerm
2021-04-03 18:31:01 +02:00
Darkshredder
0f9df32d11 Added Test for TokenSpecialTerm 2021-04-02 04:49:05 +05:30
Sarah Hoffmann
a370c8be4b Merge pull request #2247 from lonvia/index-for-housenumber-lookup
Index for housenumber lookup
2021-04-01 18:35:00 +02:00
Sarah Hoffmann
d6e0bc698e add recommendation for Postgresql 11+ 2021-04-01 17:10:44 +02:00
Sarah Hoffmann
8d8b1d4307 use non-key index to speed up housenumber search
On Postgresql versions 11+ add an index to speed up the lookup
of housenumbers for terms found in search_name. This is really
just a band-aid around the query planer's interpretation of the
query.
2021-04-01 17:10:44 +02:00
Sarah Hoffmann
8dbfdd59b0 Merge pull request #2243 from darkshredder/XML-format-fix
Fixed: XML format: more_url points to localhost, not base URL
2021-03-30 09:19:01 +02:00
Sarah Hoffmann
cd03882536 Merge pull request #2244 from AntoJvlt/import-special-phrases-tests-cleaning
Cleaned tests for special phrases.
2021-03-30 09:17:27 +02:00
Darkshredder
0b154a2a1a Added HTTP_HOST to if statement 2021-03-30 03:02:55 +05:30
AntoJvlt
e82de99e5a Cleaned tests of exceptions and fix phrase_settings.json test file name. 2021-03-29 22:07:29 +02:00
Darkshredder
27b379c1e3 fixed: XML format: more_url points to localhost, not base URL 2021-03-30 01:02:43 +05:30
Sarah Hoffmann
f9517e9143 Merge pull request #2234 from darkshredder/add-man-page
Added Manual page for Nominatim tool
2021-03-29 14:25:10 +02:00
Sarah Hoffmann
e05dee6df5 allow sorting by housenumbers for rare street names
Usually we don't narrow down search results by house number when
only a street name is given because there may be a lot of rows
to cross check when the street name is very frequent. However,
when it is known to be rare, the housenumber check may be done
anyway.

Fixes #2238.
2021-03-29 12:06:51 +02:00
Darkshredder
3fad492c6f Update manpage after rebase 2021-03-29 14:27:06 +05:30
Darkshredder
b7d6ae93e3 Nominatim/cli.py rebase fixes 2021-03-29 14:16:41 +05:30
Darkshredder
21b1b75b08 Rebase with master 2021-03-29 14:00:45 +05:30
Darkshredder
bbe0353b23 fixed indentation and used sed to remove AUTHORS section 2021-03-29 13:57:13 +05:30
Darkshredder
51e2654cd2 Added Manual page and fixed documentation 2021-03-29 13:57:13 +05:30
Sarah Hoffmann
09b2510219 Merge pull request #2228 from AntoJvlt/import-special-phrases-porting-python
Import special phrases porting python
2021-03-29 09:49:35 +02:00
AntoJvlt
57ce75eb67 Change command 'import-special-phrases --from-wiki' to 'special-phrases --import-from-wiki'. 2021-03-26 02:22:38 +01:00
AntoJvlt
cde9389e75 Errors fixes, Cleaning code, Improvement and addition of tests 2021-03-26 01:53:33 +01:00
AntoJvlt
2c19bd5ea3 Encapsulation of tools/special_phrases.py into SpecialPhrasesImporter class and add new tests. 2021-03-25 21:13:57 +01:00
AntoJvlt
ff34198569 Code cleaning, tests simplification and use of python3-icu package 2021-03-23 23:56:39 +01:00
AntoJvlt
919469c8fe Updated documentation for PyICU support 2021-03-23 23:34:19 +01:00
AntoJvlt
1ce8b530cd Introduction of PyICU for transliteration in python. Reversed changes in normalization.sql. 2021-03-23 23:34:16 +01:00
AntoJvlt
2fb6018078 Added wrapper in specialphrases.php to call corresponding nominatim command. 2021-03-23 23:30:42 +01:00
AntoJvlt
6d56cbb3e8 Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. 2021-03-23 23:30:39 +01:00
AntoJvlt
1a93319093 Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. 2021-03-23 23:27:56 +01:00
Sarah Hoffmann
28b4fb12b6 Merge pull request #2233 from lonvia/index-for-postcode-ids
Create postcode id index earlier
2021-03-23 09:18:10 +01:00
Sarah Hoffmann
5dabc0aca8 create postcode id index earlier
Now that the indexer takes care of indexing the postcode tables,
the id index is needed to find the rows to index.
2021-03-22 22:24:56 +01:00
Sarah Hoffmann
4f1bdde32e Merge pull request #2231 from mtmail/correct-cli-help-page
nominatim -h was printing wrong text for lookup and details
2021-03-21 16:52:20 +01:00
Sarah Hoffmann
a08ca5b1b5 avoid division by zero in progress meter
On Windows systems the timer may not be accurate enough to measure
the time between init() and done(). Avoid computing statistics with
a diff time of 0 in such cases.

Fixes #2230.
2021-03-21 16:47:22 +01:00
marc tobias
87d5883ddb nominatim -h was priting wrong text for lookup and details 2021-03-21 16:06:41 +01:00
AntoJvlt
d5acade4db Deleted specialphrases.php and phrase_settings.php 2021-03-20 19:48:05 +01:00
AntoJvlt
9d1c23e4f5 Updated specialphrases_testdb.sql 2021-03-20 19:17:03 +01:00
AntoJvlt
17cb59efbd Ported functions for the import of special phrases from php to python.
- the command is now --import-special-phrases
- the output is not an sql file anymore, data are directly imported to the database.
- the little part on the documentation (section data import) has been modified.
2021-03-20 19:11:50 +01:00
Sarah Hoffmann
118befd7d7 bdd tests: make indexing less verbose
Do not print progress info for indexing when there is an error
in the BDD tests.
2021-03-20 10:39:29 +01:00
Sarah Hoffmann
0d9fe6e49c Merge pull request #2219 from lonvia/bdd-test-remove-php
BDD tests: run all setup via nominatim Python library
2021-03-17 11:40:34 +01:00
Sarah Hoffmann
ebae3553e0 bdd: run all setup via nominatim Python library
Drops all calls to PHP utility functions. nominatim cli functions
are used where possible, to stay as close to the final code as
possible with the tests.

By removing the PHP calls, the test code now only uses osm2pgsql and
the database module from the build directory.
2021-03-16 22:20:41 +01:00
Sarah Hoffmann
d3ff831b8a Merge pull request #2216 from lonvia/fix-reverse-interpolation
Reverse: do not prefer interpolations over closer housenumbers
2021-03-15 14:08:54 +01:00
Sarah Hoffmann
4d7c5ec089 reverse: do not prefer interpolations over closer housenumbers
Always look up the closest housenumber before looking up
interpolations. This ensures that closer housenumbers are
preferred over interpolations.

Fixes #2214.
2021-03-15 10:50:04 +01:00
Sarah Hoffmann
81a6b746b8 Merge pull request #2212 from darkshredder/country-name
Ported createCountryNames() to python and Added tests
2021-03-15 09:36:06 +01:00
Darkshredder
f356a75a24 Add setup.php 2021-03-14 15:02:30 +05:30
Sarah Hoffmann
7212fa8630 fix template variable name 2021-03-13 12:05:53 +01:00
Sarah Hoffmann
6cabc44841 Merge pull request #2213 from lonvia/tweak-search-weights
Some more tweaking of the ranking of search interpretations
2021-03-12 15:47:36 +01:00
Darkshredder
b108bd1c1e Linting fix 2021-03-12 18:28:47 +05:30
Darkshredder
077a8c1f95 refactored tests and made changes to code for easy readibility 2021-03-12 18:23:20 +05:30
Darkshredder
7a874d5b97 Ported createCountryNames() to python and added tests 2021-03-12 10:28:41 +05:30
Sarah Hoffmann
9086a794a1 Merge pull request #2204 from darkshredder/tiger-data
Ported tiger-data-import to Python and Added Tarball Support
2021-03-11 22:48:38 +01:00
Sarah Hoffmann
6dd2b9c2ec do not mix partial names with other words
As soon as a housenumber, postcode, etc. appear, the name term
must obviously be closed and no further partial terms can be
appended.
2021-03-11 22:44:49 +01:00
Sarah Hoffmann
3fbe4511f9 make linter happy 2021-03-11 21:14:23 +01:00
Sarah Hoffmann
3933fc3ad3 avoid multi-term partials in names
Names are either full words or single-word partial names.
Searching for multi-word partials yields exactly the same
result as with full words.
2021-03-11 20:42:37 +01:00
Sarah Hoffmann
00b05e2394 higher penalty for special searches
Adds a general higher penalty for special search term and an
additional one if the term is anywhere but the beginning or the
end. Also housenumbers and special searches together are less
likely.
2021-03-11 20:37:51 +01:00
Sarah Hoffmann
d5e8c5e975 do not mix partial and full name terms
If NameNonSearch already contains a partial term, then a
full term must not be added to the Name list anymore.
2021-03-11 20:22:54 +01:00
Sarah Hoffmann
478dfb0639 add one-rank penalty for using partial search
Ensures that full matches are preferred over partial ones even when
the full word consists of only one term.
2021-03-11 17:52:44 +01:00
Sarah Hoffmann
f498e40208 fix result splitting for last search group
When we are in the final iteration of the search groups, it is not
possible to further delay the results. Unconditionally use the
results with the best rank instead.
2021-03-11 17:14:46 +01:00
Sarah Hoffmann
182f5f5d7b give preference to full words in address, too
Full word terms are already preferred for the name part. Adding
only one-word partials to the address, makes it impossible to
give a similar preference for the address part. Each term adds
a rank penalty. The problem here is that we interpret the query
forwards and backwards. Having different penalty systems for
name and address means that the same term ends up with different
penalties and that often leads to interpretations of the wrong
direction being in the way.
2021-03-11 15:03:36 +01:00
Darkshredder
e5719de657 Added fixture for sql_preprocessor and fixed some issues 2021-03-11 15:39:17 +05:30
Darkshredder
8486a83cf5 Added test for tarfile 2021-03-10 18:14:17 +05:30
Darkshredder
ccfad57fca Added test and removed runlegacyscript 2021-03-10 17:18:12 +05:30
Darkshredder
64128b699a fixed linting, refactored threaded sql handling and removed importTigerData() function 2021-03-10 13:28:29 +05:30
Darkshredder
4080fbb95c Test fixes 2021-03-09 01:00:56 +05:30
Darkshredder
14ec83c886 Linting fixes 2021-03-08 23:10:49 +05:30
Darkshredder
122c4618b9 Linting fixes 2021-03-08 22:59:51 +05:30
Darkshredder
2af82975cd Ported tiger-data-import to python and Added Tarball Support 2021-03-08 21:57:56 +05:30
Sarah Hoffmann
35f4695b67 Merge pull request #2200 from lonvia/migrations-for-current-version
Introduce a command for database migration
2021-03-08 10:14:03 +01:00
Sarah Hoffmann
3c9e09545e documentation for new migration command 2021-03-06 16:38:37 +01:00
Sarah Hoffmann
764a41b973 automatic migration from 3.6 release
Adds a 'admin --migrate' command that checks for the current
database version and runs any necessary migrations. Also
has migrations going back to 3.6.
2021-03-06 16:36:57 +01:00
Sarah Hoffmann
9d103503f7 Merge pull request #2197 from lonvia/use-jinja-for-sql-preprocessing
Use jinja2 for SQL preprocessing
2021-03-04 16:36:18 +01:00
Sarah Hoffmann
09f4d767e4 port index creation to python
Also switches to jinja-based preprocessing, which allows to
simplify the SQL files. Use 'if not exists' where possible
so that the step can be rerun to fix missing indexes.
2021-03-04 11:11:47 +01:00
Sarah Hoffmann
dd301cf5ac indexer: ANALYSE must be run outside transactions 2021-03-04 11:06:33 +01:00
Sarah Hoffmann
eacabb0e96 move table creation to jinja-based preprocessing 2021-03-03 22:07:51 +01:00
Sarah Hoffmann
6cda021d9b add new jinja2 requirement 2021-03-03 17:51:08 +01:00
Sarah Hoffmann
d2bd6aa78d introduce jinja2 for preprocessing SQL
Replaces various hand-crafted replacements of varying format with
a single Jinja2 templating mechanism. Allows full access to
configuration if necessary.
2021-03-03 17:51:08 +01:00
Sarah Hoffmann
6b306f30b6 Merge pull request #2194 from grischard/patch-1
Fix typo in .github/actions/build-nominatim/action.yml
2021-03-03 11:34:12 +01:00
Guillaume Rischard
c48fd18344 Update action.yml 2021-03-03 11:20:21 +01:00
Sarah Hoffmann
8ea7e04363 Merge pull request #2192 from lonvia/database-versioning
Introduce database versioning
2021-03-02 15:57:46 +01:00
Sarah Hoffmann
32c2d2b248 document new status fields 2021-03-01 22:21:37 +01:00
Sarah Hoffmann
111cca8c9a return database version with status API 2021-03-01 22:17:16 +01:00
Sarah Hoffmann
7ae9c3a9f0 add database_version setting to tests 2021-03-01 21:49:33 +01:00
Sarah Hoffmann
bf4320a7d6 do not depend on cmdline parameter for creating partition tables
The partition numbers in use only depend on the entries in search_name.
2021-03-01 21:28:39 +01:00
Sarah Hoffmann
3a0a4b9175 save software version in the database
The version represents the software version that was used to
import the data.
2021-03-01 20:35:15 +01:00
Sarah Hoffmann
4faefe156c report software version of status call 2021-03-01 16:47:19 +01:00
Sarah Hoffmann
86273f5e2a introduce database patch level for version
This will be needed later for automatic migrations.
2021-03-01 16:46:19 +01:00
Sarah Hoffmann
b4f64aa770 make sure that calls to PHP legacy scripts are fatal on error 2021-03-01 16:10:45 +01:00
Sarah Hoffmann
976c5e9121 introduce table for in-database properties
Adds a simple table where settings for the database can be
saved. This is useful for state that must not change after
import.
2021-03-01 16:09:17 +01:00
Sarah Hoffmann
db663dd92f remove unused import 2021-03-01 09:26:08 +01:00
Sarah Hoffmann
90a5d23016 use tmp_path fixture in config tests 2021-03-01 09:24:04 +01:00
Sarah Hoffmann
99e35d256a fix typo 2021-03-01 09:07:49 +01:00
Sarah Hoffmann
e14e7c6235 Merge pull request #2186 from lonvia/port-import-to-python
Move setup procedure to Python
2021-02-27 12:09:23 +01:00
Sarah Hoffmann
b46adbad22 make sure psql always finishes
If an execption is raised by other means, we still have to close
the stdin pipe to psql to make sure that it exits and releases its
connection to the database.
2021-02-27 10:24:40 +01:00
Sarah Hoffmann
afabbeb546 older versions of Postgresql need explicit return type 2021-02-27 09:46:42 +01:00
Sarah Hoffmann
d14a3df10f do not truncate search_name in reverse-only mode 2021-02-27 09:46:42 +01:00
Sarah Hoffmann
9feb84e426 actions: add psutil dependency 2021-02-26 16:50:09 +01:00
Sarah Hoffmann
c7f40e3cee fix verbose flag for PHP wrapper scripts
The flag must come after the command.
2021-02-26 16:49:32 +01:00
Sarah Hoffmann
dd03aeb966 bdd: use python library where possible
Replace calls to PHP scripts with direct calls into the
nominatim Python library where possible. This speed up
tests quite a bit.
2021-02-26 16:14:29 +01:00
Sarah Hoffmann
15b5906790 move setup function to python
There are still back-calls to PHP for some of the sub-steps.
These needs some larger refactoring to be moved to Python.
2021-02-26 15:02:39 +01:00
Sarah Hoffmann
3ee8d9fa75 properly close connections of indexer after use 2021-02-26 12:10:54 +01:00
Sarah Hoffmann
57db5819ef prot load-data function to python 2021-02-25 21:32:40 +01:00
Sarah Hoffmann
3c186f8030 add a function for the intial indexing run
Also moves postcodes to fully parallel indexing.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
db5e78c879 remove unused partitionfunction function 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
c7fd0a7af4 port wikipedia importance functions to python 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
32683f73c7 move import-data option to native python
This adds a new dependecy to the Python psutil package.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
7222235579 introduce custom object for cmdline arguments
Allows to define special functions over the arguments.

Also splits CLI tests in two files as they have become too many.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
f6e894a53a port database setup function to python
Hide the former PHP functions in a transition command until
they are removed.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
b93ec2522e use psql for executing sql files
This allows to run larger files without needing to keep
them in memory.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
af7226393a add function to set up libpq environment
Instead of parsing the DSN for each external libpq program we
are going to execute, provide a function that feeds them all
necessary parameters through the environment.

osm2pgsql is the first user.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
e520613362 convert connect() into a context manager 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
204fe20b4b Merge pull request #2185 from lonvia/fix-deadlock-handling-for-psycopg27
Improve deadlock detection for various versions of psycopg2
2021-02-25 18:39:40 +01:00
Sarah Hoffmann
a1f0fc1a10 improve deadlock detection for various versions of psycopg2
Psycopg2 has changed the kind of exception that is emitted on
deadlocks between versions 2.7 and 2.8. The code was already
trying to catch both kind of errors but because the
psycopg2.errors package is unknown in 2.7 and below, the
code would throw an exception on anything but a deadlock error.

This commit wraps the deadlock handling into a context manager
to avoid code duplication and uses module imports to detect if
the new error codes are available.

Also sets the required psycopg2 version to 2.7 or bigger as
versions below are difficult to test.
2021-02-25 18:11:16 +01:00
Sarah Hoffmann
68c3862270 Merge pull request #2182 from lonvia/change-error-for-details
Return 404 for details when no object is found in database
2021-02-23 09:09:35 +01:00
Sarah Hoffmann
5b7483ada5 return 404 for details when no bject is found in database
Fixes #2157.
2021-02-22 16:28:29 +01:00
Sarah Hoffmann
72b01148d2 Merge pull request #2181 from lonvia/port-more-tool-functions-to-python
Port more tool functions to python
2021-02-22 16:11:21 +01:00
Sarah Hoffmann
971df231b0 avoid os.environ as default valie 2021-02-19 19:29:57 +01:00
Sarah Hoffmann
4b32cbe518 fix return code for check database run with 'not applicable' 2021-02-19 18:32:00 +01:00
Sarah Hoffmann
f08078ccca bdd tests: directly call python code for setup-website 2021-02-19 18:20:55 +01:00
Sarah Hoffmann
389138abfe port setup-website to python 2021-02-19 17:51:06 +01:00
Sarah Hoffmann
a0ae4945cd add unit tests for new check_database code 2021-02-18 20:36:11 +01:00
Sarah Hoffmann
b169e4c88c port check-database function to python
This change also adapts the hints to use the nominatim tool.
Slightly changed checks, so that they are just as effective on
a frozen database.
2021-02-18 17:32:30 +01:00
Sarah Hoffmann
a60c34bded use a frozen DB for API tests
This way we also test that dropping does the right thing.
2021-02-17 22:35:27 +01:00
Sarah Hoffmann
153dbb71b8 remove unused code 2021-02-17 22:25:23 +01:00
Sarah Hoffmann
101a1f895d port freeze function to python 2021-02-17 21:43:15 +01:00
Sarah Hoffmann
bd27310c68 Merge pull request #2173 from lonvia/penality-for-housenumberless-places
Increase penalty for places without housenumber
2021-02-17 17:52:59 +01:00
Sarah Hoffmann
42ecd535b3 Merge pull request #2174 from lonvia/disable-jit-for-osm2pgsql-again
Disable JIT and parallel execution for osm2pgsql updates again
2021-02-16 21:32:57 +01:00
Sarah Hoffmann
c9838a02ce disable JIT and parallel execution for osm2pgsql updates again
The gazetteer output doesn't disable these functions when
writing to the place table but the triggers may contain
operations that cause misplanning for the query planner.
2021-02-16 18:23:47 +01:00
Sarah Hoffmann
7ebcf602ac add simple test for result splitting with multiple ranks 2021-02-16 17:59:12 +01:00
Sarah Hoffmann
8eb85f1340 increase penalty for places without housenumber
Results where the housenumber was dropped are an unlikely result
when they refer to something other than a street. Therefore
increase their result rank so that other matches are tried first
before choosing them as a result.

Improves #2167.
2021-02-16 17:47:06 +01:00
Sarah Hoffmann
2a8e3741fa Merge pull request #2166 from mtmail/tiger-2020
documentation: 2020 TIGER data got released
2021-02-16 14:45:14 +01:00
Sarah Hoffmann
684378722c Merge pull request #2171 from lonvia/update-vagrant-scripts-for-make-install
Update vagrant scripts for make install
2021-02-16 14:42:38 +01:00
Sarah Hoffmann
286a686f88 switch vagrant scripts to make install 2021-02-16 12:04:34 +01:00
Sarah Hoffmann
7360e6c5df use file copy on older cmake to install osm2pgsql
Fixes #2170.
2021-02-16 11:06:14 +01:00
Sarah Hoffmann
fbe7be760b ignore failure to get replication date 2021-02-14 12:17:30 +01:00
marc tobias
a3ce89aeff documentation: 2020 TIGER data got released 2021-02-12 23:57:12 +01:00
Sarah Hoffmann
6a7e0d652b Merge pull request #2164 from lonvia/add-make-install
Make Nominatim installable
2021-02-12 10:32:26 +01:00
Sarah Hoffmann
7cc4c53adb always return 0 for updates unless there is an error
This is more in line with previous behavioru than returning
a status code when no updates are available.
2021-02-11 10:33:49 +01:00
Sarah Hoffmann
24b13a7a87 docs: adapt check-database command 2021-02-10 21:55:04 +01:00
Sarah Hoffmann
b6c2dbf69c actions: remove install directories before import
This ensures that any dangling references to the build
or source directory are caught by the CI.
2021-02-10 17:59:52 +01:00
Sarah Hoffmann
0e0e9a6809 need test database for analysing cli test 2021-02-10 16:19:51 +01:00
Sarah Hoffmann
ed60154552 actions: test import with installed version of Nominatim 2021-02-10 16:17:52 +01:00
Sarah Hoffmann
85589cf7dc add 'make install' to installation instructions 2021-02-10 11:15:21 +01:00
Sarah Hoffmann
99dcd10d3f test for existance of country grid in cmake already
Given that the file potentially gets installed, it needs to be
present during build time already. Checking during the import
is too late.
2021-02-10 10:40:36 +01:00
Sarah Hoffmann
745ae02f47 make installation targets conditional to what is built 2021-02-10 10:04:07 +01:00
Sarah Hoffmann
b6bd11f292 add make install target
Installation includes PHP andPython libraries, settings, the basic
country data, the postgresql module and our custom version of
osm2pgsql. The latter is installed in our private library directory
so that it does not get in the way of a potentially installed
osm2pgsql from the distribution.
2021-02-09 21:04:42 +01:00
Sarah Hoffmann
c60a0784ea adapt unit tests to new directory structure 2021-02-09 20:13:00 +01:00
Sarah Hoffmann
3cb6f3e460 use DataDir constant for data only
So far the data directory constant has pointed to the source
directory to be usable with different subdirectories. Now only
the data subdirectory itself is being used with the constant,
so point to the directory directly.
2021-02-09 20:04:08 +01:00
Sarah Hoffmann
de37dc9300 forgot to replace one occurence of sql_dir 2021-02-09 19:32:05 +01:00
Sarah Hoffmann
8ffd7d9243 remove unused BINDIR constant 2021-02-09 19:30:31 +01:00
Sarah Hoffmann
298ed11261 introduce constant for configuration directory
This replaces {data_dir}/settings throughout the code, so that
the configuration may be placed somewhere else in the directory
structure (e.g. in /etc).
2021-02-09 18:45:45 +01:00
Sarah Hoffmann
b9517c99ae rename sql directory to lib-sql
Also introduces a separate constant for the sql directory, so that
it can be put separately from the rest of the data if required.
2021-02-09 15:26:56 +01:00
Sarah Hoffmann
db3ced17bb rename lib to lib-php 2021-02-09 11:52:07 +01:00
Sarah Hoffmann
248b4cddab update osm2pgsql (disable install rule) 2021-02-09 09:48:50 +01:00
Sarah Hoffmann
d81e152804 integrate analyse of indexing into nominatim tool 2021-02-08 22:22:49 +01:00
Sarah Hoffmann
0cbf98c020 consolidate warm and db-check into single admin command 2021-02-08 21:05:06 +01:00
Sarah Hoffmann
195f9f5ef3 split cli.py by subcommands
Reduces file size below 1000 lines.
2021-02-08 17:23:05 +01:00
Sarah Hoffmann
a759c5b75b move website into php library directory 2021-02-08 12:00:34 +01:00
Sarah Hoffmann
7dfe645b5f move postcode table setup to sql/
Also moves the call to the setup from the setup-db
step to the calculate-postcodes step. The tables also need
no longer be accessible by the webservice.
2021-02-08 11:53:01 +01:00
Sarah Hoffmann
ca3283cbaa remove unused SQL script 2021-02-08 11:28:24 +01:00
Sarah Hoffmann
861e67dfe8 fix off-by-one error in replication download 2021-02-04 17:04:04 +01:00
Sarah Hoffmann
82ef02cd1a Merge pull request #2161 from lonvia/timeout-for-replication
Reintroduce timeout for replication file download
2021-02-04 16:52:24 +01:00
Sarah Hoffmann
948217d5e9 reintroduce timeout for replication file download
This ports the --socket-timeout parameter from
pyosmium-get-changes which ensures that the update
process eventually times out on hanging network connections.
2021-02-04 11:47:11 +01:00
Sarah Hoffmann
6cc06828db Merge pull request #2160 from lonvia/introduce-project-dir
Officially introduce and recommend use of a project directory
2021-02-04 09:52:59 +01:00
Sarah Hoffmann
0b2abfb115 replace make serve with nominatim serve command
With the website directory now tied to the project directory instead
of the build directory, it is no longer possible to use make for
running the web server.
2021-02-03 16:34:31 +01:00
Sarah Hoffmann
b2f8fb6201 add migration info for status table 2021-02-03 14:13:09 +01:00
Sarah Hoffmann
e2329c03fe Revert "increase splitting for large geometries"
This reverts commit 559fe513fa.

Increasing the splitting results in geometries where with rounding
issues at the split points, so that contain operations do not
work as expected anymore.

Fixes #2137.
2021-02-03 10:23:38 +01:00
Sarah Hoffmann
9bca670b4e adapt quick start instructions in README to project dir 2021-02-03 10:17:22 +01:00
Sarah Hoffmann
cb06d1f4ca do not overwrite custom set module paths
Given that the module is now copied to the project directory
when no module path is set, we need the information that the
module path is empty. Therefore hand in the default module path
in a separate variable.
2021-02-02 18:31:25 +01:00
Sarah Hoffmann
36447c488a print project directory before running any command 2021-02-02 11:19:31 +01:00
Sarah Hoffmann
69092030cd make phpcs happy 2021-02-02 11:15:56 +01:00
Sarah Hoffmann
109aa9c428 actions: switch to using separate project dir
Also fixes reverse-only import which not run at all.
2021-02-02 11:03:09 +01:00
Sarah Hoffmann
1d97816c53 docs: add hint about putting the nominatim tool into the PATH 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
7591c4fb42 copy database module on install
When no explicity database module is configured, then the
module is now copied into the project directory and used from
there. This means that Nominatim can be updated to a new
version of the module while existing installation keep their
version of normalisation.
2021-02-02 10:56:40 +01:00
Sarah Hoffmann
60cbeb165e hand in absolute path to nominatim tool to php scripts 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
bddfc109f8 refer to new nominatim tool in configuration comments 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
b05c379b39 change the default location for external data to project dir 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
7ba5283fe8 actions: revert to reletive paths for caching 2021-02-02 10:37:18 +01:00
Sarah Hoffmann
98fe5af07d actions: remove setting custom .env
It only set the pyosmium-get-changes binary which is no longer
needed.
2021-02-02 10:35:30 +01:00
Sarah Hoffmann
59cb1d6c27 remove pyosmium-get-changes detection from cmake
pyosmium-get-changes is not longer used.
2021-02-02 10:33:15 +01:00
Sarah Hoffmann
0ad1b28497 Merge pull request #2155 from lonvia/port-regresh-to-python
Port replication and part of the refrsh function to native Python
2021-02-01 11:50:05 +01:00
Sarah Hoffmann
5f63d4ca1f print nice summary after updates 2021-02-01 10:34:31 +01:00
Sarah Hoffmann
90aaab77fc fix linting issues 2021-01-30 16:42:25 +01:00
Sarah Hoffmann
7158433cd3 disable warning about non-toplevel import
They are needed here so nominatim can be run when osmium
is not installed. Everything except replication will work fine.
2021-01-30 16:29:28 +01:00
Sarah Hoffmann
e629a175ed introduce custom UsageError
This is a exception to be thrown when the error occures because
of bad user data. We don't want to print a full stack trace in
these cases but just tell the user what went wrong.
2021-01-30 16:20:10 +01:00
Sarah Hoffmann
45ea73913f remove setting for PYOSMIUM_BINARY
pyosmium is now called as a library from the python code,
so that pyosmium-get-changes is no longer needed.
2021-01-30 15:55:04 +01:00
Sarah Hoffmann
01e0fd7e13 whitelist pyosmium for pylint 2021-01-30 15:52:49 +01:00
Sarah Hoffmann
4cb6dc01f3 port replication update function to python 2021-01-30 15:50:34 +01:00
Sarah Hoffmann
8f0885f6cb port check-for-update function to python 2021-01-28 14:50:14 +01:00
Sarah Hoffmann
beb0fa0727 Merge pull request #2153 from rizkyarlin/patch-1
fix indentation
2021-01-28 09:06:33 +01:00
Muh. Rizky Eka Arlin
436cb9229b fix indentation 2021-01-28 14:21:54 +08:00
Sarah Hoffmann
d78f0ba804 port replication initialisation to Python 2021-01-26 22:50:54 +01:00
Sarah Hoffmann
5b46fcad8e convert functon creation to python
The new functions always creates normal and partitioned functions.
Also adds specialised connection and cursor classes for adding
frequently used helper functions.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
94fa7162be port address level computation to Python
Also adds simple tests for correct table creation.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
e6c2842b66 move update code for postcode and word count to Python
Adds also tests for the new function to execute a SQL script.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
e6d9485c4a cli: import python modules for commands on demand
Given that only one command will be executed in the end, it is
not necessary to import what amounts to the whole library. This
becomes in particular important for update functions that have
a dependency on pyosmium. The dependency can remain optional for
people not using updates.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
30cd2f2280 remove API comparison util
This is outdated and unmaintained. There are tools out there
which can do this better. Try, for example
https://github.com/radarlabs/api-diff
2021-01-26 22:46:35 +01:00
Sarah Hoffmann
2c909c1f0c Merge pull request #2147 from lonvia/tests-for-python-code
Add basic set of tests for Python code
2021-01-21 10:07:50 +01:00
Sarah Hoffmann
063a4cb403 cli indexer tests need a fake database
The Indexer constructor opens a connection to the given database.
2021-01-20 21:30:27 +01:00
Sarah Hoffmann
42ec67f63c add more tests for CLI parameter parser 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
8c02786820 add tests for indexer 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
c26f323bf5 add simple tests for CLI parsing 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
041ae67fd9 optionally hand in command line arguments to CLI functions
Allows easier testing.
2021-01-20 21:30:27 +01:00
Sarah Hoffmann
bfa6580ad5 use pytest mocking functions for manipulating os.environ 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
52b76d1d01 add tests for Python exec_utils 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
a3767f9142 Merge pull request #2146 from mtmail/two-typos
correct parameter name in query CLI
2021-01-20 21:29:49 +01:00
marc tobias
f62c784102 correct parameter name in query CLI 2021-01-20 21:09:41 +01:00
Sarah Hoffmann
ffc221a87f Merge pull request #2145 from lonvia/cli-query-functions
Add interface to search via command line tool
2021-01-20 09:00:45 +01:00
Sarah Hoffmann
8cf54a1317 add API functions to nominatim tool 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
77e287f669 rename nominatim.admin to nominatim.tools 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
5d95a72758 probe for php_cgi in cmake to be used for querying 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
3475e1dfd6 Merge pull request #2143 from lonvia/integrate-indexer-into-nominatim-tool
Integrate indexer into nominatim tool
2021-01-19 08:42:22 +01:00
Sarah Hoffmann
504922ffbe remove old nominatim.py in favour of 'nominatim index'
The PHP scripts need to know the position of the nominatim
tool in order to call it. This is handed in as environment
variable, so it can be set by the Python script.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
c77877a934 implementaion of 'nominatim index' 2021-01-18 15:43:27 +01:00
Sarah Hoffmann
27977411e9 move indexing function into its own Python module
This makes it mow a standard function of our new Python
library instead of a stand-alone program.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
b79c79fa73 add function to get a DSN for psycopg
Converts the PHP DSN syntax into psycopg syntax when necessary.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
cd0001b55a Merge pull request #2142 from lonvia/update-bdd-api-tests
Update BDD API tests
2021-01-18 15:40:50 +01:00
Sarah Hoffmann
340e7f7210 bdd: complete coverage for API tests
Also removes some functions that are no longer used and
fixes debug output where the tests found an issue.
2021-01-17 16:12:06 +01:00
Sarah Hoffmann
f9c43137c9 remove unused output formatting functions 2021-01-16 17:39:49 +01:00
Sarah Hoffmann
171ed36e36 bdd: remove duplicated tests 2021-01-16 16:57:28 +01:00
Sarah Hoffmann
c6c907d451 bdd: clean up and extend API tests for details
- remove duplicates created by replacing HTML tests
  with JSON tests
- add tests for newer functions for returning geometries
  and hierarchies
2021-01-16 12:04:13 +01:00
Sarah Hoffmann
19ab038724 collect coverage for /website directory as well 2021-01-15 20:27:14 +01:00
Sarah Hoffmann
1c26fd489d Merge pull request #2139 from lonvia/add-pytest
Introduce unit testing for Python code
2021-01-15 17:37:36 +01:00
Sarah Hoffmann
e8cfba1b10 pytest may also be installed as py-test[-3] 2021-01-15 17:22:31 +01:00
Sarah Hoffmann
496a3d29db enable pytest testing in CI 2021-01-15 15:33:53 +01:00
Sarah Hoffmann
438ed431dd add documentation for new pytest tests 2021-01-15 15:18:45 +01:00
Sarah Hoffmann
f1f0032758 add pytest as a test goal in cmake 2021-01-15 15:09:36 +01:00
Sarah Hoffmann
eb3b789855 add initial pytest test for Configuration 2021-01-15 14:42:03 +01:00
Sarah Hoffmann
c077050855 Merge pull request #2136 from lonvia/introduce-pylint
Introduce pylint for code style checking for Python.
2021-01-15 14:39:26 +01:00
Sarah Hoffmann
d9998bfab3 pylint may be available as pylint3 or pylint 2021-01-15 10:52:25 +01:00
Sarah Hoffmann
7cf9d459d6 use check parameter of subprocess.run
...instead of checking on our own.

Also increase required version of Python to 3.5 because of
subprocess.run().
2021-01-15 10:43:04 +01:00
Sarah Hoffmann
de724aa576 add pylint to list of required linting tools
With pylint being run in the CI, passing it is required now.
2021-01-15 10:43:04 +01:00
Sarah Hoffmann
8e53f63036 fix errors reported by pylint 2021-01-15 08:57:00 +01:00
Sarah Hoffmann
565356613a Merge pull request #2135 from lonvia/python-frontend
Introduce new 'nominatim' all-in-one command-line tool
2021-01-15 08:56:07 +01:00
Sarah Hoffmann
eda0900c8e fix typo 2021-01-14 20:30:27 +01:00
Sarah Hoffmann
3dd67083b2 replace Symfony dotenv dependency with Python dotenv 2021-01-14 18:31:18 +01:00
Sarah Hoffmann
2f73bb3643 bdd: directly call utility scripts in lib
This removes the dependency on php-symfony-dotenv for the tests.
2021-01-14 18:19:22 +01:00
Sarah Hoffmann
9348fc5e15 move dotenv parsing to installed PHP scripts
This means that the php-symfony-dotenv library is now only needed
when using the legacy scripts. This includes the BDD tests which
currently still rely on the PHP utils.
2021-01-14 18:06:22 +01:00
Sarah Hoffmann
97710ee9d1 use cli tool for github CI 2021-01-14 16:35:01 +01:00
Sarah Hoffmann
9619cb3fe5 forward cli tool return value as exit code 2021-01-14 14:36:41 +01:00
Sarah Hoffmann
1c1e951826 adapt documentation to new nominatim cli tool 2021-01-14 12:12:38 +01:00
Sarah Hoffmann
88c57b4dc8 maller command execution fixes 2021-01-14 12:03:49 +01:00
Sarah Hoffmann
ba13cfd9ff make sure that environment variables have highest prio 2021-01-14 11:12:45 +01:00
Sarah Hoffmann
1ff8751caa liniting of new python code 2021-01-14 10:19:21 +01:00
Sarah Hoffmann
98dbc84836 add wrapper calls for all nominatim tool functions 2021-01-14 09:37:47 +01:00
Sarah Hoffmann
0847964a27 avoid accessing constants when getting data from env
When a setting can be read from the environment variable, avoid
accessing the internal defaults. This way the scripts can be
accessed directly in \lib as long as the environment is set up
correctly with full defaults.
2021-01-14 09:37:04 +01:00
Sarah Hoffmann
bc09d7aedb fix access to environment variable 2021-01-14 09:29:43 +01:00
Sarah Hoffmann
04690ad8c4 implement warming in new cli tool
Adds infrastructure for calling the legacy PHP scripts. As the
CONST_* values cannot be set from the python script, hand the values
in via secret environment variables instead. These are all
temporary hacks for the transition phase to python code.
2021-01-13 18:25:15 +01:00
Sarah Hoffmann
ec636111ba warm.php needs constant setup for queries
Warming is done using the query classes and therefore the same
copy-over from dotenv settings to CONST_* parameters is needed
as for query.php.
2021-01-13 18:12:53 +01:00
Sarah Hoffmann
e467b956ff set CONST_LibDir directly from the source scripts
Now that the source scripts have been moved to \lib, they
can determine the position of the PHP library relative to
themselves.
2021-01-13 17:00:38 +01:00
Sarah Hoffmann
ff5a237200 move PHP utilities into the lib directory
These are not called directly as programs but used in a library
fashion by the installed utilities. So the library directory
is a better place.
2021-01-13 14:44:45 +01:00
Sarah Hoffmann
d6bcb7c8b7 consolidate cli interface to single tool 2021-01-13 10:11:58 +01:00
Sarah Hoffmann
57f5e6d898 create skeleton for new CLI tools 2021-01-12 22:21:20 +01:00
Sarah Hoffmann
612fd50612 add skeleton for new Nominatim executables 2021-01-12 10:17:28 +01:00
Sarah Hoffmann
a74e736283 Merge pull request #2132 from lonvia/reduce-api-testdb
Reduce BDD API test database to Liechtenstein
2021-01-11 10:42:22 +01:00
Sarah Hoffmann
86cd5ddd65 also run BDD API tests in CI 2021-01-09 17:58:06 +01:00
Sarah Hoffmann
812de0545d test can be run all in one go with make 2021-01-09 17:57:30 +01:00
Sarah Hoffmann
3bed5516da update documentation for new BDD API tests 2021-01-09 17:54:45 +01:00
Sarah Hoffmann
0495dbe756 bdd: add new API test data
Make all data necessary for API tests directly available in the
repository.
2021-01-09 17:01:33 +01:00
Sarah Hoffmann
5d656891ba bdd: convert API tests to smaller test db
Changes BDD API tests to restrict themselves to
Liechtenstein. One test moved to DB as no appropriate
data is available.
2021-01-09 16:59:46 +01:00
Sarah Hoffmann
74122dc965 bdd: improve assert output for API query checks
Adds wrapper function for checking address parts and
more explanation strings to asserts.
2021-01-09 16:58:37 +01:00
Sarah Hoffmann
ee18a511c6 bdd: import API test DB as part of step setup
In the future, the BDD tests will simply set up the required
test database themselves. Like with the template database, it
is not reimported when it already exists unless that is explicitly
forced.

Makes most of the API tests currently fail because they still
point to old test data.
2021-01-07 11:51:38 +01:00
Sarah Hoffmann
da20881096 Merge pull request #2129 from lonvia/cleanup-bdd-tests
Clean up Python support code for BDD tests
2021-01-07 09:10:40 +01:00
Sarah Hoffmann
aaabb46f20 add symphony dotenv to prerequisites list 2021-01-07 08:56:52 +01:00
Sarah Hoffmann
49142eb6e5 use relative dir for sources for phpunit 2021-01-07 08:55:15 +01:00
Sarah Hoffmann
73cbb6eb9a bdd: clean up DB ops steps
Adds comments and modernizes code.
2021-01-06 16:37:32 +01:00
Sarah Hoffmann
1f29475fa5 bdd: move column comparison in separate file
Introduces a new class DBRow that encapsulates the comparison
functions. This also is responsible for formatting more informative
assert messages. place and placex steps are unified.
2021-01-06 12:28:09 +01:00
Sarah Hoffmann
d586b95ff1 bdd: move nominitim id reader to separate file 2021-01-05 16:00:48 +01:00
Sarah Hoffmann
25557e5f14 bdd: factor out reindexing on updates 2021-01-05 15:17:46 +01:00
Sarah Hoffmann
197870e67a bdd: move place table inserter into separate file
Also simplifies usage by implementing a function that inserts
a complete table row.
2021-01-05 12:12:59 +01:00
Sarah Hoffmann
b8e39d2dde bdd: move scene setup to OSM data steps
The step has nothing to do with the database.
2021-01-05 11:42:28 +01:00
Sarah Hoffmann
5dfa76a610 bdd: switch to auto commit mode
Put the connection to the test database into auto-commit mode
and get rid of the explicit commits. Also use cursors always in
context managers and unify the two implementations that copy
data from the place table.
2021-01-05 11:42:28 +01:00
Sarah Hoffmann
58c471c627 bdd: remove class for lazy formatting
assert in combination with format() does the right thing and calls
the __str__() method only when an assertion hits.
2021-01-05 10:39:44 +01:00
Sarah Hoffmann
213bf7d19d bdd: rename db_ops steps
Now all files implementing steps are called steps_*.py.
2021-01-05 10:20:00 +01:00
Sarah Hoffmann
12ae8a4ed3 bdd: move output format computation into response 2021-01-05 10:17:59 +01:00
Sarah Hoffmann
8a93f8ed94 bdd: move Response classes in own file and simplify
Removes most of the duplicated parse functions, introduces
a common assert_field function with a more expressive error
message.
2021-01-05 10:03:47 +01:00
Sarah Hoffmann
2712c5f90e bdd: rename and clean up osm_data steps
Move common OPL creation code into a function and remove
unused imports.
2021-01-04 20:17:17 +01:00
Sarah Hoffmann
72587b08fa bdd: move external process execution in separate func 2021-01-04 19:58:59 +01:00
Sarah Hoffmann
faa85ded50 bdd: move NominatimEnvironment into separate file
Also cleans up and modernizes the code and adds documentation.
2021-01-04 17:54:51 +01:00
Sarah Hoffmann
14e5bc7a17 bdd: move grid generation code into geometry factory 2021-01-04 17:04:47 +01:00
Sarah Hoffmann
f727620859 bdd: move geoemtry creation into separate file
Also renames the OsmDataFactory in the more appropriate
GeometryFactory and modernizes code for python3.
2021-01-04 16:34:40 +01:00
Sarah Hoffmann
843d3a137c remove stale code for python2 2021-01-04 14:14:34 +01:00
Sarah Hoffmann
e4691005e2 Merge pull request #2125 from lonvia/independent-project-directory
Allow for truely independent project directory
2021-01-04 14:10:24 +01:00
Sarah Hoffmann
4aba70caee create a temporary project dir for tests
The project directory contains the website script as
configured through the test configuration. This means
that tests are now completely independet of any
configuration that may be contained in the build
directory.

Also removes the hack to inject additional settings via
a environment variable.
2021-01-04 11:39:45 +01:00
Sarah Hoffmann
5e989b9296 configure osm2pgsql and module location via cmake
The default location of osm2pgsql and the postgresql module
is decided at compile/installation time and is not necessarily
in the project directory.

With this change it is now possible to have a project directory
that is completely separate from the build directory.
2021-01-04 11:37:56 +01:00
Sarah Hoffmann
cba2d252c8 Merge pull request #2124 from lonvia/remove-nose
Remove nose dependency for tests
2021-01-03 21:04:59 +01:00
Sarah Hoffmann
2ecec19df0 remove nose requirement from documentation 2021-01-03 17:23:44 +01:00
Sarah Hoffmann
4ca7197826 replace nose assertions with simple asserts 2021-01-03 17:21:24 +01:00
Sarah Hoffmann
a8ec250993 Merge pull request #2119 from mtmail/check-import-finished-when-tables-droped
utils/check_import_finished: skip some checks when setup ran with --drop
2020-12-22 15:57:48 +01:00
Sarah Hoffmann
f3e0e401fd Merge pull request #2118 from mtmail/vagrant-ubuntu-dotenv
Vagrant ubuntu: install dotenv package
2020-12-22 15:54:48 +01:00
marc tobias
d60f89867b utils/check_import_finished: skip some checks when setup ran with --drop 2020-12-21 20:12:31 +01:00
marc tobias
b133f2bc4c Vagrant ubuntu: install dotenv package 2020-12-21 20:10:13 +01:00
Sarah Hoffmann
301fd7f7e8 Merge pull request #2115 from lonvia/use-dotenv
Switch configuration to dotenv
2020-12-21 11:33:38 +01:00
Sarah Hoffmann
45148c7078 switch documentation to describing dotenv 2020-12-20 12:09:27 +01:00
Sarah Hoffmann
3c75194448 adapt instructions for creating the test db to dotenv 2020-12-20 11:53:19 +01:00
Sarah Hoffmann
f218e20522 mark CentOS installation instructions as broken
Getting symfony-dotenv installed on CentOS is a major pain,
so just mark it broken instead.

Still sSwitch the config format to dotenv already.
2020-12-20 11:35:29 +01:00
Sarah Hoffmann
33b038ce6f tests: always create the config file
There is also one database test that uses the API functions.
2020-12-19 17:55:46 +01:00
Sarah Hoffmann
f62c65e9d9 adapt php tests to new directory constants 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
867baab3d1 make phpcs happy 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
63ad0cb498 github actions: need dotenv 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
433017b990 move creation of website scripts to setup script
Instead of creating the website wrapper scripts with cmake,
they are now created when --setup-website is called. The
setup of the configuration constants is directly embedded
into the scripts. This means we can get rid of the separate
settings-frontend.php. More importantly however, it means
that it is now possible to set up multiple website directories
from the same build directory.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
d97aed8741 adapt tests to new dotenv environment
DB tests now can simply set the environment to change configuration
variables. API tests still rely on a configuration file.

Also, query.php needs to set up the CONST_* variables to work with
the query scripts. That is a tiny bit messy and duplicates code
but this part will need to be reworked later.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
06d89e1d47 fix various typos 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
8676e45d88 remove old default settings 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
992d3faac8 switch all utils to initialising dotenv 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
0947b61808 switch remaining settings to dotenv format
CONST_Search_AreaPolygons and CONST_Search_ReversePlanForAll have
been removed completely.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
d43f30903c use explicit DSN for website scripts
Website scripts have no access to the dotenv variables, so use
the DSN constant instead when connecting to the database.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
15a1666f8a replace database settings with dotenv variant
As we can't refer to the project root dir in the module path, the
module path may now also be a relative directory which is then
taken as being relative to the project root path.

Moves the checkModulePresence() function into the Setup class, so
that it can work on the computed absolute module path.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
25bdd7c6d9 introduce dotenv parsing for setup.php
This adds the notion of a project directory. This is the directory
that holds all necessary files for one specific installation of
Nominatim. Dotenv looks for an .env file in this directory and
adds it to the global environment together with the defaults from
Nominatim's data directory.

Add's symfony's dotenv library as a new dependency.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
ac116980ac make HTTP proxy setup explicit
The setup relies on the project configuration which we want to
explicitly set up in later steps. Therefore proxy setup needs to
be done explicitly as well. There is the added bonus that the
setup is done only for the utils which try to call outside.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
b5480f6e36 reorganise path settings in config
CONST_BasePath is split into separate configuration variables
for binaries, libraries and data. These variables as well as
the installation path are now set in the executable directly and
no longer configurable via project settings.

This is the first step towards an installable software. The
executables should know per installation where to find their
necessary data to execute. Project configuration needs to be
restricted to settings that really concern the specific Nominatim
installation.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
17a8cc5e29 use /usr/bin/env for python script
Makes it easier to use the script with a virtualenv setup.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
aeeee0d5da Merge pull request #2112 from lonvia/fix-tests-for-php-8
work around failing CI tests
2020-12-18 14:25:50 +01:00
Sarah Hoffmann
de03a0f924 work around failing CI tests
Force use of phpunit7 to avoid an issue with different sort order.
2020-12-18 10:58:09 +01:00
Sarah Hoffmann
5528918d5d Issue templates: require postgres config modifications 2020-12-18 10:37:01 +01:00
Sarah Hoffmann
9e0d5cb669 Issue templates: more commenting of instructions 2020-12-16 08:41:20 +01:00
Sarah Hoffmann
11622b2863 Issue templates: put instructions into comments 2020-12-16 08:38:04 +01:00
Sarah Hoffmann
9610664fc5 prepare 3.6.0 release 2020-12-13 11:59:16 +01:00
Sarah Hoffmann
eed2e3f2a8 Merge pull request #2105 from lonvia/fix-use-of-records
Use a typed record for place info in get_addressdata
2020-12-13 09:48:21 +01:00
Sarah Hoffmann
1b62162e08 actions: run tests also on postgresql 9.5
This is the oldest version for which a postgis is available.
2020-12-12 20:55:54 +01:00
Sarah Hoffmann
5cf573c340 use a typed record for place info in get_addressdata
Older versions of Postgresql cannot handle an untyped record
for INTO.

Fixes #2100.
2020-12-12 14:46:34 +01:00
Sarah Hoffmann
76cd8bf258 Merge pull request #2102 from mtmail/typo-in-reverse-md
Reverse.md: fix two typos
2020-12-12 10:16:23 +01:00
marc tobias
31ba7e7cf0 Reverse.md: fix two typos 2020-12-12 00:30:26 +01:00
Sarah Hoffmann
a338ba695b Merge pull request #2101 from lonvia/update-documentation
Update documentation
2020-12-11 18:02:44 +01:00
Sarah Hoffmann
9f6e2de4ed fix typos 2020-12-11 17:08:13 +01:00
Sarah Hoffmann
1b3acc4f8f update import times 2020-12-11 16:53:38 +01:00
Sarah Hoffmann
56cf3b362e Merge pull request #2099 from lonvia/update-country-names
Dynamically update country names from OSM data
2020-12-10 17:56:32 +01:00
Sarah Hoffmann
b59d01fe85 update country names
Copies all name:xx country names that are in OSM as of today
into the country name fallback table.
2020-12-09 17:52:25 +01:00
Sarah Hoffmann
65d8770b28 update country_names from OSM data
Update names in the coutry_names table on the fly from incomming
OSM country data. Adding a small sanity check that the country
must be an OSM relation and within the area where we expect the
country to be.
2020-12-09 11:38:19 +01:00
Sarah Hoffmann
38582c3e52 Merge pull request #2096 from otbutz/patch-1
Add link to discussions when creating new issue
2020-12-09 09:55:59 +01:00
otbutz
5f6fe6cab7 Add link to discussions when creating new issue 2020-12-09 09:33:42 +01:00
Sarah Hoffmann
34f8e6ddf7 remove bug reporting hints in favour of issue templates 2020-12-08 17:47:38 +01:00
Sarah Hoffmann
603367dced updates to admin and develop documentation
Mostly minor updates in wording and resource consumption.
2020-12-08 17:47:38 +01:00
Sarah Hoffmann
eaee87e73d update API documentation
* remove traces of HTML output
* add details on artificial objects (see also #1671)
* add geometry output documentation for lookup
* deprecate query by ID via reverse endpoint
* remove /search/<query> query format, no longer supported
* explain better what reverse geocoding does
* lots of smaller fixes to wording
2020-12-08 17:47:38 +01:00
Sarah Hoffmann
ea844db847 do not classify housenumbers as rare
House numbers are highly redundant, so don't even attempt to
do it as a rare name search. Greatly improves speed of such
queries.
2020-12-08 17:25:15 +01:00
Sarah Hoffmann
3ca8cc0344 Merge pull request #2092 from lonvia/update-osm2pgsql
Update osm2pgsql to 1.4.0 release
2020-12-08 10:56:25 +01:00
Sarah Hoffmann
f67c06f128 update osm2pgsql to 1.4.0 release 2020-12-08 10:18:18 +01:00
Sarah Hoffmann
7889509856 Create issue templates 2020-12-06 22:24:45 +01:00
Sarah Hoffmann
67c995aef6 Merge pull request #2090 from lonvia/avoid-contains-operator
avoid contains operator for geometries
2020-12-03 09:39:50 +01:00
Sarah Hoffmann
e20defeebd avoid contains operator for geometries
Postgis keeps messing up use of index in some circumstances.
2020-12-02 22:18:27 +01:00
Sarah Hoffmann
cca646a19e Merge pull request #2087 from lonvia/only-one-link-per-node
Place nodes can only be linked once against boundaries
2020-12-02 22:16:56 +01:00
Sarah Hoffmann
ddc2b4b806 Merge pull request #2088 from lonvia/update-osm2pgsql
update osm2pgsql
2020-12-02 22:16:38 +01:00
Sarah Hoffmann
0334915067 update osm2pgsql
Needs now an explicit switch to avoid propagating changes from
nodes to ways to relations.
2020-12-02 18:27:18 +01:00
Sarah Hoffmann
987d60ccda place nodes can only be linked once against boundaries
If a place node is already linked against a boundary, it should not
be used for linking again. It is usually a sign of a mapping error,
when there are multiple boundary candidates. This change just avoids
inconsistent data in the database, it does not guarantee that the
linking is against the more correct boundary.
2020-12-02 15:31:02 +01:00
Sarah Hoffmann
7d520bf448 also add explicit cast for varchar 2020-12-01 22:15:51 +01:00
Sarah Hoffmann
dc53288e6b Merge pull request #2085 from lonvia/add-address-rank-to-xml-output
add address rank to XML output
2020-12-01 19:59:29 +01:00
Sarah Hoffmann
db02673b60 Merge pull request #2084 from lonvia/fix-term-count
fix use of term count in partial terms
2020-12-01 19:59:07 +01:00
Sarah Hoffmann
cf9b248f29 add address rank to XML output
The address rank is much more interesting than the search rank
these days because it tells something about the kind of object.

Reverse did have neither rank, so add both for consistency.
2020-12-01 17:54:53 +01:00
Sarah Hoffmann
df12954312 fix use of term count in partial terms
Term count for partial words is one less than the actual number
of words. Take that into account when adding to the search rank.

Fixes #2081.
2020-12-01 17:21:01 +01:00
Sarah Hoffmann
a9357b4dce Merge pull request #2082 from lonvia/compute-address-on-the-fly-II
Compute address for POIs on the fly
2020-12-01 16:41:31 +01:00
Sarah Hoffmann
63544db8f9 null entries need to be typed 2020-12-01 14:54:42 +01:00
Sarah Hoffmann
7295cad715 compute address parts for rank 30 objects on the fly
Rank 30 objects usually use the address parts of their parent.
When the parent has address parts that are areas but not marked
as isaddress, then the parent might go through multiple administrative
areas. In that case recheck if the right area has been choosen
for the object in question instead of relying on isaddress.
Note that we really only have to do the recomputation in the
case of 'isarea = True and isaddress = False' which hopefully
keeps the number of additional geometric operations we have to do
to a minimum.

There is one more special case to be taken into account here: a
street may go through two administrative areas and a house along
that street is placed in one of the area while the addr:* tags
says it belongs to the other. In that case we must not switch
the isaddress to the one it is situated. To avoid that recheck
the address names against the name of the ara. That is not perfect
but should cover most cases.

Fixes #328.
2020-12-01 11:58:25 +01:00
Sarah Hoffmann
ff85da0a31 cleanup get_addressdata
Save location data in a ROW instead of using separate varaibles
for each value.
2020-11-30 22:54:36 +01:00
Sarah Hoffmann
75b2d7ca99 Merge pull request #2080 from donalhunt/fix-Migration.md-typos
Migration.md: fix typos, improve style consistency and readability.
2020-11-30 16:21:35 +01:00
Donal Hunt
3c9eeb11fa Migration.md: fix typos, improve style consistency and readability. 2020-11-30 11:59:10 +00:00
Sarah Hoffmann
63bacaee2e Merge pull request #2079 from lonvia/improve-progress-logging
Improve progress logging during indexing
2020-11-30 11:42:08 +01:00
Sarah Hoffmann
5016eace34 improve progress logging during indexing
Wait for 2 seconds before logging the first progress, so that we
have numbers that are a bit more reliable statistically speaking.

Also provides an actual implementation for the log_interval
parameter and fixes some small style issues.
2020-11-30 10:59:29 +01:00
Sarah Hoffmann
2e5c8b5cd3 Merge pull request #2077 from lonvia/optimize-large-rank-0-areas
Restrict size of features that get a full address search
2020-11-26 14:40:54 +01:00
Sarah Hoffmann
bf0f81adcb Merge pull request #2076 from lonvia/search-name-index-migration
Docs: add migration for search_name_* tables
2020-11-26 12:01:38 +01:00
Sarah Hoffmann
2db751700e restrict size of features that get a full address search
It would be nice to always compute addresses for rank 0 objects
over the complete geometry, so that they can be found via all
the admin boundaries that they intersect. However, there are a
couple of extramely large boundaries in OSM (like timezones)
where this results in thousands of possible address candidates
that need to be checked. Fall back to getting the address of the
centroid for them.
2020-11-26 11:53:58 +01:00
Sarah Hoffmann
62bee4ed37 docs: add migration for search_name_* tables 2020-11-26 09:18:33 +01:00
Sarah Hoffmann
1f07d63dc5 Merge pull request #2075 from lonvia/filter-postcodes-from-location-area-large
Filter postcodes by search rank when adding to address list
2020-11-25 21:42:27 +01:00
Sarah Hoffmann
cc1af99dbd filter postcodes by search rank when adding to address list
The post codes are the last part that does not fit the new
address ranking scheme. In particular, the search rank is still
relevant for choosing if a postcode should be included into
the address terms. Filter out irrelevant postcodes in
getNearFeatures() already, to avoid having to check for
geometry relation.
2020-11-25 21:01:33 +01:00
Sarah Hoffmann
c5d98effc0 Merge pull request #2074 from lonvia/add-housenumber-to-unknown-places
Improve finding addresses that have their own search_name entry because of unknown addr:* parts
2020-11-25 16:57:09 +01:00
Sarah Hoffmann
b68b2ff6b8 Merge pull request #2073 from lonvia/multi-word-partial-terms-in-search-description
Improve handling of multi-word partials in SearchDescription
2020-11-25 16:57:00 +01:00
Sarah Hoffmann
57f0d55c2e make phpcs happy 2020-11-25 16:14:31 +01:00
Sarah Hoffmann
3cf763475f do not use artificial housenumbers as names
If they are artificial they cannot have a search_name entry.
2020-11-25 16:11:32 +01:00
Sarah Hoffmann
0f87da017f improve handling of multi-word partials in SearchDescription
Multi-word partial terms had an undue advantage over separate partial
terms because they only need to pay the penalty once. This changes
the behaviour by setting the penalty according to the number of
words in the token. This should get rid of search interpretations
with low chance of matching.

This also fixes handling of exact term matching. We now match against
all exact terms of the query, not just a couple of them collected
while building the interpretations.

Also adds a penalty to very short postcodes.
2020-11-25 12:07:04 +01:00
Sarah Hoffmann
22800d7d59 Search housenumbers with unknown address parts by housenumber term
House numbers need special handling because they may appear after
the street term. That means we canot just use them as the main name
for searches where the address has its own search term entries.
Doing this right now, we are able to find '40, Main St, Town' but not
'Main St 40, Town'.

This switches to using the housenumber token as the name term instead.
House number tokens can get special handling when building the search
query that covers the case where they come after the street.

The main disadvantage is that this once more increases the numbers
of possible search interpretation of which we have already too many.

no penalty for housenumber searches
2020-11-25 11:36:10 +01:00
Sarah Hoffmann
f21853ea9d Merge pull request #2071 from lonvia/fix-more-ranks
Search rank 30 must always go with address rank 30
2020-11-24 21:45:30 +01:00
Sarah Hoffmann
1e76d668bd Merge pull request #2070 from lonvia/unlisted-places-to-rank-25
Move unlisted places to address rank 25
2020-11-24 21:45:16 +01:00
Sarah Hoffmann
b4b50eef15 search rank 30 must always go with address rank 30 2020-11-24 17:57:28 +01:00
Sarah Hoffmann
a9ad390b9e move unlisted places to address rank 25
Unlisted places are derived from addr:place and as such are
still places not streets.
2020-11-24 17:54:00 +01:00
Sarah Hoffmann
2e9e961fff Merge pull request #2068 from lonvia/fix-reverse-only
Do not create POI search terms in reverse-only mode
2020-11-24 08:22:48 +01:00
Sarah Hoffmann
13180989d9 Test --reverse-only with CI 2020-11-23 22:36:28 +01:00
Sarah Hoffmann
a4f1e40b72 do not create POI search terms on reverse-only
Fixes #2067.
2020-11-23 19:55:36 +01:00
Sarah Hoffmann
04d485c550 Merge pull request #2065 from rustycamper/patch-1
viewbox arguments are no longer accepter "in any order"
2020-11-23 09:55:29 +01:00
Pietro
a92bd1e2db viewbox arguments are no longer accepter "in any order"
Order should be longitude, then latitude
2020-11-23 10:40:43 +02:00
Sarah Hoffmann
f89e71a861 make sure that admin levels in NL are kept in order 2020-11-19 09:44:02 +01:00
Hendrik Morée
dcc075b34b Admin levels 8 and 10 of the Netherlands are municipal / city 2020-11-18 11:30:24 +01:00
Sarah Hoffmann
49083c2597 Merge pull request #2058 from lonvia/split-address-words
Split addr:* tags into words before adding to the search index
2020-11-18 08:58:17 +01:00
Sarah Hoffmann
29785ba166 Merge pull request #2059 from lonvia/include-parent-name-for-unknown-places
POIs with unknown addr:place must add parent name to address
2020-11-18 08:58:03 +01:00
Sarah Hoffmann
ffb2c93ba3 POIs with unknown addr:place must add parent name to address
The previous behaviour was a left-over from a former version
where such POIs parented to the street. Now that they parent to
places, it should be included.
2020-11-17 19:44:43 +01:00
Sarah Hoffmann
30a6b6bdac split addr: tags into words before adding to the search index
Address parts are only matched by single partial words. If
the addr: names are not split, then multi-word names cannot
be found.
2020-11-17 18:03:33 +01:00
Sarah Hoffmann
cc345f531a Merge pull request #2056 from lonvia/avoid-linking-postal-areas
Disallow linking for postcode areas
2020-11-17 11:15:56 +01:00
Sarah Hoffmann
9ede048769 disallow linking for postcode areas 2020-11-17 10:53:26 +01:00
Sarah Hoffmann
d23bf6e659 Merge pull request #2054 from lonvia/display-addr-terms
Merge places into address lists referred to by addr:* tags but not computed by Nominatim
2020-11-16 16:08:06 +01:00
Sarah Hoffmann
6b60f0ab03 use bool_or(ST_Intersects) instead of ST_Intersects(ST_Collect)
ST_Intersects segfaults on geometry collections for certain versions
of Postgis 3.
2020-11-16 15:28:01 +01:00
Sarah Hoffmann
aa9923bf07 fix typo 2020-11-16 15:28:01 +01:00
Sarah Hoffmann
9160cce6d8 remove unused columns in search_name_* and use right index
We only need the address rank these days, so get rid of
search rank. Also switch indexes to work on address rank.
2020-11-16 15:28:01 +01:00
Sarah Hoffmann
885dc0a8e1 more tests for absense of additional addressline entries 2020-11-16 15:28:01 +01:00
Sarah Hoffmann
7324431b12 get additional addresses for rank 30 objects
get_addressdata() now also checks if the place itself has entries
in the place_addressline table and merges them into the results.

Also restrict checking for address tag places to cases where the
name cannot be found in the parent's address search terms. Looking
up all address tags is just too slow.
2020-11-16 15:28:01 +01:00
Sarah Hoffmann
021f2bef4c get address terms from address tags for rank 30
For rank 30 objects add extra elements into the place_addressline
table.
2020-11-16 15:28:01 +01:00
Sarah Hoffmann
6260fef2e8 add test for placex from addr tags 2020-11-16 15:28:01 +01:00
Sarah Hoffmann
c7472662a6 lookup places for address tags for rank < 30
While previously the content of addr:* tags was only added
to the list of address search keywords, we now really look up
the matching place. This has the advantage that we pull in all
potential translations from the place, just like all the other
address terms that are looked up by neighbourhood search.

If no place can be found for a given name, the content of the
addr:* tag is still added to the search keywords as before.
2020-11-16 15:28:01 +01:00
Sarah Hoffmann
fecfe62fc6 Merge pull request #2055 from lonvia/fix-actions
Actions: update apt repo before installing software
2020-11-16 11:26:10 +01:00
Sarah Hoffmann
21b0430e46 actions: update apt repo before installing software 2020-11-16 10:14:38 +01:00
Sarah Hoffmann
66595c2d2b Merge pull request #2046 from lonvia/less-parallel-ranking
Only index larger batches for rank 30
2020-11-06 09:39:07 +01:00
Sarah Hoffmann
4ac29fb525 only index larger batches for rank 30
Fixes #2045.
2020-11-05 22:14:49 +01:00
Sarah Hoffmann
04d50a271d Merge pull request #2041 from lonvia/address-ranks-belgium
Adapt admin_levels for Belgium
2020-11-03 16:23:35 +01:00
Sarah Hoffmann
cbbda1ddf0 adapt admin_levels for Belgium
Fixes #272.
2020-11-03 10:46:52 +01:00
Sarah Hoffmann
928c6245c9 Merge pull request #2038 from lonvia/addresses-for-large-areas
Improve addresses for large areas
2020-11-03 08:49:01 +01:00
Sarah Hoffmann
ac9be161f6 Merge pull request #2039 from lonvia/migration-for-ui
Add  migration hints for UI removal and remove tests for icon attribute
2020-11-03 08:48:45 +01:00
Sarah Hoffmann
33378dcf6e remove tests for icon attribute
The icon attribute requires the CONST_MapIcon_URL to be present
which we cannot guarantee for the tests.
2020-11-02 16:46:29 +01:00
Sarah Hoffmann
e31a1f7ef1 docs: add migration hints for removed UI 2020-11-02 16:34:17 +01:00
Sarah Hoffmann
fa574ae9fd use different area estimates for large countries 2020-11-02 14:21:30 +01:00
Sarah Hoffmann
b2ebf4b4b7 adapt tests to rank changes of natural 2020-11-02 11:42:10 +01:00
Sarah Hoffmann
0f5615b618 guess a base address level for address rank 0 objects
The guess is based on the area and mainly avoids odd
addresses for very large or small objects.
2020-11-02 11:42:10 +01:00
Sarah Hoffmann
f050f898bc elevate most natural feature to address rank 22
Makes them be in par with landuse features.
2020-11-02 11:42:10 +01:00
Sarah Hoffmann
ce1c3bab6d Merge pull request #2032 from lonvia/remove-ui
Remove HTML output
2020-11-01 15:12:12 +01:00
Sarah Hoffmann
5cdabc5173 Merge pull request #2035 from lonvia/add-index-to-multicountry-script
docs: need to index after updating with a file
2020-11-01 15:11:02 +01:00
Sarah Hoffmann
42775f959b docs: need to index after updating with a file
Fixes #2031.
2020-10-31 22:53:08 +01:00
Sarah Hoffmann
7f55dcef3a use simpler recurse operator for overpass download
Also fixes a typo in the OSM link.
2020-10-31 21:44:28 +01:00
Sarah Hoffmann
ba8accf4bb make phpcs happy 2020-10-29 11:36:16 +01:00
Sarah Hoffmann
e62db51b06 vagrant: setting website URL is not longer necessary 2020-10-29 11:13:32 +01:00
Sarah Hoffmann
b81894d3d5 remove now unused settings related to website
There are two places where the website URL is still used:
for icons, replace the URL with a link to the icon repository
of the UI repo. The more URL now builds the link from the
server info.
2020-10-29 11:13:32 +01:00
Sarah Hoffmann
d86cf6801f remove tests for HTML output 2020-10-29 11:13:32 +01:00
Sarah Hoffmann
c0d21d0bd3 remove HTML output and UI elements 2020-10-29 11:13:04 +01:00
Sarah Hoffmann
b838f66dd7 remove hierarchy endpoint
This endpoint was never maintained and most of the information
can be obtained via the details endpoint.
2020-10-29 11:13:03 +01:00
Sarah Hoffmann
db9cc270b3 Merge pull request #2030 from lonvia/improve-ci
Small improvements for github actions run
2020-10-28 15:20:40 +01:00
Sarah Hoffmann
4147e04319 action: cache downloaded dependencies 2020-10-28 14:51:05 +01:00
Sarah Hoffmann
a888f6ff93 Merge pull request #2027 from lonvia/remove-duplicate-admin-boundaries
Handle duplicated admin boundaries
2020-10-28 11:11:42 +01:00
Sarah Hoffmann
bd04c49bc1 actions: tweak database settings
Disabling fsync and friends should speed up the CI run
significantly.
2020-10-28 11:10:14 +01:00
Sarah Hoffmann
5872b81232 use highest admin boundary for duplicated ones 2020-10-28 10:49:26 +01:00
Sarah Hoffmann
abd20d3ca6 disable admin level 5 in Russia
They either interfere with cities or refer to historical boundaries.
2020-10-28 10:49:26 +01:00
Sarah Hoffmann
95f83b90d2 minor fixes for geometry compuation during boundary ranking
Go back to using centroid when determining if one admin level
is within another. There are cases where boundaries are slightly
misaligned due to mapping errors (not using the same ways in the
relations).

Only declare boundaries the same when they have the same wikidata
tag _and_ have exactly the same geometry. This works around tagging
errors with the wikidata tag, which happen because of automated
edits to the wikidata tag.
2020-10-28 10:49:26 +01:00
Sarah Hoffmann
7a16909219 detect and remove admin boundary duplicates
The Polish community maps admin boundaries that span multiple
levels by duplicating the boundary relations. Detect this situation
by looking out for matching wikidata tags. The higher ranked
duplicates are then thrown out from the address pool by setting
their address rank to 0.
2020-10-28 10:49:26 +01:00
Sarah Hoffmann
ee4684e6a9 Merge pull request #2029 from lonvia/master
Switch CI to github actions
2020-10-28 10:12:52 +01:00
Sarah Hoffmann
a2b1a1eb33 switch CI badge to github actions 2020-10-28 10:11:22 +01:00
Sarah Hoffmann
7bc0fc9611 switch CI to github acitons 2020-10-28 09:29:22 +01:00
Sarah Hoffmann
e0e18e2b6f Merge pull request #2025 from lonvia/fix-secondary-importance-for-countries
Improve secondary result ordering for administrative boundaries
2020-10-22 13:43:22 +02:00
Sarah Hoffmann
788ba6d985 adjust secondary order when no addressimportance available
In cases of countries and remote places without an address
it is possible that 'addressimportance' comes back empty.
Adjust the 'foundorder' to the places importance instead
in such cases.

Fixes #2023.
2020-10-22 10:20:16 +02:00
Sarah Hoffmann
b012e15245 readd boundary:administrative to class importance 2020-10-22 10:16:04 +02:00
Sarah Hoffmann
ba31456278 Merge pull request #2022 from lonvia/populate-rank-25
reorganize ranks of high-level place types
2020-10-20 23:26:18 +02:00
Sarah Hoffmann
b661c66c00 reorganize ranks of high-level place types
Rank 25 is now available for places that should appear in addresses
but not when a street is present. Use this for som block-like
place types. Also document the particularity of rank 25.

subdevisions and allotments are now at the same level as landuse
which they are frequently used together with.
2020-10-20 20:20:49 +02:00
Sarah Hoffmann
f0372736aa Merge pull request #2019 from lonvia/find-pgconfig-inpostgresql-repo
Add support for finding pg_config in Postgresql repos
2020-10-20 15:02:00 +02:00
Sarah Hoffmann
e286d3f23d add support for finding pg_config in Postgresql repos
It uses the same PostgreSQL_ADDITIONAL_VERSIONS variable as
osm2pgsql so that setting that should be sufficient to make
it work.
2020-10-20 11:39:00 +02:00
Sarah Hoffmann
6c8a7b0a1a Merge pull request #2018 from lonvia/update-osm2pgsql
Update to latest osm2pgsql
2020-10-20 11:33:36 +02:00
Sarah Hoffmann
3107dc208a update to latest osm2pgsql
Important changes:

* fix disabling of JIT in Postgresql
* support for finding latests Postgresql from their repos
* no longer create nodes table with flatnodes
2020-10-20 11:08:07 +02:00
Sarah Hoffmann
bf4d75458c add explicit bbox contains check
Now that the containment check uses ST_Relate, we need to add
a separate bbox contains check to ensure that Postgis does the
efficient check first. Note that we still cannot get rid of the
overlap(&&) check because then Postgis will use the wrong indexes.
2020-10-19 10:39:01 +02:00
Sarah Hoffmann
3604d0d913 Merge pull request #2016 from lonvia/locale-address-russia
add country-specific address ranks for Russia
2020-10-18 09:48:23 +02:00
Sarah Hoffmann
dd06638dec Merge pull request #2015 from lonvia/cleanup-address-computation
Rework collection of address parts
2020-10-18 09:48:05 +02:00
Sarah Hoffmann
73a0ec22a3 add country-specific address ranks for Russia
Removes admin level 7, which should not exist and promotes
admin level 8 to municipality level.

place=municipality is only used for boroughs of St. Petersburg,
so demote to level 18.

Fixes #926.
2020-10-17 17:54:06 +02:00
Sarah Hoffmann
b0ef84caae add tests for rank computation 2020-10-17 17:51:22 +02:00
Sarah Hoffmann
64899ef54b add tests for address computation 2020-10-16 11:07:17 +02:00
Sarah Hoffmann
1064a9264e revert to && comparison for geometries
Postgis 3 picks the wrong index when using ~ or @.
2020-10-16 09:49:48 +02:00
Sarah Hoffmann
acfa7bec9c use computed centroid for location_area_large
The new address computation assumes that the centroid is inside
the area. Therefore we cannot use the centroid function. Use the
pre-computed centroid instead which has already been corrected to
be inside the area.
2020-10-15 17:30:52 +02:00
Sarah Hoffmann
62b94e838b correctly set from area column in place_addressline
This was always set to true which brings us to the question
if it is even still needed.
2020-10-15 12:06:53 +02:00
Sarah Hoffmann
5236e7a03e fix use of geometry operators
@ is contained by while ~ is contains.
2020-10-15 12:06:18 +02:00
Sarah Hoffmann
7e9412a044 demote admin boundaries for place areas
Also demote the address rank of an admin boundary when there
is a place area of higher rank that completely contains the
area. This catches the case where city boundaries do not exactly
align with administrative units (see for example Moscow).
2020-10-14 11:33:47 +02:00
Sarah Hoffmann
e47c19beb9 exclude rank 25 when computing addresses of streets
Address rank 25 is used for squares which are address-wise on the
same level as streets.
2020-10-13 22:36:17 +02:00
Sarah Hoffmann
2fe3c654fc overhaul address computation
This is a complete rewrite of the selection of address parts to
be inserted into the place_addressline table.

The new algorithm selects for each rank:
* the boundary overlapping with the addressee and contained
  in the already selected boundaries of lower rank, or failing that
* the place node closest to the addressee that is contained in
  the already selected boundaries and in the influence radius
  of already selected place nodes of lower rank

Place nodes that are not contained in already selected boundaries
of lower rank are completely thrown away. All other candidates are
added as non-address parts.
2020-10-13 22:10:07 +02:00
Sarah Hoffmann
5ec48c66cb move ordering out of getNearFeatures
The two places where the function is called have different ordering
requirement.
2020-10-13 15:24:54 +02:00
Sarah Hoffmann
4e7ec92d6f Merge pull request #2012 from lonvia/format-reverse-debug-output
use Debug class for formatting reverse debug output
2020-10-13 09:23:43 +02:00
Sarah Hoffmann
e115de47fc use Debug class for formatting reverse debug output 2020-10-12 17:12:03 +02:00
Sarah Hoffmann
bb5ffd3904 Merge pull request #2011 from lonvia/increase-city-radius
Increase radius of influence around city nodes
2020-10-12 16:17:39 +02:00
Sarah Hoffmann
887ae7fcab increase radius of influence around city nodes
The current radius does not cover cities with more than a
million inhabitants well.
2020-10-12 14:17:37 +02:00
Sarah Hoffmann
abaaf942cb Merge pull request #2004 from lonvia/demote-place-nodes-in-admin-areas
demote place nodes in admin areas
2020-10-12 11:54:52 +02:00
Sarah Hoffmann
ff47f6f65d when linking always check against original address rank 2020-10-11 12:29:49 +02:00
Sarah Hoffmann
ca680fc9fc make housenumber interpolation tests more lenient 2020-10-11 12:04:53 +02:00
Sarah Hoffmann
b04463bb2d demote place nodes in admin areas
If a place node of city rank and above finds itself in an
administrative boundary of the same address rank, then
increase the address rank by 2. This catches the rather
frequent case where city suburbs are tagged for historical
reasons as towns or villages.
2020-10-11 12:04:53 +02:00
Sarah Hoffmann
c66f701232 Merge pull request #2008 from lonvia/docs-missing-index-migration
docs: migration to new wikipedia needs new index
2020-10-11 11:05:59 +02:00
Sarah Hoffmann
3b23144ae6 docs: migration to new wikipedia needs new index
Fixes #1998.
2020-10-11 10:40:23 +02:00
Sarah Hoffmann
f7a9462337 Merge pull request #2006 from mtmail/ubuntu-20-postgresql-contrib
Ubuntu 20: use postgresql-contrib-12 so no version higher gets installed
2020-10-11 09:45:37 +02:00
Sarah Hoffmann
17f24d0061 Merge pull request #2003 from lonvia/admin-levels-indonesia
Adapt address levels for admin boundaries in Indonesia
2020-10-11 09:43:33 +02:00
marc tobias
eb8bce22b8 Ubuntu 20: use postgresql-contrib-12 so no version higher gets installed 2020-10-10 02:42:46 +02:00
Sarah Hoffmann
6a31691121 adapt address levels for admin boundaries in Indonesia 2020-10-09 22:28:06 +02:00
Sarah Hoffmann
25c4bf6ed4 Merge pull request #2002 from lonvia/analyse-indexing-script
Add script for detailed explaing of indexing trigger
2020-10-09 20:23:51 +02:00
Sarah Hoffmann
ec5743bcc0 add script for detailed explaing of indexing trigger 2020-10-09 17:38:33 +02:00
Sarah Hoffmann
7cd330ee55 Merge pull request #1996 from lonvia/remove-postcode-search-structured
Restrict postcode searches to postcode in first token
2020-10-06 17:04:22 +02:00
Sarah Hoffmann
7d2b6879c8 restrict postcode searches to postcode in first token
In structured queries we should only assume that it is
a postcode search when only the postcode and optionally
the country is given. If any other term is present, it
is better to avoid the search for postcode as it yields
too many bad searches. Given that the terms in a structured
query are ordered, this means that the postcode must be
the first token just like in the unstructured query.

Fixes #1988.
2020-10-06 14:08:31 +02:00
Sarah Hoffmann
a40684162a Revert "adapt tests to rank_search removal"
This reverts commit 2a717da850.
2020-10-06 13:59:50 +02:00
Sarah Hoffmann
cd997ff058 Merge pull request #1995 from lonvia/update-osm2pgsql
update to latest osm2pgsql version
2020-10-05 18:14:29 +02:00
Sarah Hoffmann
5391bb6cf7 update to latest osm2pgsql version
The latest version of osm2pgsql no longer creates indexes on
the members of planet_osm_rels. So we do that ourselves.
Given that we only need to access associated street relations,
the index can become quite a bit smaller.
2020-10-05 17:11:13 +02:00
Sarah Hoffmann
6b8ce1ee74 Merge pull request #1986 from mtmail/document-drop-idx_placex_geometry_reverse_lookupPoint
migration guide: idx_placex_geometry_reverse_lookupPoint can be dropped
2020-10-05 16:18:46 +02:00
Sarah Hoffmann
a37d46ec25 Merge pull request #1993 from mtmail/country-name-eSwatini
country names: Swaziland => eSwatini
2020-10-05 16:17:54 +02:00
marc tobias
b5c8237118 country names: Swaziland => eSwatini 2020-10-04 13:38:15 +02:00
marc tobias
eeffe2caf4 migration guide: idx_placex_geometry_reverse_lookupPoint can be dropped 2020-09-30 14:28:20 +02:00
Sarah Hoffmann
c5c7a6a453 Merge pull request #1984 from lonvia/remove-reverse-index
Remove unused idx_placex_geometry_reverse_lookupPoint
2020-09-30 12:02:34 +02:00
Sarah Hoffmann
b969392f40 remove removed index from database check 2020-09-30 11:33:15 +02:00
Sarah Hoffmann
40bc1752c2 remove unused idx_placex_geometry_reverse_lookupPoint
The index has been unused ever since the query using it was
changed two years ago. Given that it has not been missed much,
drop it completely here.
2020-09-30 09:21:35 +02:00
Sarah Hoffmann
851c3779b5 Merge pull request #1982 from lonvia/more-rank-search-removal
More rank search removal
2020-09-26 11:08:20 +02:00
Sarah Hoffmann
2a717da850 adapt tests to rank_search removal 2020-09-26 09:10:37 +02:00
Sarah Hoffmann
f8694da3c9 Remove more rank_search usage from address computation
Fixes #1904.
2020-09-25 17:50:36 +02:00
Sarah Hoffmann
eeb2b1f998 Merge pull request #1980 from lonvia/add-descriptive-term-for-address-rank-24
add descriptive term for address rank 24
2020-09-25 16:56:35 +02:00
Sarah Hoffmann
1db8f7e353 add descriptive term for address rank 24
With that term we have terms for all ranks, so that no generic
'administrative' term will show up in the address details anymore.
2020-09-25 16:02:17 +02:00
Sarah Hoffmann
6625e93be6 Merge pull request #1975 from lonvia/simplify-parent-assignment-for-unlisted-places
Use closest containing place area for parent of unlisted addr:place
2020-09-23 19:10:42 +02:00
Sarah Hoffmann
56053988bf Merge pull request #1970 from lonvia/remove-duplicate-geometry-check
Switch recursive updates to using rank_search
2020-09-23 18:51:35 +02:00
Sarah Hoffmann
d9325dc11a use rank_address when invalidating containing objects
Only rank_address is now relevant for determining if a place
could be part of an address.
2020-09-23 17:44:31 +02:00
Sarah Hoffmann
d3ca9dd3f7 remove ST_Covers check when also testing for ST_Intersects
Using both is slightly problematic because they have different
ways to use the index. Newer versions of Postgis exhibit a
query planner issue when both functions appear together.
As ST_Intersects includes ST_Covers, simply remove the latter.
2020-09-23 17:44:31 +02:00
Sarah Hoffmann
e552f6bce5 use closest containing place for unlisted addr:place
We can't use getNearFeatures() to determine the parent of a
place with an unlisted addr:place because this function
returns place nodes that are potentially outside the area
of interest. Doing the complete address computation is too
expensive, so simply use the area with the largest rank that
contains the feature instead.
2020-09-23 17:33:42 +02:00
Sarah Hoffmann
cf23e10382 Merge pull request #1974 from lonvia/show-unknown-addr-place
add unknown addr:place to address output
2020-09-23 15:26:12 +02:00
Sarah Hoffmann
c84e7e72f1 add unknown addr:place to address output
When a POI has no addr:street but an addr:place that is not
contained in the name list of the parent place, then remember
this situation and merge the content of addr:place into the
address output.

We don't need to care about translations in this case because
it is obvious that no object with translations exists if the
parent isn't the object named in addr:place.
2020-09-23 11:55:18 +02:00
Sarah Hoffmann
f2ff351da4 Merge pull request #1971 from lonvia/drop-support-for-isin
Drop support for is_in tag
2020-09-23 09:20:35 +02:00
Sarah Hoffmann
c5c242d193 Merge pull request #1972 from lonvia/exclude-unnamed-highway-areas
Exclude unnamed highway areas
2020-09-23 09:20:16 +02:00
Sarah Hoffmann
72193a1c23 exclude unnamed highway areas
These are used to mark large paved areas. Sometimes they exists
together with named regular streets. In such cases the unnamed
area may overshadow the actual street when computing the address
parent. As unnamed highways are not very useful anyway, we
simply remove them from the database.
2020-09-22 21:42:13 +02:00
Sarah Hoffmann
248d6b413a remove test for is_in 2020-09-22 21:36:49 +02:00
Sarah Hoffmann
d04e87fb80 drop suport for is_in tag 2020-09-22 20:26:36 +02:00
Sarah Hoffmann
cc61b74cde Merge pull request #1965 from lonvia/make-addr-tags-searchable
Make addresses searchable by their addr: tags
2020-09-22 20:21:06 +02:00
Sarah Hoffmann
915d362b11 Merge pull request #1966 from lonvia/remove-dead-code
remove dead code
2020-09-21 10:59:58 +02:00
Sarah Hoffmann
f8a5f2964f remove dead code
The SQL query has moved into the addTokensFromDB() funtion.
2020-09-21 10:39:14 +02:00
Sarah Hoffmann
a8dfbcef44 always bind addr:place to place instead of street
If an addr:place is given but no addr:street tag, then bind
the rank 30 object always to a <=25 object, even when there
is none found with the same name.
2020-09-21 10:15:14 +02:00
Sarah Hoffmann
caea14d035 merge addr tags into search_name table
When a place of rank 30 has addr tags that are not covered by the
search terms of the parent, add a separate entry for the POI in
the search_name table that includes the addr tags. We can only
do that with named places. For POIs without a name the housenumber
is used as name. If that is not available either, searching still
won't work.
2020-09-21 10:15:14 +02:00
Sarah Hoffmann
c5fc12e04b Merge pull request #1964 from lonvia/remove-postcodes-with-colon
ignore postcodes with colons
2020-09-19 17:47:43 +02:00
Sarah Hoffmann
731c620e31 ignore postcodes with colons
Colons are used as a delimiter in tiger:left and tiger:right tags
when multiple postcodes are given. Ignore those. This was already
done in the postcode update script. This changes just makes the
two places consistent where postcodes are added.
2020-09-19 17:23:40 +02:00
Ben Antony
0dad470eb9 Update broken links in documentation 2020-09-18 22:52:59 +02:00
Sarah Hoffmann
6947ab3a65 Merge pull request #1963 from lonvia/remove-postcodes-from-search-index
Remove postcodes from search index
2020-09-18 22:41:24 +02:00
Sarah Hoffmann
b219374d36 remove special casing for rank 25 postcodes
They can be computed like any other place.
2020-09-18 16:18:02 +02:00
Sarah Hoffmann
576ee5aaab use same label for all types of postcode in address 2020-09-18 16:17:30 +02:00
Sarah Hoffmann
4c9cfe2532 remove postcodes entirely from indexing
place=postcode places are artificial places that collect addr:postcode
points for aggration. They should neither show up in the address nor
be searchable. That means that there is no need to index them at all.
Only let boundary=postal_code through which define correct areas for
postcodes.
2020-09-18 15:09:35 +02:00
Sarah Hoffmann
7fb62ea904 postal boundary may be imported without name
Postal boundaries usually just have the postcode tag set and are
therefore officially 'nameless'. We want to have them as
boundary=postal_code anyways in order to distiguish them from postcode
points inherited from addr: tags.
2020-09-18 11:33:45 +02:00
Sarah Hoffmann
fe250d3ee8 Merge pull request #1961 from lonvia/set-place-type-for-result-in-address
Use place type of for result object in address parts
2020-09-17 21:23:40 +02:00
Sarah Hoffmann
6f55c67d16 Merge pull request #1960 from lonvia/fix-postcodes-duplicated-by-normalization
Make sure that all postcodes have an entry in the word table
2020-09-17 21:23:23 +02:00
Sarah Hoffmann
3aa6e6a365 Merge pull request #1957 from lonvia/docs-separate-out-deployment
Restructure vagrant scripts and installation documentation
2020-09-17 21:22:48 +02:00
Sarah Hoffmann
b8ced5d96b Merge pull request #1962 from mtmail/travis-ci-without-webserver
travis-ci: we dont need Apache installed
2020-09-17 20:38:17 +02:00
marc tobias
9e21c6a862 travis-ci: we dont need Apache installed 2020-09-17 18:25:23 +02:00
Sarah Hoffmann
fe8566928e use place type of for result object in address parts
Boundaries shound derive the address part type from the
linked place if possible. This was already implemented
for the address objects but not for the address information
from the address itself.

Fixes #1949.
2020-09-17 18:17:01 +02:00
Sarah Hoffmann
3600709116 make sure that all postcodes have an entry in word
It may happen that two different postcodes normalize to exactly
the same token. In that case we still need two different entries
in the word table. Token lookup will then make sure that the correct
one is choosen.

Fixes #1953.
2020-09-17 17:11:22 +02:00
Sarah Hoffmann
df115c73b2 Merge pull request #1945 from mtmail/travis-ubuntu-20
Upgrade Travis-CI from Ubuntu 18 to 20
2020-09-17 16:20:19 +02:00
Sarah Hoffmann
dbe025fe40 docs: fix formatting 2020-09-17 10:16:25 +02:00
Sarah Hoffmann
2b11a47a2f restructure developer's manual
Add a section on setting up the development environment which now
also includes the former chapter on recreating the documentation.
Move the README from test/ into the manual as the new Testing
chapter.
2020-09-17 09:54:46 +02:00
Sarah Hoffmann
fb63a7418e make CentOS 8 the default vagrant script
This puts it in line with the Ubunutu scripts.
2020-09-16 17:34:36 +02:00
Sarah Hoffmann
2029931b95 adapt Ubunut-20 vagrant file to triple webserver config 2020-09-16 16:27:34 +02:00
Sarah Hoffmann
91219bb3dd restructure webserver setup in ubuntu 18 script
Unify the two vagrant scripts for Ubuntu 18. The script can now
be run in three modes: no webserver, with apache, with nginx.
The default mode is to not install any webserver at all. This is
normally sufficient when just developping.

The commit also switches from bento to generic boxes and adds config
for running with a libvirt provider. You need an NFS deamon for
synchronized folders.
2020-09-16 11:19:38 +02:00
Sarah Hoffmann
ab4fe4d58a add 'make serve-global'
This runs the PHP development server in a mode where it listens
globally. This is needed when running inside vagrant and port-forwarding
to the host machine.
2020-09-16 11:15:55 +02:00
Sarah Hoffmann
8ff1f16b7f remove host from default website URL
Just assume that Nominatim runs under the root URL. This is a
more versatile base that also makes 'make serve' work out of the
box.
2020-09-16 11:13:51 +02:00
Sarah Hoffmann
cf4f62c82c docs: move webserver installation into separate chapter 2020-09-15 23:51:25 +02:00
marc tobias
1d2f4264a2 Upgrade Travis-CI from Ubuntu 18 to 20 2020-09-14 12:44:50 +02:00
Sarah Hoffmann
9d506a4afa Merge pull request #1943 from mtmail/no-get-magic-quotes
starting PHP 5.4 get_magic_quotes_gpc() returns false, no need to check
2020-09-14 08:47:49 +02:00
marc tobias
7ac22e9227 starting PHP 5.4 get_magic_quotes_gpc() returns false, no need to check 2020-09-14 00:45:22 +02:00
Sarah Hoffmann
47f2fe724f Merge pull request #1940 from mtmail/north-macedonia
country_name Macedonia => North Macedonia
2020-09-12 16:15:22 +02:00
marc tobias
130571fb29 country_name Macedonia => North Macedonia 2020-09-07 17:24:06 +02:00
Sarah Hoffmann
da7218350b Merge pull request #1936 from lonvia/tweeking-of-ranks
More fine tuning of default rank assignments
2020-09-01 21:23:28 +02:00
Sarah Hoffmann
b6078de6f8 adapt tests to ranking changes 2020-09-01 18:03:17 +02:00
Sarah Hoffmann
07430b0194 tweak size of large POIs
Further reduce the size from which on POIs are no longer bound
to streets but only to larger objects. The point of reference,
of what a largest POI could be that is still bound is JFK airport.
2020-09-01 18:00:40 +02:00
Sarah Hoffmann
fae02fab00 address rank adjustment for addressable boundaries only
Only administrative boundaries with an address rank need
to be adjusted. Otherwise just handle them like any other
object.
2020-09-01 17:59:26 +02:00
Sarah Hoffmann
a68cdc40be improve fallback ranking
Boundaries and places now always get a rank < 26 to make sure that
they do not parent to a street. Skip boundary=place completely
because they will be covered throught the secondary place tag.
2020-09-01 17:55:40 +02:00
Sarah Hoffmann
76b307f42a Merge pull request #1934 from lonvia/fix-deletion-of-large-highway-areas
Do not block deletion of large highway areas
2020-08-28 10:08:42 +02:00
Sarah Hoffmann
6e4b7eb966 do not block deletion of large highway areas
Deletion of areas should only e blocked for addressable features.
Streets and POIs do not have a large impact on updates.
2020-08-28 09:49:21 +02:00
Sarah Hoffmann
770754ae2c place lookup: filter places that have no details
In rare cases search_name might have entries for places for
which we do not return details, in particular for linkees.
Need to remove those entries in the result list before returning
the details.

Fixes #1932.
2020-08-27 09:33:21 +02:00
Sarah Hoffmann
a932855f6f Merge pull request #1931 from lonvia/stable-sort-for-results
Reranking of results must be stable
2020-08-26 20:52:17 +02:00
Sarah Hoffmann
72ee1abc90 ensure that ordering by importance is stable
The initial search results retrieved from the database already come
preordered, either by importnace or by distance. We want to keep
that order if all other things are equal.
2020-08-26 17:42:43 +02:00
Sarah Hoffmann
9e1909643c PlaceLookup should return results in input order 2020-08-26 17:15:11 +02:00
Sarah Hoffmann
77a1329285 Merge pull request #1930 from lonvia/add-support-for-squares
Add support for place=square
2020-08-26 15:11:45 +02:00
Sarah Hoffmann
be6ecc388c add support for place=square
Squares are now addressable (on address level 25) and thus can
be attached to a house number via addr:place. Needed to increase
the rank range for matching up addr:place to 25.
2020-08-26 12:12:52 +02:00
Sarah Hoffmann
13dba94307 do not run rank 0 objects in parallel
Waterways are at address rank 0 and do linking. This might lead to
deadlocks.
2020-08-22 19:51:19 +02:00
Sarah Hoffmann
d51440bb5d Merge pull request #1926 from lonvia/speed-up-location-lookup
Increase splitting for large geometries
2020-08-22 17:04:34 +02:00
Sarah Hoffmann
d730e179bf tests: use larger grid to avoid rouding errors 2020-08-22 16:04:24 +02:00
Sarah Hoffmann
559fe513fa increase splitting for large geometries
When computing the address parts for a geometry, we need to do
a ST_Relates lookup in the location_area_large_* tables. This is
potentially very expensive for geometries with many vertices.
There is already a funtion for splitting large areas to reduce the
impact. This commit reduces the minimum area of a split, effectively
increasing the number of splits.

The effect on database size is minimal (around 3% increase), while
the indexing speed for streets increases by a good 60%.
2020-08-20 16:37:33 +02:00
Sarah Hoffmann
5b20fa7e38 Merge pull request #1923 from lonvia/split-indexing-for-boundries
Rework indexing order of places
2020-08-20 15:03:29 +02:00
Sarah Hoffmann
d16e75de91 Merge pull request #1924 from lonvia/installation-instructions-external-server
docs: installation hints for external databases
2020-08-19 15:17:32 +02:00
Sarah Hoffmann
6fd9994590 docs: installation hints for external databases
Fixes #1882.
2020-08-19 15:03:42 +02:00
Sarah Hoffmann
b9729f3b66 Merge pull request #1921 from lonvia/skip-over-traffic-signs
Remove traffic signs from full styles
2020-08-19 11:50:28 +02:00
Sarah Hoffmann
d6ff7475f1 make sure that addr:* tags can always be searched for
Always add contents of addr:* tags into address part of the search
table, even when there is no corresponding other name. This keeps
search tolerant to the kind of tagging where parts show up in the
address that have no corresponding object in the database or where
it is only an unaddressable object.
2020-08-19 11:44:10 +02:00
Sarah Hoffmann
984979d9bf add migration for new indxing schema 2020-08-18 21:40:53 +02:00
Sarah Hoffmann
73c449b97b switch indexind to address rank
A place needs all lower address rank object indexed to make up
the address. The search rank no longer ensures that as it can have
a different ordering than the address rank.

This switches indexing rank order to address ranks. Non-address
objects (with address rank 0) are indexed together with POIs.
2020-08-18 16:58:58 +02:00
Sarah Hoffmann
1529666232 use only centroid to get parent admin boundaries
Using the full geometry is far too expensive.
2020-08-18 15:17:09 +02:00
Sarah Hoffmann
3816b86a9e nominatim: also index boundaries by rank
We need to make sure that the entry in serach_name from a lower rank
is indeed available.
2020-08-18 15:17:09 +02:00
Sarah Hoffmann
a4b30fc649 index admin boundaries before everything else
Avoids irregularities that might happen because the address
rank of a boundary is changed through linking.
2020-08-18 15:17:09 +02:00
Sarah Hoffmann
fc50eb8688 nominatim: move DBConnection class into its own file 2020-08-18 15:17:09 +02:00
Sarah Hoffmann
071db1fae7 remove traffic signs from full styles
Traffic signs rarely have name and are therefore mostly not
searchable. Remove them completely. Allow street lamps only when
they have a name. Removes about 2M object from a planet instance.
2020-08-15 22:37:45 +02:00
Sarah Hoffmann
a163ea63c5 Merge pull request #1920 from lonvia/remove-linked-place-when-updating
Remove linked_place from extratags when updating
2020-08-14 09:44:56 +02:00
Sarah Hoffmann
e21a707166 remove linked_place from extratags when updating
Before updating an admin boundary we need to make sure that any
artificially generated 'linked_place' entry is removed from the
extratags column. This ensures that the place designation does
not linger when a linked place disappears and that it is updated
when the linking changes.
2020-08-13 16:59:11 +02:00
Sarah Hoffmann
1b484fa90d Merge pull request #1919 from lonvia/tests-for-ranking
More tests and fixes for address rank computation
2020-08-13 09:13:10 +02:00
Sarah Hoffmann
06aa0f0b76 use address rank for address forming when available 2020-08-12 22:22:24 +02:00
Sarah Hoffmann
fb8bb30144 boundary address ranks must not go above 25
Fixes #1914.
2020-08-12 22:22:24 +02:00
Sarah Hoffmann
7429a33818 add simple tests for address rank computation 2020-08-12 22:22:24 +02:00
Sarah Hoffmann
5b9f61cff8 also take place tags into account for address rank
An admin boundary might have a place tag but no matching place node.
We still should use the place value as indicator for the address
rank in this case.
2020-08-12 22:22:24 +02:00
Sarah Hoffmann
4ff0e20e1f Merge pull request #1917 from lonvia/docs-rank-levels
docs: add tables for the meaning of address and search ranks
2020-08-11 15:12:16 +02:00
Sarah Hoffmann
a692bfa8f9 docs: add tables for the meaning of address and search ranks
Also makes tables a bit more readable by adding margins and better
headers.
2020-08-11 11:48:55 +02:00
Sarah Hoffmann
49dd927406 Merge pull request #1918 from lonvia/remove-more-osmosis-init
remove more traces of --osmosis-init switch
2020-08-11 11:48:17 +02:00
Sarah Hoffmann
4b21cc1737 remove more traces of osmosis-init 2020-08-11 10:43:04 +02:00
Sarah Hoffmann
7ae16f7302 Merge pull request #1912 from lonvia/remove-unused-import-update-functions
remove unused functions from setup and update
2020-08-09 09:31:48 +02:00
Sarah Hoffmann
73566a9f15 remove unused functions from setup and update
Removes the defunct --osmosis-init and --no-api switches and the
unsupported (and unnecessary) deduplicate. Also removes
'experimental' from --setup-website as this is a required
function now.
2020-08-06 16:16:35 +02:00
Sarah Hoffmann
fbdf205ab4 Merge pull request #1909 from lonvia/minor-fixes
Make SQL debug statements execute again
2020-08-06 11:07:38 +02:00
Sarah Hoffmann
83b2b4970d Make SQL debug statements execute again
There were some old variable names used that are no longer valid.
Either fix them or remove the statement completely.

Fixes #1907.
2020-08-06 09:29:19 +02:00
Sarah Hoffmann
f29dc7d7ac Merge pull request #1865 from mtmail/how-to-import-test-db
test/README.md - more instructions how to import test db
2020-08-04 14:31:19 +02:00
Sarah Hoffmann
4d5db74c18 Merge pull request #1902 from lonvia/avoid-touching-boundaries-in-addresses
Be more strict what areas make up an address
2020-08-04 14:30:08 +02:00
Sarah Hoffmann
8c7d285e03 Merge pull request #1901 from lonvia/speed-up-indexing
Batch-index places at rank 30
2020-08-04 12:32:16 +02:00
Sarah Hoffmann
1347abb1e7 be more strict what areas make up an address
Exclude boundaries that touch a line in only one point and
that touch areas only along the boundary.

Fixes #1900.
2020-08-04 12:08:50 +02:00
Sarah Hoffmann
2cb85e48b4 adapt test results to new ranking 2020-08-03 16:57:22 +02:00
Sarah Hoffmann
5be084e0f5 indexer: allow batch processing of places
Request and process multiple place_ids at once so that
Postgres can make better use of caching and there are less
transactions running.
2020-08-03 10:32:39 +02:00
Sarah Hoffmann
2323923bec indexer: move progress tracker into separate class 2020-08-03 10:32:39 +02:00
Sarah Hoffmann
0f54d42863 indexer: get rid of special handling of few places
Given that we do not distiribute geometry sectors to threads anymore,
there is no point in this kind of special handling.
2020-08-03 10:32:39 +02:00
Sarah Hoffmann
8201c7f46c Merge pull request #1899 from mtmail/use-new-dsn-format-in-vagrant-md
VAGRANT.md: we use different database DSN syntax these days
2020-08-03 10:26:35 +02:00
marc tobias
ed22d640f4 VAGRANT.md: we use different database DSN syntax these days 2020-07-31 16:52:29 +02:00
marc tobias
01b009ff24 test/README.md - more instructions how to import test db 2020-07-31 16:50:27 +02:00
Sarah Hoffmann
665b90bf5a Merge pull request #1898 from lonvia/show-housenumber-with-housename
make house number reappear in display name on named POIs
2020-07-31 10:11:09 +02:00
Sarah Hoffmann
4e1f245331 make house number reappear in display name on named POIs
After 6cc6cf950c names and house numbers
of POIS got mingled into a single item when creating the display name.
Add the house number as extra information without place_id to avoid
later mangling.
2020-07-30 23:39:55 +02:00
Sarah Hoffmann
f8e1d39208 Merge pull request #1894 from lonvia/fix-hierarchy-by-admin-level
Preserve admin level hierarchy between admin boundaries
2020-07-29 09:20:34 +02:00
Sarah Hoffmann
955dae5d4b Merge pull request #1895 from lonvia/update-less-quiet
Make indexing during updates less quiet
2020-07-29 09:20:14 +02:00
Sarah Hoffmann
b78cd3f4c9 make indexing during updates less quiet
Adjust verbosity behaviour to that of indexing during setup.
2020-07-28 22:35:51 +02:00
Sarah Hoffmann
6a3eb7edf2 preserve admin level hierarchy between admin boundaries
When the address rank of an admin boundary is changed because
of an attached place type, it may happen that the admin_level
hierarchy gets inversed. Avoid that by adjusting the address
rank if an inversion is detected.
2020-07-28 22:15:25 +02:00
Sarah Hoffmann
0a710c0762 Merge pull request #1891 from lonvia/automatic-db-setup
Implicitly connect to database during setup
2020-07-26 16:08:45 +02:00
Sarah Hoffmann
9a204f6284 test: make road really cross the boundary 2020-07-26 15:57:07 +02:00
Sarah Hoffmann
7837970303 remove connect() in update script
This is now implicit.
2020-07-26 12:27:52 +02:00
Sarah Hoffmann
8cd9550295 implicitly connect to database during setup
Make access to the DB object a function, so that the connection
can be opened implicitly when the object is accessed for the first
time. This way we no longer need to check beforehand if a specific
function of the setup needs DB access or not.

Also move the check for the module to the relevant sub step.
2020-07-26 11:56:00 +02:00
Sarah Hoffmann
840c692d5b Merge pull request #1890 from lonvia/add-wiki-tags-to-all-styles
Add wiki tags to all styles
2020-07-25 11:34:51 +02:00
Sarah Hoffmann
05e0d3e2d4 add wiki tags to all styles
wikipedia and wikidata tags are needed to compute the importance
so we need to put them into extra tags for all styles.

Fixes #1885.
2020-07-25 10:00:18 +02:00
Sarah Hoffmann
7429ff9dce forgit to adapt info message 2020-07-18 12:14:51 +02:00
Sarah Hoffmann
29602fe0cf Merge pull request #1874 from joy-yyd/rank_modification
House number search fix for #164
2020-07-18 12:12:37 +02:00
Sarah Hoffmann
1b95ec5591 Merge pull request #1884 from lonvia/fixes-for-webserver-settigns
Small fixes for new webserver settings file
2020-07-18 11:44:01 +02:00
Sarah Hoffmann
3efe0dc8dc move website settings back to settings/
We don't want the settings to become visible when a server is
accidentally configured wrongly.
2020-07-18 11:02:07 +02:00
Sarah Hoffmann
241e4af1b0 log file is a string when not set to false 2020-07-18 11:00:17 +02:00
(Joy) Yuanyue Ding
cac8a8df18 Modifiy the range of address_rank, fix for issue #164 2020-07-08 17:47:38 +02:00
Sarah Hoffmann
1181ceb735 Merge pull request #1873 from lonvia/resurrect-debug-option
Reenable debug parameter
2020-07-08 10:06:49 +02:00
Sarah Hoffmann
f376f45277 default language is a string when not set to false 2020-07-08 08:38:11 +02:00
Sarah Hoffmann
d364afdf3b reenable debug parameter
The parameter got lost when switching to website settings.
Given that the use of a fixed parameter is limited,
debugging output can now only be set via the URL parameter.
2020-07-08 08:32:46 +02:00
Sarah Hoffmann
7ecfcf7eaa Merge pull request #1869 from lonvia/migration-for-setup-website
docs: add migration for new --setup-website step
2020-07-05 21:06:27 +02:00
Sarah Hoffmann
709c9bbe88 Merge pull request #1868 from lonvia/reverse-for-addressable-places-only
reverse: ignore place nodes without an address rank
2020-07-05 21:06:00 +02:00
Sarah Hoffmann
db175f606e docs: add migration for new --setup-website step 2020-07-05 15:46:06 +02:00
Sarah Hoffmann
3a664dc676 reverse: ignore place nodes without an address rank
We already exclude all polygon places without an address
rank. place nodes should also be ignored. This removes
places like locality from the reverse results.

Fixes #1839.
2020-07-05 15:38:49 +02:00
Sarah Hoffmann
5f8d5f10a6 docs: rename documentation chapter
Avoids confusion about this being the documentation itself.
2020-07-05 11:14:48 +02:00
Sarah Hoffmann
f02d4d9677 docs: move external data sources into simple page 2020-07-05 11:13:28 +02:00
Sarah Hoffmann
1889643eca Merge branch 'move-datasources-into-separate-repos' of https://github.com/mtmail/Nominatim into mtmail-move-datasources-into-separate-repos 2020-07-05 10:57:12 +02:00
Sarah Hoffmann
4fc5c2024b Merge pull request #1864 from lonvia/langauge-specific-presuffixes
exclude language-specific name:prefix and name:suffix
2020-07-05 10:54:42 +02:00
Sarah Hoffmann
354487d7f4 Merge pull request #1829 from krahulreddy/websiteSetup
Added setup-website option
2020-07-01 18:11:50 +02:00
Sarah Hoffmann
6478058946 Merge pull request #1857 from mtmail/db-migration-update-functions
Migration.md - admin also need to run recreate db functions
2020-07-01 18:11:14 +02:00
Sarah Hoffmann
8b8dcea3de exclude language-specific name:prefix and name:suffix
There are about 1k suffixes and 20k prefixes with a
language-speicfic variant in use. These should not
show up as names.
2020-07-01 18:00:53 +02:00
marc tobias
64ace51e02 move data-sources/ directory in new git repos 2020-07-01 17:38:44 +02:00
marc tobias
4cb5c67a44 Migration.md - admin also need to run recreate db functions 2020-07-01 16:46:54 +02:00
Sarah Hoffmann
2edefd9e80 sql: fix rank variable type
The rank type needs to match the parameter type of
update_place_diameter().

Fixes #1851.
2020-07-01 15:48:00 +02:00
Sarah Hoffmann
fa8b16e7e7 Merge pull request #1858 from lonvia/update-osm2pgsql
Update osm2pgsql
2020-07-01 11:38:40 +02:00
Sarah Hoffmann
2a953700e2 update osm2pgsql (hang on multipolygons) 2020-06-30 22:39:38 +02:00
Sarah Hoffmann
c1dc835b5c Merge pull request #1852 from osm-search/disable-jit-for-updates
Disable Postgresql jit and parallel processing for osm2pgsql updates
2020-06-28 22:39:44 +02:00
Sarah Hoffmann
214f92c428 make phpcs happy 2020-06-28 18:24:29 +02:00
Sarah Hoffmann
95fc680af9 travis: reduce the size of diff download 2020-06-28 18:21:42 +02:00
Sarah Hoffmann
22d0c6b5e1 travis: run a single round of updates on the Monaco import 2020-06-28 18:09:08 +02:00
Sarah Hoffmann
ff1be13d0e disable JIT and parallel processing for osm2pgsql in updates
This is known to cause issues because of bad indexing
statistics.
2020-06-28 18:06:06 +02:00
K Rahul Reddy
a3201be7e7 Moved settings-frontend to website/ 2020-06-27 10:45:53 +05:30
K Rahul Reddy
37f0b51dff Updated setup.php 2020-06-27 10:45:53 +05:30
K Rahul Reddy
95d2dd74ad Documentation updated 2020-06-27 10:45:53 +05:30
K Rahul Reddy
a175a25e6c Added setup-website to travis.yml 2020-06-27 10:45:51 +05:30
K Rahul Reddy
6c406124dd Added setup-website option 2020-06-27 10:45:51 +05:30
Sarah Hoffmann
0f17529486 Merge pull request #1836 from lonvia/rework-large-location-II
Change processing of place nodes in addresses
2020-06-26 21:36:32 +02:00
Sarah Hoffmann
dd10c867db docs: minor typo and grammar fixes 2020-06-23 23:31:18 +02:00
Sarah Hoffmann
8335dd3aa5 Merge branch 'split-off-test-tool-installation-instructions' of https://github.com/mtmail/Nominatim into mtmail-split-off-test-tool-installation-instructions 2020-06-23 23:25:46 +02:00
Sarah Hoffmann
cd73ac7038 Merge pull request #1841 from mtmail/faq-entry-about-rebuilding-nominatim-so
FAQ addition when to rebuild nominatim.so
2020-06-23 23:20:43 +02:00
Sarah Hoffmann
7cc33a839c Merge pull request #1834 from mtmail/faq-invalid-page-in-block
FAQ entry for PostgreSQL -invalid page in block-
2020-06-23 23:10:26 +02:00
marc tobias
828da6a425 FAQ addition when to rebuild nominatim.so 2020-06-20 04:01:50 +02:00
marc tobias
2e5bdb8794 Put install instructions of test tools into separate docs/ markdown file 2020-06-20 03:48:07 +02:00
marc tobias
f56bac350b FAQ entry for PostgreSQL -invalid page in block- 2020-06-19 21:16:57 +02:00
Sarah Hoffmann
d373f16c81 Merge pull request #1838 from lonvia/make-serve
add 'make serve' command
2020-06-19 20:12:56 +02:00
Sarah Hoffmann
ebffa15c7c add 'make serve' command
Starts up PHP's built-in webserver in the website/ directory.
Useful for testing and development.

See #1831.
2020-06-19 17:35:24 +02:00
Sarah Hoffmann
6e4ee160ee adapt tests to new search ranks 2020-06-17 10:53:11 +02:00
Sarah Hoffmann
a5697c5279 change place node expansion for large area table
So far we've used a buffer around a place node to define its
potential address reach. This had two problems: the buffer was
so large that addresses often contain false positives and the
buffer is really distorted when getting closer to the poles.

Change the buffer here to draw a bounndig box at a certain
distance in meter. This means that we always use the same
box everywhere on the planet and can make the extent much
smaller. Using a box has the advantage that it is much faster
to figure out if a point is within the box.
2020-06-17 10:53:11 +02:00
Sarah Hoffmann
5abec720d8 Merge pull request #1830 from lonvia/docs-for-nominatim-ui
Add usage docs for nominatim-ui
2020-06-17 09:30:29 +02:00
Sarah Hoffmann
84403b47cb add usage docs for nominatim-ui
Includes migration guides for Apache and nginx.
2020-06-13 20:09:20 +02:00
Sarah Hoffmann
4342a539af Merge pull request #1827 from mtmail/centos8-postgresql12-without-proj
Vagrant centos8: proj52 not needed, use postgresql 12/postgis 3.0
2020-06-11 23:12:38 +02:00
Sarah Hoffmann
f4e744ade5 remove version warning from ubuntu 20 installation 2020-06-11 22:46:04 +02:00
Sarah Hoffmann
155d4c5591 cmake: only require php for import and api 2020-06-11 22:44:00 +02:00
marc tobias
f5cbe0e6ba Vagrant centos8: proj52 not needed, use postgresql 12/postgis 3.0 2020-06-11 19:10:38 +02:00
Sarah Hoffmann
a0e7d80daf prepare 3.5.0 release 2020-06-06 20:30:29 +02:00
Sarah Hoffmann
d7e2f61e13 Merge pull request #1822 from lonvia/document-address-labels
document which labels may appear in the address info
2020-06-06 18:02:40 +02:00
Sarah Hoffmann
96ed4b02d7 document which labels may appear in the address info
The list is manually generated and only valid for the default
configuration as used on openstreetmap.org.

Fixes #1808.
2020-06-06 17:32:30 +02:00
Sarah Hoffmann
cffc7c0121 parents for large POIs must be address features
There are a couple of places with a search rank < 25 which are
not addressable like waterways and islands. We don't want them
to function as parents for POI-level objects. So use the
address rank for finding parents, not the search rank.

See #1815.
2020-06-03 11:30:51 +02:00
Sarah Hoffmann
d89000cc3d Merge pull request #1813 from lonvia/revert-concurrent-indexing
revert building indexes concurrently
2020-06-01 22:13:33 +02:00
Sarah Hoffmann
3661c75b39 Merge pull request #1814 from lonvia/disable-jit
Disable JIT and parallel workers when indexing
2020-06-01 22:13:07 +02:00
Sarah Hoffmann
3b20b11a9f remove warnings about postgres 12 and postgis 3 2020-05-30 11:25:00 +02:00
Sarah Hoffmann
cca366196d Disable JIT and parallel workers when indexing
Locally disable jit and parallel workers in the connection that
do indexing. The query planner tends to be overenthusiatic about
using JIT. But with the rather less complex queries we have, the
overhead tends to be larger than the performance gain.

Fixes #1677.
2020-05-30 11:20:16 +02:00
Sarah Hoffmann
e09d444068 revert building indexes concurrently
This does not really solve the issue with blocking autovacuum
requests and can lead to incomplete indexes and bogus
out-of-disk messages.

Fixes #1549.
2020-05-30 11:00:05 +02:00
Sarah Hoffmann
4956f5e710 Merge pull request #1809 from lonvia/fix-display-names
Fix some glitches in choice of address tags
2020-05-27 21:28:17 +02:00
Sarah Hoffmann
5bebdfa434 Merge pull request #1804 from lonvia/ranking-improvement-germany
Localized ranking adaptions for Germany, Sweden and Norway
2020-05-27 11:58:21 +02:00
Sarah Hoffmann
aea915aa8d prefer linked place type over own place type
For state cities, tagging might prefer the place=state on
the admin boundary. The linked place is a more reliable indicator.
2020-05-27 11:31:50 +02:00
Sarah Hoffmann
e0d29f398e each address line must fill at most one geocodejson field
This fixes an issue where a postcode with rank_address 5
would also appear in the state field.
2020-05-27 11:16:27 +02:00
Frederik Ramm
c43b39bd88 Fix script names in README (#1805) 2020-05-25 12:45:35 +02:00
Sarah Hoffmann
8218da27b3 adapt tests to new ranks 2020-05-23 19:40:41 +02:00
Sarah Hoffmann
aa4bd00631 Adapt boundary labels for Sweden and Norway
This also gives us the correct labels for address output in
json and xml.
2020-05-23 16:19:27 +02:00
Sarah Hoffmann
af6b9fdb39 fix admin levels for Norway and Sweden
Admin levels 3 and 4 are used for region and county respectively,
so downgrade the ranking.
2020-05-23 15:48:40 +02:00
Sarah Hoffmann
c1b6493373 adapt municipality and region for Germany 2020-05-23 15:20:15 +02:00
Sarah Hoffmann
c386cca73f Merge pull request #1801 from lonvia/rework-classtypes
Rework ClassTypes helper functions
2020-05-20 08:22:56 +02:00
Sarah Hoffmann
cadbdaff18 fix style 2020-05-18 22:20:36 +02:00
Sarah Hoffmann
57510f517a adapt tests to modified address types 2020-05-17 16:53:33 +02:00
Sarah Hoffmann
3a2ddbe2e0 encapsulate icon URL in a function 2020-05-17 16:46:45 +02:00
Sarah Hoffmann
859347523f also adapt uses of ClassTypes in website/ 2020-05-17 16:46:45 +02:00
Sarah Hoffmann
528fe6553f adapt php tests
Also fixes some errors found by the tests.
2020-05-17 16:46:45 +02:00
Sarah Hoffmann
1faa0f4d41 reorganise class/type information
Add a separate function for each property which saves necessary
information independently. Simplify computation of labels and
simple labels to not explicitly save the labels.
2020-05-17 16:46:45 +02:00
Sarah Hoffmann
82a11cae2d first draft 2020-05-17 16:46:45 +02:00
Sarah Hoffmann
431948d768 nominatim: always use deadlock-protected wait
Fixes #1785.
2020-05-15 18:49:27 +02:00
Sarah Hoffmann
f69c3d2b66 Merge pull request #1793 from lonvia/remove-struct-params-in-gui
search UI: hide unused query parameters
2020-05-15 16:50:54 +02:00
mmd
08b05964fa Update travis to bionic=Ubuntu18 (#1800) 2020-05-14 22:52:04 +02:00
Sarah Hoffmann
bd7f597682 Merge pull request #1797 from mtmail/jquery-3-5-1
update jquery dependency 3.5.0 => 3.5.1
2020-05-14 20:54:17 +02:00
marc tobias
6d4fbc9d32 update jquery dependency 3.5.0 => 3.5.1 2020-05-14 15:53:05 +02:00
Sarah Hoffmann
124410a17b improve syntax highlighting for apache and nginx examples 2020-05-13 10:13:15 +02:00
Sarah Hoffmann
a543d57cbd switch to php-fpm 7.3
Also fixes indent.
2020-05-13 10:04:31 +02:00
Sarah Hoffmann
8c3a0efe8b Merge branch 'patch-1' of https://github.com/ganeshkrishnan1/Nominatim into ganeshkrishnan1-patch-1 2020-05-13 09:55:48 +02:00
Sarah Hoffmann
9e2841ad44 search UI: hide unused query parameters
Only send query parameters relevant for the current query
type (simple/structured), hide the other input fields.

This is quite a bit of CSS state changing, so move the intial
setup of the input field states into Javascript.
2020-05-11 00:19:33 +02:00
Sarah Hoffmann
233e5f7c0e show simple query field when no parameters are given 2020-05-10 23:52:53 +02:00
Sarah Hoffmann
d5d9445cfd Fix PHP errors in structured HTML output
Correctly handle missing parameters.
2020-05-10 23:41:04 +02:00
Sarah Hoffmann
7be7417b5b Merge pull request #1792 from lonvia/remove-from-location-area
remove linked places also from the location_area_large tables
2020-05-10 15:49:20 +02:00
Sarah Hoffmann
0a14142156 remove linked places also from the location_area_large tables
We don't want linked places to show up in addresses either,
so remove them from the address lookup table.
2020-05-10 13:59:47 +02:00
galewis2
a5e3785843 Add simple/structured query selector to HTML search page (#1722) 2020-05-08 01:29:44 +02:00
Sarah Hoffmann
fc19ebb218 Merge pull request #1786 from lonvia/remove-ubuntu-1604
remove Ubuntu 16 installation instructions
2020-05-07 22:42:31 +02:00
Sarah Hoffmann
b45411f988 Merge pull request #1782 from Simon-Will/1781-make-tests-work-with-phpunit-8
Make tests work with phpunit 8
2020-05-07 22:01:35 +02:00
Sarah Hoffmann
42f6371e47 remove Ubuntu 16 installation instructions
Also fixes up CentOS 8 links in documentation.
2020-05-07 21:55:04 +02:00
Simon Will
be2aa6ab3a Use Ubuntu’s packaged composer, not the custom installation 2020-05-07 21:44:45 +02:00
Sarah Hoffmann
6e39ed9573 Merge pull request #1780 from Simon-Will/1768-vagrant-installation-for-ubuntu-20
Add vagrant machine for Ubuntu 20.04
2020-05-07 20:46:44 +02:00
Simon Will
daf45a2993 Integrate Ubuntu 20 instructions into documentation 2020-05-07 00:36:13 +02:00
Simon Will
d351b10fde Document minimum phpunit version 2020-05-06 23:47:16 +02:00
Simon Will
0b21050904 Install phpunit 8 on Ubuntu 18 with composer 2020-05-06 23:46:53 +02:00
Sarah Hoffmann
644a7f524c Merge pull request #1784 from krahulreddy/patch-1
Removed redundant question
2020-05-06 21:37:29 +02:00
K Rahul Reddy
53949ace36 Removed redundant question 2020-05-06 21:26:32 +05:30
Simon Will
14dba39157 Use assertEqualsWithDelta for float comparisons
PHPUnit 7.3 introduced the functions assertEqualsWithDelta for comparing
floats with a delta. The old four-argument version of assertEquals is
deprecated in PHPUnit 8 and removed in PHPUnit 9.

This commit means that the tests will fail with PHPUnit < 7.3 because
assertEqualsWithDelta is not defined there.
2020-05-05 23:43:09 +02:00
Simon Will
43fd2a7423 Declare return type of testcase setUp method
PHPUnit 7 changed the signature of the TestCase methods to include the
return type.
2020-05-05 23:40:18 +02:00
Simon Will
4b0ac5356e Add vagrant machine for Ubuntu 20.04
The instructions in
[`VAGRANT.md`](42c80893cb/VAGRANT.md)
still work as before. The names of the Vagrant machines are updated so
that Ubuntu 18.04 (previously called `ubuntu`) is now called `ubuntu18`
and Ubuntu 20.04 is now called `ubuntu20`.

The version changes from Ubuntu 18.04 to Ubuntu 20.04 are:

  - Python: 3.6 to 3.8
  - Postgres: 10 to 12
  - PHP: 7.2 to 7.4

In the `apt-get`, I changed `--force` to `--allow-downgrades --allow-remove-essential --allow-change-held-packages`, because the former is deprecated. Cf. the [manpage of apt-get](http://manpages.ubuntu.com/manpages/focal/man8/apt-get.8.html)

The php module `codesniffer` was previously installed via Composer, but it is available via the Ubuntu repository, so I installed it via `apt-get` now.
2020-05-05 23:10:35 +02:00
Sarah Hoffmann
c2f0d8e5ba docs: add link to new status page 2020-05-04 21:11:57 +02:00
marc tobias
0fb93b1e8a documenation for /status endpoint 2020-05-04 17:06:06 +02:00
Sarah Hoffmann
f94828c3f4 properly escape class parameter
The class parameter was used as is, allowing for potential
SQL injection via the API.

Thanks to @bladeswords for finding this.
2020-05-02 21:54:14 +02:00
Sarah Hoffmann
0e1e7c7df2 Merge pull request #1770 from lonvia/eyusupov-separate-compilation
Separate compilation
2020-04-26 21:48:43 +02:00
Sarah Hoffmann
06110ba358 Merge pull request #1769 from lonvia/display-name-order
Ensure that result object name is always first in display_name
2020-04-26 16:18:56 +02:00
Sarah Hoffmann
bae69f0102 cmake: reintroduce check script 2020-04-26 16:17:43 +02:00
Sarah Hoffmann
77e7f4696b fix docs typos 2020-04-26 15:00:28 +02:00
Sarah Hoffmann
47fb2c9126 cmake: restructure splitting between modules
Make a clear distinction between parts used for the importer
and parts used for the API.
2020-04-26 14:17:21 +02:00
Sarah Hoffmann
2ab9e4acd3 Merge branch 'separate-compilation' of https://github.com/eyusupov/Nominatim into eyusupov-separate-compilation 2020-04-26 10:47:41 +02:00
Sarah Hoffmann
65ee7a8002 Merge pull request #1754 from mtmail/nominatim-db-tests-against-postgres
Nominatim::DB tests against separate postgresql database
2020-04-26 10:20:30 +02:00
marc tobias
a5d0657d9b lonvia PR feedback 2020-04-26 03:33:15 +02:00
Sarah Hoffmann
b8f01f91ca simplify display_name computation 2020-04-26 00:18:29 +02:00
Sarah Hoffmann
6cc6cf950c ensure that result object name is always first in display_name
The display name might be mixed up if the result object has a lower
rank_address than its address members.
2020-04-26 00:14:55 +02:00
Sarah Hoffmann
0b0349f746 Merge pull request #1752 from mtmail/new-oo-shell-class
new PHP Nominatim\Shell class to wrap shell escaping
2020-04-25 16:48:04 +02:00
Sarah Hoffmann
2740974a13 Merge pull request #1758 from krahulreddy/advanced-installations
Advanced installations
2020-04-22 09:59:44 +02:00
Sarah Hoffmann
97a9a262bb Merge pull request #1764 from mtmail/docs-countrycodes-based-on-adminlevel-2
API docs: countrycode assignment happens using admin_level=2 tags
2020-04-22 09:57:59 +02:00
Sarah Hoffmann
207efe700f highway:construction should appear as 'road' in the address list
Fixes #1763.
2020-04-22 09:08:33 +02:00
marc tobias
e33315eaa6 API documentation: clarification countrycode assignment happens using admin_level=2 tags 2020-04-21 17:42:12 +02:00
Sarah Hoffmann
5469d02d03 nominatim.py: fix wrong use of assert
Fixes #1762.
2020-04-19 17:59:49 +02:00
K Rahul Reddy
42c80893cb Fix documentation links (#1760)
Update installation documentation link in VAGRANT.md, update.php
2020-04-19 00:42:24 +02:00
K Rahul Reddy
5c56ea3198 Adjustments made to documentation 2020-04-17 21:53:50 +05:30
K Rahul Reddy
42f86329a9 Added Advanced Installations documentation 2020-04-17 21:53:41 +05:30
K Rahul Reddy
08e273c0c7 Added scripts for multiple country setup and updates 2020-04-17 21:50:59 +05:30
Sarah Hoffmann
5f8f98fa03 Merge pull request #1756 from lonvia/downgrade-waterways
downgrade waterways
2020-04-17 08:46:40 +02:00
Sarah Hoffmann
08c53ae27d downgrade waterways
A lot of streams in OSM are of minor importance, they certainly
should show up lower in the list of results than villages. Those
rivers/streams that are well known have a wikipedia page and get
a higher importance from that.

The disadvantage with downgrading is that the address gets even
more useless but that's something that needs to be solved outside
the rank search.
2020-04-14 17:14:20 +02:00
Sarah Hoffmann
f4f369895c Merge pull request #1753 from mtmail/fix-travis-ci-badge
fix Travis-CI badge in README output
2020-04-13 18:36:02 +02:00
marc tobias
38c21de0ee Nominatim::DB tests against separate postgresql database 2020-04-13 18:01:37 +02:00
marc tobias
43cf36e0c7 fix Travis-CI badge in README output 2020-04-13 17:55:17 +02:00
Sarah Hoffmann
9a9ff95989 fix logging of lookup calls
Log start was called but the actual writing was missing.
2020-04-13 11:55:24 +02:00
marc tobias
fc40939775 new PHP Nominatim\Shell class to wrap shell escaping 2020-04-12 03:50:40 +02:00
Sarah Hoffmann
553d8a828c Merge pull request #1751 from lonvia/respect-admin-hierarchy
Address ranks must not invert admin_level hierarchy
2020-04-11 23:41:19 +02:00
Sarah Hoffmann
80f7392fb1 address ranks must not invert admin_level hierarchy
When inheriting an address rank from a linked place we
must be careful not to destroy the hierarchy established
through boundary admin_level. Therefore, before assigning
an address rank from a linked place, find the next higher
boundary in the admin_level hierarchy, look up its address
rank and then only use the address rank from the linked
place if it is higher.
2020-04-11 20:56:30 +02:00
Sarah Hoffmann
61535c9972 Merge branch 'update-jquery-leaflet' of https://github.com/mtmail/Nominatim into mtmail-update-jquery-leaflet 2020-04-11 20:53:14 +02:00
Sarah Hoffmann
b443c92a7a Merge pull request #1749 from lonvia/ranking-during-updates
Reset search and address ranks on update
2020-04-11 20:52:05 +02:00
marc tobias
22da6c541d website dependencies: jQuery v2.1 => 3.5, leaflet 1.3 => 1.6 2020-04-11 18:18:57 +02:00
Sarah Hoffmann
cd96354bc7 reset address and search ranks on update
With ranks being dynamically changed through linking of places,
it is important to reset the ranks on update, so that changes
of the rank due to changes in linking are correctly taken into
account.
2020-04-11 09:20:13 +02:00
Sarah Hoffmann
c6d859a08a factor out computation of address and search rank 2020-04-10 23:18:31 +02:00
Sarah Hoffmann
ef47515420 make admin levels 3 and 7 distinct ones in addresses
There really is no need to conflate these two levels as they
are in use in various countries.

Also adds province as a distinct place.

Fixes #1736.
2020-04-10 22:58:11 +02:00
Sarah Hoffmann
a471a3d1b0 Merge pull request #1745 from lonvia/shuffle-sql-functions
Some more SQL function reorganisation
2020-04-10 17:23:09 +02:00
Sarah Hoffmann
79a68fc2db Merge branch 'deletable-and-polygons-as-json' of https://github.com/mtmail/Nominatim into mtmail-deletable-and-polygons-as-json 2020-04-10 17:20:51 +02:00
marc tobias
93ddd46231 Add JSON output for /deletable.php and /polygons.php 2020-04-10 15:34:56 +02:00
Sarah Hoffmann
f5f0c197be move ranks-related functions in separate sql file
Also adds a common function for computing the update radius
around place nodes.
2020-04-10 11:34:14 +02:00
Sarah Hoffmann
4a30ec28b9 move helper functions from placex_triggers into utils
Also adds documentation for these functions.
2020-04-10 11:05:11 +02:00
Sarah Hoffmann
37ef9bb3d3 Merge pull request #1742 from mtmail/travis-ci-add-os
Travis-Ci configuration: remove -sudo-, add -os-
2020-04-10 08:46:50 +02:00
marc tobias
3e1d4a87fa travis-ci configuration: remove -sudo-, add -os- 2020-04-10 01:20:51 +02:00
Sarah Hoffmann
320d46cc96 Merge pull request #1741 from filimongeorge/patch-1
Updated Import and Update .md file
2020-04-09 23:00:34 +02:00
Sarah Hoffmann
a06ceeef4c Merge pull request #1740 from mtmail/setupclass-index-outputfile-not-used
SetupClass.php: remove unused variable
2020-04-09 22:57:51 +02:00
Sarah Hoffmann
def573d7b4 Merge pull request #1739 from lonvia/remove-self-from-geojson
Further tweaks to geocodejson output
2020-04-09 22:51:20 +02:00
filimongeorge
7f7d29fdd1 Updated Import and Update .md file 2020-04-09 20:51:38 +03:00
marc tobias
c611d49941 SetupClass.php: remove unused variable 2020-04-08 14:16:06 +02:00
Sarah Hoffmann
11cd648699 remove name from geocodejson when not set 2020-04-08 11:19:43 +02:00
Sarah Hoffmann
98be5bf637 adapt tests to geocodejson format adaptions 2020-04-08 11:19:43 +02:00
Sarah Hoffmann
29df9771bb further tweaks to geocodejson address output
Removes the place itself from the address details and use
the lowest ranking element in the rank range for the output.
2020-04-08 11:11:33 +02:00
Sarah Hoffmann
e68c1132da ignore isaddress in details output when it is not present 2020-04-08 10:28:28 +02:00
Sarah Hoffmann
1047b1c191 Merge pull request #1737 from mtmail/expose-isaddress-in-details-json
details JSON: also print isaddress addressline field
2020-04-07 20:49:31 +02:00
marc tobias
9431e80eb4 details JSON: also print isaddress addressline field 2020-04-07 14:50:41 +02:00
Sarah Hoffmann
178501de61 Merge pull request #1734 from krahulreddy/fixed-parselatlon
Added whitespace support for parseLatLon
2020-04-05 23:25:50 +02:00
Sarah Hoffmann
81c7f618fb avoid deletes on search_name in reverse-only mode 2020-04-04 18:26:27 +02:00
Rahul
eb2d816f2a Added test cases for whitespaces in LatLon 2020-04-04 00:53:40 +05:30
Rahul
244cb0e98c Added whitespace characters support in LatLon parsing 2020-04-04 00:53:40 +05:30
Sarah Hoffmann
300ac4b77b fix phpcs issues 2020-04-03 20:08:08 +02:00
Sarah Hoffmann
0d189ac5df Merge pull request #1733 from krahulreddy/whitespaces-considered-as-single-space
Support whitespace characters(x09-x0d) as single space
2020-04-03 18:01:47 +02:00
Sarah Hoffmann
fed2c307a7 Merge pull request #1732 from lonvia/improve-geocodejson-output
Improve geocodejson output
2020-04-02 21:21:04 +02:00
K Rahul Reddy
7aa2df5389 Support whitespace characters(x09-x0d) as single space 2020-04-02 05:04:40 +05:30
Sarah Hoffmann
975ef0b305 re-add district to geocodejson 2020-04-01 21:24:42 +02:00
Sarah Hoffmann
e59146a733 update documentation for geocodejson
Address parts should be usable now.
2020-04-01 11:17:25 +02:00
Sarah Hoffmann
8150c3602b add tests for geocodejson address fields 2020-04-01 11:14:48 +02:00
Sarah Hoffmann
ca8d776724 determine geocodejson address by rank instead of type
Using the address rank to set the address parts catches
a much wider variety of types like 'town' and 'suburb'.
With recent address ranking changes the rank ranges
are relatively reliable.
2020-04-01 11:12:52 +02:00
Sarah Hoffmann
fdc40d5169 factor out geocodejson address generation
Unifies the two implementations currently used for search and address.
2020-04-01 10:27:17 +02:00
Sarah Hoffmann
d0a97056c4 Merge pull request #1731 from lonvia/remove-polygon-from-docs
docs: remove example with polygon parameter
2020-04-01 10:21:45 +02:00
Sarah Hoffmann
e98619f801 docs: remove example with polygon parameter
This parameter was undocumented, long deprecated and is gone now.
2020-03-31 20:10:03 +02:00
Sarah Hoffmann
86eebc4305 fix typo
Fixes #1730
2020-03-31 19:53:55 +02:00
Sarah Hoffmann
4930f776fe fix handling of postcode areas in addresses
The order of preference is now:
1. a post code on the place itself
2. a post code area in the address
3. the computed postcode from the place

Fixes #1723.
2020-03-30 23:27:48 +02:00
Sarah Hoffmann
19948c378a adapt tests to new borough ranking 2020-03-30 23:04:20 +02:00
Sarah Hoffmann
b3215b802d downgrade borough and remove unincorporated area 2020-03-30 18:37:23 +02:00
Sarah Hoffmann
ed16d5b6aa Merge pull request #1729 from lonvia/fix-details-link-for-boundaries
Fix details link for boundaries
2020-03-29 23:12:16 +02:00
marc tobias
7a94872413 remove polygon=1 (polypoints) feature 2020-03-29 21:58:11 +02:00
Sarah Hoffmann
98750922eb also emit place_type in json version of details 2020-03-29 21:06:39 +02:00
Sarah Hoffmann
60c4c9ef2c rather use new place_type in getAddressNames()
If for a boundary the place_type is defined, handle the address
part like a place node. This is the same behaviour as before
when class/type where patched earlier.
2020-03-29 20:49:35 +02:00
Sarah Hoffmann
101f04bbf2 Fix address link for boundaries in details
Removes the special casing for boundaries with a place
type in get_addressdata(). Instead the place type is now
returned as an extra field, so that the caller has to
handle the situation.

This fixes the details link next to the address in the details
view, which previously would go to a place class instead of the
original boundary class.
2020-03-29 17:40:56 +02:00
Sarah Hoffmann
4c593fa859 Merge pull request #1720 from lonvia/better-linking-of-places
Use wikidata tags for improving linking of places with boundaries
2020-03-27 21:12:39 +01:00
Aakankasha Sharma
6603ad4006 Updated TIGER database link in documentation (#1725)
Updated TIGER database link in documentation
2020-03-27 15:50:05 +01:00
Sarah Hoffmann
d56c69dd01 adapt API tests to place linkage changes
The missing district is due to a data error for wikidata tags.
2020-03-25 11:38:31 +01:00
Sarah Hoffmann
e26a300c2f use wikidata tag for linking places
Having the same wikidata is a strong indicator that the same place
is meant. There are some assignment errors where the wikidata does
not link to the object itself but to something that mentions the
place. To reduce errors there, prefer same name.
2020-03-21 22:46:54 +01:00
Sarah Hoffmann
405482ede4 remove linking via admin_centre role
The admin_centre role is for the seat of government which is not
the same as the administrative entity. This is mostly used
correctly these days, so avoid matching by that role.
2020-03-21 21:59:11 +01:00
Sarah Hoffmann
3db2b05069 linking: better name matching for address-less places
Administrative boundaries that do not figure in the address
should still be able to take part in the name matching.
Use the rank_search for comparison in this case.
2020-03-21 21:57:04 +01:00
Sarah Hoffmann
ce5870223a Merge pull request #1706 from mtmail/warn-if-no-tiger-files-found
print warning if no Tiger files found
2020-03-06 22:55:37 +01:00
Sarah Hoffmann
9c1bb87493 Merge pull request #1707 from lonvia/regression-address-in-area
place node address parts must be in lower rank area
2020-03-06 22:55:24 +01:00
Sarah Hoffmann
1f7394dd54 place node address parts must be in lower rank area
This fixes a regression where the area of the lower ranking
area was not computed correctly.

Also excludes postcodes areas now as they have their own
hierarchy.
2020-03-06 21:51:38 +01:00
marc tobias
bb569aa484 print warning if no Tiger files found 2020-03-06 17:52:46 +01:00
Sarah Hoffmann
b0a350db37 Merge pull request #1705 from lonvia/delete-linkee-from-search-name
Remove linkees from search_name
2020-03-04 11:55:05 +01:00
Sarah Hoffmann
78526a33b4 Remove linkees from search_name
Fixes #722
2020-03-04 11:36:39 +01:00
Sarah Hoffmann
ab997b7fb1 Merge pull request #1704 from lonvia/centroid-within-geometry
linked centroids must always be within geometry
2020-03-04 10:18:57 +01:00
Sarah Hoffmann
6d431aebb7 linked centroids must always be within geometry
When using a linked place as centroid for a boundary, check
first that it is really within the area. If it is outside,
just keep the computed centroid because a centroid outside the
area just causes havok.

Fixes #1352.
2020-03-04 09:59:57 +01:00
Sarah Hoffmann
a00ea23847 Merge pull request #1702 from lonvia/rename-derived-place
Make admin boundaries inherit address rank from place nodes
2020-03-04 08:08:39 +01:00
Sarah Hoffmann
53ca751a02 fix style 2020-03-01 22:24:32 +01:00
Sarah Hoffmann
8c444378bc better grouping 2020-02-28 22:10:35 +01:00
Sarah Hoffmann
55fdf0abda output linked place into address details 2020-02-28 22:07:06 +01:00
Sarah Hoffmann
acd8ca2ebd add testing for rank adaption while linking 2020-02-28 15:22:48 +01:00
Sarah Hoffmann
06fdfad89e link against place nodes by place type
If a boundary relation has no label member preferably
link against a place node with the same place type.

Also inherit the rank_address from the place node (only
has an effect when linking via lable member or place type).
2020-02-28 15:22:48 +01:00
Sarah Hoffmann
00ca493f33 move linked place type into linked_place extratags
Using linked_place means that we don't overwrite any
place tags on the boundary. This is important when we
wanto to use the information for linking.
2020-02-28 15:22:48 +01:00
Sarah Hoffmann
b00d16fd7d Merge pull request #1698 from lonvia/cleanup-partition-functions
Cleanup partition functions
2020-02-26 20:21:10 +01:00
Sarah Hoffmann
03c373a4b3 make all query partition functions stable 2020-02-26 11:41:49 +01:00
Sarah Hoffmann
bdaa39573f remove unused nearfeature types
Also move the remaining nearfeaturecentr type close to the
function that is using it.
2020-02-26 11:01:29 +01:00
Sarah Hoffmann
8a4c7f6e2b simplify getNearestParallelRoadFeature function
The function only ever returns one result of which only the
place_id is used. So simplify it to return a single place_id
only (or NULL if none is found).

Also fix typo in function name.
2020-02-26 11:01:04 +01:00
Sarah Hoffmann
84ea0753d8 simplify getNearestRoadFeature function
The function only ever returns one result of which only the
place_id is used. So simplify it to return a single place_id
only (or NULL if none is found). Rename funciton to avoid
conflicts when updating an existing database.
2020-02-26 10:58:55 +01:00
Sarah Hoffmann
c1ef56c870 advise against using Postgresql 12 and Postgis 3
See also #1677
2020-02-25 09:44:32 +01:00
Sarah Hoffmann
0e3252f045 revert using stricter uniqueness constraint on place
Multiple objects with the same (osm_type, osm_id, class) may
exist when we hold back deleting an area because it is so
large.

Fixes #1695.
2020-02-24 22:55:03 +01:00
Sarah Hoffmann
65df218f91 Merge pull request #1693 from lonvia/reorganize-addressline-computation
Reorganize addressline computation
2020-02-24 22:39:51 +01:00
Sarah Hoffmann
5220a92be4 adapt API tests 2020-02-22 16:46:03 +01:00
Sarah Hoffmann
d643ca8dee move address line computation in its own function 2020-02-21 16:38:14 +01:00
Sarah Hoffmann
de45152028 Merge pull request #1692 from mtmail/tests-for-HasSetAny
unit tests for ParameterParser::hasSetAny
2020-02-20 21:16:39 +01:00
marc tobias
7fd9d0eeef unit tests for ParameterParser::hasSetAny 2020-02-19 16:55:17 +01:00
Sarah Hoffmann
d35a0b392e Merge pull request #1691 from lonvia/structured-query-via-cmdline
add structured search to command-line query tool
2020-02-19 11:12:37 +01:00
Sarah Hoffmann
cbddfcde5b add structured search to command-line query tool 2020-02-19 11:04:07 +01:00
Sarah Hoffmann
02ffa752ea Merge pull request #1690 from lonvia/parenting-large-rank-30-areas
improve parenting for large areas with rank 30
2020-02-19 09:20:12 +01:00
Sarah Hoffmann
6189e0c79b improve parenting for large areas with rank 30
Instead of unconditionally parenting them to a street, the
larger areas get a parent area that contains them. To keep
things computationally light-weight, only use the centroid and
bbox to determine if an area is contained.

Requires renaming of parenting functions because renaming
a parameter of the function causes issues when updating the
function (it requires a manual delete, which I'd like to
avoid).
2020-02-19 08:43:53 +01:00
Sarah Hoffmann
6ed6a0d447 Merge pull request #1689 from mtmail/travis-postgres-stopped-working
Travis: documentation suggests we need to add postgresql-client package
2020-02-19 08:00:08 +01:00
marc tobias
484892ae97 Travis: documentation suggests we need to add postgresql-client package pre-startup 2020-02-18 23:45:21 +01:00
Sarah Hoffmann
027d9e938c Merge pull request #1688 from mtmail/snippet-noun-vs-snipped-verb
documentation wording: snipped->snippet
2020-02-18 22:55:20 +01:00
marc tobias
e171f90d81 documentation wording: snipped->snippet 2020-02-18 22:48:27 +01:00
Sarah Hoffmann
92c5d3b720 make sure that linked places are within a boundary
This is a regression from previous code refactoring.

Fixes #1684.
2020-02-18 22:46:32 +01:00
Sarah Hoffmann
2a6e8ad68e add bbox whereclause to make postgis 3.0 happy
Normally ST_Covers() should include a bbox index use,
so adding a bbox where clause is not really necessary.
However, the query planner messes up and uses a parallel
index search with a second index instead of exclusively
running on the geometry index, when the bbox part is
missing.
2020-02-16 14:10:22 +01:00
Sarah Hoffmann
55c8a0ac08 Merge pull request #1678 from lonvia/early-drop
Clean up intermediate tables earlier with --drop
2020-02-13 22:50:41 +01:00
Sarah Hoffmann
6073d948e6 fix duplicate keys in tests
The tests suddenly failed because the unique key constraint
is more strict and does no longer include the type.
2020-02-12 11:29:33 +01:00
Sarah Hoffmann
b9171dd10b clean up intermediate tables earlier with --drop
When --drop is given, we can remove all node geometry information
already after the import with osm2pgsql. Also drop all unnecessary
tables before creating the final indices.
2020-02-12 11:03:20 +01:00
Sarah Hoffmann
97b892fac2 Merge pull request #1675 from lonvia/refresh-connection-while-indexing
Fix a couple of issues with the new Python nominatim script
2020-02-12 08:18:09 +01:00
Sarah Hoffmann
b3fdf19b85 Merge pull request #1674 from mtmail/testdb-how-to-select-tiger-data
document how to extract subset of TIGER data needed for API tests
2020-02-11 22:57:39 +01:00
Sarah Hoffmann
8c89e16ad2 Merge pull request #1673 from mtmail/wikidata-wget-incomplete
wikipedia: wget didnt download, skip index generation
2020-02-11 22:56:45 +01:00
Sarah Hoffmann
960409c701 psycopg 2.6 is now usable on ubuntu 16 2020-02-11 22:49:03 +01:00
Sarah Hoffmann
d1eeaa59a6 nominatim.py: use async in connect() function
The _async parameter name is only supported since psycopg 2.7.
However, async is a keyword in Python >= 3.7, so using this
gives us a syntax error. Working around this by defining the
parameters in a dict and handing that into the connect function.
2020-02-11 22:16:17 +01:00
Sarah Hoffmann
882f496e0a nominatim.py: also catch deadlocks on final wait 2020-02-11 22:16:17 +01:00
Sarah Hoffmann
8b8aa1b4e6 regularly close connection while indexing
Postgres sooner or later runs out of memory when the connection
is used for too long.
2020-02-11 22:16:17 +01:00
marc tobias
932ac23f18 document how to extract subset of TIGER data needed for API tests 2020-02-11 18:50:27 +01:00
marc tobias
6c6560ca53 wikipedia: wget didnt download, skip index generation 2020-02-10 17:20:11 +01:00
Sarah Hoffmann
0698757e6e Merge pull request #1670 from lonvia/permalinks-for-tiger-and-interpolation
Enable Permalinks to dtails for tiger and interpolation
2020-02-09 21:07:19 +01:00
Sarah Hoffmann
3a3f9b3496 fix formatting 2020-02-09 16:57:55 +01:00
Sarah Hoffmann
97d87895bf details: also look for interpolations when way id is given 2020-02-09 16:50:04 +01:00
Sarah Hoffmann
c36fd72f99 use detailsPermaLink function on main website as well 2020-02-09 16:05:22 +01:00
Sarah Hoffmann
57ae3d03a1 return place_id link to details when not an OSM object
Stop-gap solution to find the right object for Tiger and
interpolation objects.
2020-02-09 15:45:38 +01:00
Sarah Hoffmann
3737712044 Merge pull request #1667 from mtmail/setup-delete-invalid-indices
setup: delete invalid indices in create-search-indices step
2020-02-09 14:21:17 +01:00
Sarah Hoffmann
8531339b4e Remove hack that changes the class/type of cities
This interferes badly with the details view.

Fixes #1668.
2020-02-09 12:14:32 +01:00
marc tobias
540b12537a setup: delete invalid indices in create-search-indices step 2020-02-08 15:16:20 +01:00
Sarah Hoffmann
9e2fb45783 Merge pull request #1665 from mtmail/centos7-php7
CentOS7: update from PHP 5.4 to 7.2, add psycopg2
2020-02-07 20:43:49 +01:00
Sarah Hoffmann
e7c128b973 Merge pull request #1666 from mtmail/ubuntu-installs-add-psycopg2
Vagrant Ubuntu: psycopg2 is required
2020-02-07 20:42:38 +01:00
marc tobias
d4a3470c9e Vagrant Ubuntu: psycopg2 is required 2020-02-07 15:26:09 +01:00
marc tobias
4165b8c011 CentOS7: update from PHP 5.4 to 7.2 2020-02-07 15:18:46 +01:00
Sarah Hoffmann
357ba2f64d Merge pull request #1663 from mtmail/vagrant-centos-8
Vagrant setup for CentOS 8
2020-02-06 21:12:28 +01:00
Sarah Hoffmann
3ce8818045 Merge pull request #1664 from mtmail/check-import-finished-for-reverse-only
check_import_finished.php - reverse_only mode has less indices
2020-02-06 21:07:04 +01:00
marc tobias
76082ac7cb check_import_finished.php - reverse_only mode has less indices 2020-02-06 16:48:06 +01:00
marc tobias
4a451671d3 Vagrant setup for CentOS 8 2020-02-06 00:43:30 +01:00
marc tobias
a3728b7188 details html page: no longer use place_id in URLs 2020-02-02 01:16:31 +01:00
Sarah Hoffmann
a8711ab013 fix verboseness of nominiatim script during updates 2020-01-31 18:18:50 +01:00
Sarah Hoffmann
3e4754febd Merge pull request #1648 from lonvia/nominatim-as-python-script
Replace C Nominatim indexer with Python script
2020-01-31 17:53:49 +01:00
Sarah Hoffmann
da1d661fa0 remove libxml dependency for travis as well 2020-01-31 15:15:35 +01:00
Sarah Hoffmann
1801db523b fix typo 2020-01-29 11:50:30 +01:00
Sarah Hoffmann
2979c39628 also adapt indexing command in update script 2020-01-29 11:36:12 +01:00
Sarah Hoffmann
bb9bb40287 update cMake build documentation
Remove the dependency on libxml, no longer needed.
2020-01-24 22:53:26 +01:00
Sarah Hoffmann
8f6fdfeb0b forgot to index last rank 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
b4e6d72fde replace nominatim C program 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
a338ebfce0 fix log levels 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
4144364a15 add time display for nominatim.py 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
11c0dd235b clean up and document script 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
4a9502bf88 fix SQL and some other stuff 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
6c0d6d3178 Revert "switch to threading"
This reverts commit 8b1c2181be5aa5335c68d36a49cab9c4e2cd8bef.
2020-01-24 22:06:30 +01:00
Sarah Hoffmann
0a26ca7104 switch to threading 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
2a15b2522f use generator for thread choice 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
c11d1d78e9 add prepared statement 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
7e51aa4cef simple implementation 2020-01-24 22:06:30 +01:00
Sarah Hoffmann
9abb96fa6b Merge pull request #1647 from lonvia/split-out-linking
Split up placex update trigger code
2020-01-24 21:55:19 +01:00
Sarah Hoffmann
879aafc916 fix indent 2020-01-24 21:16:26 +01:00
Sarah Hoffmann
5ec25122f6 rename functions where return parameter changed
Postgresql cannot cleanly reimport these functions when
upgrading, so simply rename to avoid errors.
2020-01-23 22:28:43 +01:00
Sarah Hoffmann
9371b1aeb9 forgot new trigger sql 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
6f6d116451 adapt index for changes name lookup 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
3ff6eccfd7 move trigger creation later in setup 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
5d1fa597ea clean up get_word_id function
Replaced by addr_ids_from_name() which also normalises the
string.
2020-01-23 22:28:43 +01:00
Sarah Hoffmann
3b6c2c9155 getNearestNamed*Feature functions better return values 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
f863040b38 factor out parent search from addr:street/addr:place 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
1033f8bce7 factor out searching for parent road for pois 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
cf4dbbd681 remove unused function 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
6dccc693d0 factor out computation of default names
Also moves the computation down the line so that we never
have to do it twice.
2020-01-23 22:28:43 +01:00
Sarah Hoffmann
c3dc66ce9c factor out place linking sql 2020-01-23 22:28:43 +01:00
Sarah Hoffmann
4856f56d61 adapt test to change in hamlet classification 2020-01-23 22:26:47 +01:00
Sarah Hoffmann
2edc15dfb8 doc: clarify the influence of autovacuum on memory 2020-01-22 12:02:38 +01:00
Sarah Hoffmann
69e31baf68 docs: add a note that the faltnode file is needed for updates
Fixes #1644.
2020-01-22 11:44:05 +01:00
Sarah Hoffmann
586ff0c364 Merge pull request #1638 from mtmail/check-for-invalid-indices
check_import_finished: check for invalid indices
2020-01-14 21:34:35 +01:00
Sarah Hoffmann
f3ba358d50 hint that invalid indices must be manually deleted 2020-01-14 21:33:09 +01:00
Sarah Hoffmann
54bf4c3339 Merge pull request #1637 from mtmail/fix-warnings-in-verbose-warm
warm.php verbose mode was printing errors
2020-01-14 21:30:46 +01:00
Sarah Hoffmann
acda4344de Merge pull request #1636 from lonvia/update-introduction
Update help text on website
2020-01-14 21:29:25 +01:00
marc tobias
6b0afd5d9b check_import_finished: check for invalid indices 2020-01-14 19:36:40 +01:00
marc tobias
850910ed9e warm.php verbose mode was printing errors 2020-01-14 18:24:49 +01:00
Sarah Hoffmann
4ffa11a26c update report a problem page 2020-01-13 22:42:49 +01:00
Sarah Hoffmann
f5e60f8c40 update help links
All links should go to nominatim.org now. Also add links
to Help OSM and the Github repository.
2020-01-13 22:20:53 +01:00
Sarah Hoffmann
ddaf1b79d4 remove special handling of rail
Skip railway=rail in the style, so that installations can remove
it if they wish.
2020-01-08 23:53:23 +01:00
Sarah Hoffmann
d732dc3bb2 update place address levels
Adds province and allotments and downgrades hamlet.
2020-01-08 23:53:03 +01:00
Sarah Hoffmann
f0af5c5643 Merge pull request #1628 from lonvia/split-sql-functions
Split monstrous functions.sql
2020-01-08 21:36:40 +01:00
Sarah Hoffmann
7a194789bc remove remaining sql functions into function/ directory 2020-01-08 11:45:51 +01:00
Sarah Hoffmann
827d7a9a62 move postcode table triggers to own file 2020-01-08 11:22:23 +01:00
Sarah Hoffmann
dae2761137 move placex triggers into own file 2020-01-08 11:18:42 +01:00
Sarah Hoffmann
4304c1a7bb move place triggers into own file 2020-01-07 23:55:38 +01:00
Sarah Hoffmann
c537ea18a4 move functions for interpolation table in own file 2020-01-05 16:36:46 +01:00
Sarah Hoffmann
28fa7be75a move functions for address lookup into own file 2020-01-05 16:16:21 +01:00
Sarah Hoffmann
f1a5862f3d move creation function for aux data into own file
This function is currently unused, so don't even load it.
2020-01-05 16:04:04 +01:00
Sarah Hoffmann
4088e4e371 move importance/wikipedia functions into separate file 2020-01-05 15:55:39 +01:00
Sarah Hoffmann
0ef6425847 move SQL functions for normalisation in separate file 2020-01-05 15:38:20 +01:00
Sarah Hoffmann
7489deb1b7 Merge pull request #1627 from lonvia/cleanup-setup-scripts
Minor cleanup of setup scripts
2020-01-05 14:17:39 +01:00
Sarah Hoffmann
2059e18e8b setup: factor out parameter replacement in SQL scripts
Put all into a single function and use for all SQL
templates.
2020-01-04 23:48:49 +01:00
Sarah Hoffmann
d11ee4c6d9 fix osm link in polygon error view 2020-01-04 21:51:14 +01:00
Sarah Hoffmann
c74cbde329 Merge pull request #1626 from lonvia/move-tests-to-osm2pgsql
Move tests to osm2pgsql
2020-01-04 20:08:35 +01:00
Sarah Hoffmann
20d541af06 remove osm2pgsql tag tests
These tests are now part of the osm2pgsql test suite.
2020-01-04 16:23:29 +01:00
Sarah Hoffmann
2c163b3959 update osm2pgsql (gazetteer output tests) 2020-01-04 16:20:52 +01:00
Sarah Hoffmann
c6a7ef5574 Merge pull request #1624 from lonvia/add-extratags-style
Add new extratags style
2020-01-03 10:06:47 +01:00
Sarah Hoffmann
7005c6af12 add new extratags style
This is the same as the full style but also adds all unused tags
except for a couple of internal tags to the extratags column.
2020-01-02 14:24:51 +01:00
Sarah Hoffmann
256986f01f Merge pull request #1622 from mtmail/clang-faq-entry
Documentation: add FAQ entry about clang not found
2020-01-02 14:14:00 +01:00
Sarah Hoffmann
33d322df9d update osm2pgsql (exclude country and postcode from address tags) 2019-12-28 22:36:02 +01:00
marc tobias
631013be02 Documentation: add FAQ entry about clang not found 2019-12-28 21:21:39 +01:00
Sarah Hoffmann
89a990e000 Merge pull request #1612 from mtmail/cleanup-wiki-scripts
refactor import_wiki* script for readability
2019-12-23 22:05:39 +01:00
Sarah Hoffmann
ccddd9d1de Merge pull request #1619 from mtmail/document-check-import-script
Use check_import_finished in test suite, document
2019-12-23 22:04:25 +01:00
marc tobias
9587fc9909 refactor import_wiki* script for readability 2019-12-23 21:38:33 +01:00
marc tobias
22b7aed769 Use check_import_finished in test suite, document 2019-12-23 21:25:06 +01:00
marc tobias
7db0da40ad new script utils/check_import_finished.php 2019-12-23 15:13:18 +01:00
Sarah Hoffmann
2be70b2c36 Merge pull request #1596 from mtmail/remove-obsolete-wikidata-scripts
remove old wikidata script. See data-sources/wikipedia-wikidata/ for new process
2019-12-18 21:55:17 +01:00
Sarah Hoffmann
0e03668cf2 Merge pull request #1601 from mtmail/spheric-distance-on-details-page
details page: differentiate between spheric distance and distance in meters
2019-12-18 21:54:06 +01:00
Sarah Hoffmann
28b89daa22 remove duplicate addr:country from style 2019-12-17 20:48:27 +01:00
Sarah Hoffmann
2bfa2f4292 Merge pull request #1609 from txtor/master
typo - fixes openstreetmap#1606
2019-12-17 20:46:20 +01:00
Francesc Hervada-Sala
3b22b9911b typo - fixes openstreetmap#1606 2019-12-17 17:24:29 +01:00
Sarah Hoffmann
bc68ff1e43 update osm2pgsql (deletion and address updates) 2019-12-16 21:27:41 +01:00
Sarah Hoffmann
f59af7483b Merge pull request #1604 from mtmail/wiki-create-pagelinkcount-earlier
wikipedia: create all language pagelinkcount tables before querying them
2019-12-14 21:43:16 +01:00
marc tobias
394f85a96b wikipedia: create all language pagelinkcount tables before querying them 2019-12-14 20:36:54 +01:00
marc tobias
626e3238f2 details page: differentiate between spheric distance and distance in meters 2019-12-11 00:49:32 +01:00
marc tobias
2051a84a09 remove old wikidata script. See data-sources/wikipedia-wikidata/ for new process 2019-12-03 19:27:32 +01:00
Sarah Hoffmann
f8bd4f5133 add test for finding housenumber 0 2019-12-01 20:36:59 +01:00
Sarah Hoffmann
a4e514033d Merge branch 'housenumber-zero' of https://github.com/mtmail/Nominatim into mtmail-housenumber-zero 2019-12-01 20:17:27 +01:00
Sarah Hoffmann
95f20ed7ab Merge pull request #1585 from mmd-osm/patch-1
Mention FAQ / troubleshooting page in README
2019-12-01 20:13:35 +01:00
Sarah Hoffmann
bfe92ea191 bdd tests: enforce use of full import style 2019-12-01 16:25:39 +01:00
Sarah Hoffmann
d3bacf475a Revert "update osm2pgsql (reduce memory usage)"
This reverts commit 3474464894.
2019-12-01 16:25:02 +01:00
Sarah Hoffmann
3474464894 update osm2pgsql (reduce memory usage) 2019-11-29 19:31:48 +01:00
mmd
5b25bff2d8 Mention FAQ / troubleshooting page in README 2019-11-26 22:33:00 +01:00
Sarah Hoffmann
c36896c524 Merge pull request #1578 from lonvia/docs-installation-support
Update installation documentation wrt memory usage
2019-11-26 22:21:45 +01:00
Sarah Hoffmann
d9fe25ac2e Merge pull request #1581 from mtmail/wrap-leaflet-map
Allow leaflet map to wrap-around, still longitude should be -180..+180
2019-11-26 21:54:26 +01:00
Sarah Hoffmann
0bd006eef8 fix typo 2019-11-26 21:52:37 +01:00
Sarah Hoffmann
081d1f9779 Merge pull request #1582 from mtmail/documentation-direct-link-osm2pgsql
add wiki link to osm2psql README
2019-11-26 21:51:30 +01:00
Sarah Hoffmann
546c975e28 Merge pull request #1583 from mtmail/documentation-pear-db-no-longer-prerequisite
PHP PEAR::DB is not longer a prerequisite
2019-11-26 21:51:01 +01:00
marc tobias
05fb037edb PHP PEAR::DB is not longer a prerequisite 2019-11-25 19:11:08 +01:00
marc tobias
5cdc196df1 add wiki link to osm2psql README 2019-11-25 19:08:34 +01:00
marc tobias
0896c07972 Allow map to wrap-around, still longitude should be -180..+180 2019-11-25 18:56:46 +01:00
Sarah Hoffmann
be9f54d0a9 set default osm2pgsql to 0 when using flatnode file 2019-11-24 14:36:36 +01:00
Sarah Hoffmann
88fab44006 update minimum required RAM to 64GB
Also adds more background explanation on time and RAM use,
as well as a hint that github issues are not good for
installation support.
2019-11-24 10:31:34 +01:00
Sarah Hoffmann
000fe3ddff remove reference to nominatim.xml OpenSearch description
The file was meant for the openstreetmap instance only.

Fixes #1554.
2019-11-20 11:53:04 +01:00
Sarah Hoffmann
f180f99a95 Merge pull request #1570 from lonvia/wikipedia-importance-updates
Wikipedia importance updates
2019-11-20 11:25:23 +01:00
Sarah Hoffmann
8d9aa9bf33 formatting: avoid long lines 2019-11-20 10:57:36 +01:00
Sarah Hoffmann
9fed91a47f adapt tests for new wikipedia tables 2019-11-20 09:57:40 +01:00
Sarah Hoffmann
12f830fbbb fix loading of data without wikipedia files
Also removes unused place_boundingbox table.
2019-11-17 23:58:43 +01:00
Sarah Hoffmann
6d764a05b0 prefer English wikipedia for wikidata matches 2019-11-17 19:53:42 +01:00
Sarah Hoffmann
cd3ddec746 Switch to sql.gz format for wikipedia data
The dump import is no longer needed.
2019-11-17 10:09:31 +01:00
Sarah Hoffmann
e4555a208d fix wikidata import
The loop was not skipping empty results of get_wikipedia_match().
2019-11-17 10:06:34 +01:00
Sarah Hoffmann
d53af96aa4 update documentation for new wikipedia data 2019-11-16 16:52:23 +01:00
Sarah Hoffmann
d1a9dc0f24 use wikidata links for importance as well 2019-11-16 16:52:23 +01:00
Sarah Hoffmann
a1bcb28cea also update wikipedia from linked places 2019-11-16 16:52:23 +01:00
Sarah Hoffmann
3fbba8b9db add command for recomputing importance 2019-11-16 16:52:23 +01:00
Sarah Hoffmann
5fb850982a move importance computation into its own function 2019-11-16 16:52:23 +01:00
Sarah Hoffmann
6f2e767c77 Merge pull request #1557 from mtmail/document-boundingbox
documentation: add note what bounding box can be used for
2019-11-13 10:30:22 +01:00
Sarah Hoffmann
1ead5b0f3f Merge pull request #1563 from mtmail/remove-pear-db-faq-entries
documentation: remove FAQ entries related to PEAR::DB
2019-11-13 10:29:52 +01:00
Sarah Hoffmann
eb6681d486 Merge pull request #1564 from mtmail/typo-sql-in-postcodes-md
typo in SQL in Postcodes.md
2019-11-13 10:28:57 +01:00
marc tobias
7503987630 typo in SQL in Postcodes.md 2019-11-11 23:34:44 +01:00
marc tobias
1d337e8a76 documentation: remove FAQ entries related to PEAR::DB, we no longer use that 2019-11-11 16:54:12 +01:00
Sarah Hoffmann
f42e40712e Merge pull request #1555 from mtmail/setup-escape-shell-args
setup: escape arguments when executing shell commands (psql, createdb)
2019-11-06 22:47:00 +01:00
Sarah Hoffmann
b5fb8608ba some reformatting of documentation changes and typo fixes
The newest mkdocs is more demanding when it comes to code block
formatting.
2019-11-06 22:34:43 +01:00
marc tobias
dea1d67d03 documentation: new page explaining calculated postcodes 2019-11-06 22:15:44 +01:00
Sarah Hoffmann
9e6fc8f073 Merge pull request #1548 from mtmail/centos7-postgresql-11
Update CentOS7 instruction to postgresql 11 (default was 9.2)
2019-11-06 21:52:45 +01:00
marc tobias
db6da75683 documentation: add note what bounding box can be used for 2019-11-06 17:33:20 +01:00
marc tobias
9cfd891fb9 setup: escape arguments when executing shell commands (psql, createdb) 2019-11-05 23:50:46 +01:00
marc tobias
f985680d2c Update CentOS instruction to postgresql 11 (default was 9.2) 2019-11-02 14:17:53 +01:00
Sarah Hoffmann
8ae317e002 Merge pull request #1545 from mtmail/details-index-html-page
HTML page with search form when /details.php called without params
2019-10-31 18:30:35 +01:00
marc tobias
36e99c43ce HTML page with search form when /details.php called without params 2019-10-31 17:13:43 +01:00
marc tobias
eeb26aaa6f Addresses with housenumber 0 are found 2019-10-31 13:52:10 +01:00
Sarah Hoffmann
260dbe302a Merge pull request #1542 from mtmail/tiger-import-more-verbose
Tiger data import: display which path is searched for files
2019-10-31 10:24:02 +01:00
marc tobias
c297584726 Tiger data import: display which path is searched for files 2019-10-30 15:50:29 +01:00
Sarah Hoffmann
5930383404 Merge branch 'fix-getcol-zero' of https://github.com/eyusupov/Nominatim into eyusupov-fix-getcol-zero 2019-10-28 23:07:28 +01:00
Sarah Hoffmann
7395eb9791 Merge pull request #1532 from eyusupov/use-extradir
Use ExtraDataPath for country grid
2019-10-28 23:01:42 +01:00
Sarah Hoffmann
e0de838b13 adapt tests to short_name demotion 2019-10-28 22:53:41 +01:00
Sarah Hoffmann
5292239714 demote short_name in the list of names to display
The usage of short_name in OSM has changed with time. It is no
longer suitable as a display name.

Fixes #783.
2019-10-28 22:12:21 +01:00
Eldar Yusupov
b54fca4f7e Handle zero in getCol() 2019-10-26 20:19:36 +03:00
Sarah Hoffmann
26f47d2eb7 switch to pygments for mkdocs hilighting 2019-10-25 23:57:23 +02:00
Eldar Yusupov
72c0898409 Add optional compilation of osm2pgsl 2019-10-25 23:01:19 +03:00
Eldar Yusupov
92d1f5122b Separate Nominatim build 2019-10-25 23:01:19 +03:00
Eldar Yusupov
314de3c3c0 Add options to compile only PG module/frontend
It makes it easier to install Nominatim server and PostgresSQL on
separate servers or use separate docker images for them.
2019-10-25 23:01:19 +03:00
Eldar Yusupov
544db43026 Separate more dependencies 2019-10-25 23:01:19 +03:00
Eldar Yusupov
4aca3700b2 Move nominatim lib deps to its dir 2019-10-25 23:01:19 +03:00
Eldar Yusupov
af833ff042 Include CheckSymbolExists CMake module 2019-10-25 23:01:19 +03:00
Eldar Yusupov
96c1a0a101 Use ExtraDataPath for country grid 2019-10-23 06:19:04 +03:00
Ganesh Krishnan
043f9d8298 allow nginx to serve files without php extensions
The apache config allows api calls without extension for eg /search?q=query string.
This does not work on nginx and we need to enable this via this patch
2018-08-29 12:59:29 -04:00
3605 changed files with 29560 additions and 25161 deletions

4
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,4 @@
contact_links:
- name: Nominatim Discussions
url: https://github.com/osm-search/Nominatim/discussions
about: Ask questions, get support, share ideas and discuss with community members.

View File

@@ -0,0 +1,22 @@
---
name: Feature request
about: Suggest an idea for this project
title: ''
labels: ''
assignees: ''
---
<!-- Before opening a new feature request, please search through the open issue to check that your request hasn't been reported already. -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->
**Describe alternatives you've considered**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

View File

@@ -0,0 +1,37 @@
---
name: Report issues with search results
about: You have searched something with Nominatim and did not get the expected result.
title: ''
labels: ''
assignees: ''
---
## What did you search for?
<!-- Please try to provide a link to your search. You can go to https://nominatim.openstreetmap.org and repeat your search there. If you originally found the issue somewhere else, please tell us what software/website you were using. -->
## What result did you get?
## What result did you expect?
**Is the result in the right place and just named wrongly?**
<!-- Please tell us the display name you expected. -->
**Is the result missing completely?**
<!-- Make sure that the data you are looking for is in OpenStreetMap. Provide a link to the OpenStreetMap object or if you cannot get it, a link to the map on https://openstreetmap.org where you expect the result to be.
To get the link to the OSM object, you can try the following:
* Go to [https://openstreetmap.org](https://openstreetmap.org).
* Move to the area of the map where you expect the result and then zoom in as much as possible.
* Click on the question mark on the right side of the map. You get a question cursor. Use it to click on the map where your object is located.
* Find the object of interest in the list that appears on the left side.
* Click on the object and report back the URL that the browser shows.
-->
## Further details
<!-- Anything else we should know about the search. Particularities with addresses in the area etc. -->

View File

@@ -0,0 +1,36 @@
---
name: Report problems with the software
about: You have your own installation of Nominatim and found a bug.
title: ''
labels: ''
assignees: ''
---
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
**To Reproduce**
<!-- Please describe what you did to get to the issue. -->
**Software Environment (please complete the following information):**
- Nominatim version:
- Postgresql version:
- Postgis version:
- OS:
**Hardware Configuration (please complete the following information):**
- RAM:
- number of CPUs:
- type and size of disks:
- bare metal/AWS/other cloud service:
**Postgresql Configuration:**
<!-- List any configuration items you changed in your postgresql configuration. -->
**Additional context**
<!-- Add any other context about the problem here. -->

View File

@@ -0,0 +1,29 @@
name: 'Build Nominatim'
runs:
using: "composite"
steps:
- name: Install prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
shell: bash
- name: Download dependencies
run: |
if [ ! -f country_grid.sql.gz ]; then
wget --no-verbose https://www.nominatim.org/data/country_grid.sql.gz
fi
cp country_grid.sql.gz Nominatim/data/country_osm_grid.sql.gz
shell: bash
- name: Configure
run: mkdir build && cd build && cmake ../Nominatim
shell: bash
- name: Build
run: |
make -j2 all
sudo make install
shell: bash
working-directory: build

View File

@@ -0,0 +1,47 @@
name: 'Setup Postgresql and Postgis'
inputs:
postgresql-version:
description: 'Version of PostgreSQL to install'
required: true
postgis-version:
description: 'Version of Postgis to install'
required: true
runs:
using: "composite"
steps:
- name: Remove existing PostgreSQL
run: |
sudo apt-get purge -yq postgresql*
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt-get update -qq
shell: bash
- name: Install PostgreSQL
run: |
sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER} postgresql-server-dev-${PGVER}
shell: bash
env:
PGVER: ${{ inputs.postgresql-version }}
POSTGISVER: ${{ inputs.postgis-version }}
- name: Adapt postgresql configuration
run: |
echo 'fsync = off' | sudo tee /etc/postgresql/${PGVER}/main/conf.d/local.conf
echo 'synchronous_commit = off' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
echo 'full_page_writes = off' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
echo 'shared_buffers = 1GB' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
echo 'port = 5432' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
shell: bash
env:
PGVER: ${{ inputs.postgresql-version }}
- name: Setup database
run: |
sudo systemctl restart postgresql
sudo -u postgres createuser -S www-data
sudo -u postgres createuser -s runner
shell: bash

171
.github/workflows/ci-tests.yml vendored Normal file
View File

@@ -0,0 +1,171 @@
name: CI Tests
on: [ push, pull_request ]
jobs:
tests:
runs-on: ubuntu-20.04
strategy:
matrix:
postgresql: [9.5, 13]
include:
- postgresql: 9.5
postgis: 2.5
- postgresql: 13
postgis: 3
steps:
- uses: actions/checkout@v2
with:
submodules: true
path: Nominatim
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: '7.4'
tools: phpunit, phpcs
- name: Get Date
id: get-date
run: |
echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
shell: bash
- uses: actions/cache@v2
with:
path: |
country_grid.sql.gz
key: nominatim-country-data-${{ steps.get-date.outputs.date }}
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
- uses: ./Nominatim/.github/actions/build-nominatim
- name: Install test prerequsites
run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: Python linting
run: pylint --extension-pkg-whitelist=osmium nominatim
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
- name: Python unit tests
run: py.test-3 test/python
working-directory: Nominatim
- name: BDD tests
run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
import:
strategy:
matrix:
ubuntu: [18, 20]
include:
- ubuntu: 18
postgresql: 9.5
postgis: 2.5
- ubuntu: 20
postgresql: 13
postgis: 3
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/checkout@v2
with:
submodules: true
path: Nominatim
- name: Get Date
id: get-date
run: |
echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
shell: bash
- uses: actions/cache@v2
with:
path: |
country_grid.sql.gz
key: nominatim-country-data-${{ steps.get-date.outputs.date }}
- uses: actions/cache@v2
with:
path: |
monaco-latest.osm.pbf
key: nominatim-test-data-${{ steps.get-date.outputs.date }}
- uses: actions/setup-python@v2
with:
python-version: 3.5
if: matrix.ubuntu == 18
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
- uses: ./Nominatim/.github/actions/build-nominatim
- name: Install extra dependencies for Ubuntu 18
run: |
sudo apt-get install libicu-dev
pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium
shell: bash
if: matrix.ubuntu == 18
- name: Clean installation
run: rm -rf Nominatim build
shell: bash
- name: Prepare import environment
run: |
if [ ! -f monaco-latest.osm.pbf ]; then
wget --no-verbose https://download.geofabrik.de/europe/monaco-latest.osm.pbf
fi
mkdir data-env
cd data-env
shell: bash
- name: Import
run: nominatim import --osm-file ../monaco-latest.osm.pbf
shell: bash
working-directory: data-env
- name: Import special phrases
run: nominatim special-phrases --import-from-wiki
working-directory: data-env
- name: Check full import
run: nominatim admin --check-database
working-directory: data-env
- name: Warm up database
run: nominatim admin --warm
working-directory: data-env
- name: Run update
run: |
nominatim replication --init
nominatim replication --once
working-directory: data-env
- name: Run reverse-only import
run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only --no-updates
working-directory: data-env
env:
NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse
- name: Check reverse import
run: nominatim admin --check-database
working-directory: data-env

1
.gitignore vendored
View File

@@ -9,3 +9,4 @@ data/wiki_specialphrases.sql
data/osmosischange.osc
.vagrant
data/country_osm_grid.sql.gz

12
.pylintrc Normal file
View File

@@ -0,0 +1,12 @@
[MASTER]
extension-pkg-whitelist=osmium
ignored-modules=icu
[MESSAGES CONTROL]
[TYPECHECK]
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing

View File

@@ -1,34 +0,0 @@
---
sudo: required
dist: xenial
language: python
python:
- "3.6"
addons:
postgresql: "9.6"
git:
depth: 3
env:
- TEST_SUITE=tests
- TEST_SUITE=monaco
before_install:
- phpenv global 7.1
install:
- vagrant/install-on-travis-ci.sh
before_script:
- psql -U postgres -c "create extension postgis"
script:
- cd $TRAVIS_BUILD_DIR/
- if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 . ; fi
- cd $TRAVIS_BUILD_DIR/test/php
- if [[ $TEST_SUITE == "tests" ]]; then /usr/bin/phpunit ./ ; fi
- cd $TRAVIS_BUILD_DIR/test/bdd
- # behave --format=progress3 api
- if [[ $TEST_SUITE == "tests" ]]; then behave -DREMOVE_TEMPLATE=1 --format=progress3 db ; fi
- if [[ $TEST_SUITE == "tests" ]]; then behave --format=progress3 osm2pgsql ; fi
- cd $TRAVIS_BUILD_DIR/build
- if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
- if [[ $TEST_SUITE == "monaco" ]]; then /usr/bin/env php ./utils/setup.php --osm-file ../data/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | grep -v 'ETA (seconds)'; fi
- if [[ $TEST_SUITE == "monaco" ]]; then /usr/bin/env php ./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim >/dev/null; fi
notifications:
email: false

View File

@@ -6,7 +6,7 @@
#
#-----------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
@@ -19,8 +19,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 3)
set(NOMINATIM_VERSION_MINOR 4)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_MINOR 7)
set(NOMINATIM_VERSION_PATCH 2)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
@@ -28,148 +28,249 @@ add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
#-----------------------------------------------------------------------------
#
# Find external dependencies
#
# Configuration
#-----------------------------------------------------------------------------
set(BUILD_TESTS off CACHE BOOL "Build test suite" FORCE)
set(WITH_LUA off CACHE BOOL "Build with lua support" FORCE)
set(ONLY_DOCS off CACHE BOOL "Build documentation only")
set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
set(BUILD_API on CACHE BOOL "Build everything for the API server")
set(BUILD_MODULE on CACHE BOOL "Build PostgreSQL module")
set(BUILD_TESTS on CACHE BOOL "Build test suite")
set(BUILD_DOCS on CACHE BOOL "Build documentation")
set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
if (NOT ONLY_DOCS)
#-----------------------------------------------------------------------------
# osm2pgsql (imports/updates only)
#-----------------------------------------------------------------------------
if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
if (NOT EXISTS "${CMAKE_SOURCE_DIR}/osm2pgsql/CMakeLists.txt")
message(FATAL_ERROR "The osm2pgsql directory is empty.\
Did you forget to check out Nominatim recursively?\
\nTry updating submodules with: git submodule update --init")
endif()
set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
set(BUILD_TESTS off)
set(WITH_LUA off CACHE BOOL "")
add_subdirectory(osm2pgsql)
find_package(Threads REQUIRED)
unset(PostgreSQL_TYPE_INCLUDE_DIR CACHE)
set(PostgreSQL_TYPE_INCLUDE_DIR "/usr/include/")
find_package(PostgreSQL REQUIRED)
include_directories(${PostgreSQL_INCLUDE_DIRS})
link_directories(${PostgreSQL_LIBRARY_DIRS})
find_program(PYOSMIUM pyosmium-get-changes)
if (NOT EXISTS "${PYOSMIUM}")
set(PYOSMIUM_PATH "")
message(WARNING "pyosmium-get-changes not found (required for updates)")
else()
set(PYOSMIUM_PATH "${PYOSMIUM}")
message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
endif()
set(BUILD_TESTS ${BUILD_TESTS_SAVED})
endif()
find_program(PG_CONFIG pg_config)
execute_process(COMMAND ${PG_CONFIG} --pgxs
OUTPUT_VARIABLE PGXS
OUTPUT_STRIP_TRAILING_WHITESPACE)
#-----------------------------------------------------------------------------
# python (imports/updates only)
#-----------------------------------------------------------------------------
if (NOT EXISTS "${PGXS}")
message(FATAL_ERROR "Postgresql server package not found.")
endif()
if (BUILD_IMPORTER)
find_package(PythonInterp 3.5 REQUIRED)
endif()
find_package(ZLIB REQUIRED)
#-----------------------------------------------------------------------------
# PHP
#-----------------------------------------------------------------------------
find_package(BZip2 REQUIRED)
# Setting PHP binary variable as to command line (prevailing) or auto detect
find_package(LibXml2 REQUIRED)
include_directories(${LIBXML2_INCLUDE_DIR})
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (BUILD_API OR BUILD_IMPORTER)
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
if (NOT PHPCGI_BIN)
find_program (PHPCGI_BIN php-cgi)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHPCGI_BIN})
message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
set (PHPCGI_BIN "")
else()
message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
endif()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
#-----------------------------------------------------------------------------
#
# Setup settings and paths
#
# import scripts and utilities (importer only)
#-----------------------------------------------------------------------------
set(WEBSITESCRIPTS
website/deletable.php
website/details.php
website/hierarchy.php
website/lookup.php
website/polygons.php
website/reverse.php
website/search.php
website/status.php
)
if (BUILD_IMPORTER)
find_file(COUNTRY_GRID_FILE country_osm_grid.sql.gz
PATHS ${PROJECT_SOURCE_DIR}/data
NO_DEFAULT_PATH
DOC "Location of the country grid file."
)
set(CUSTOMSCRIPTS
utils/country_languages.php
utils/importWikipedia.php
utils/export.php
utils/query.php
utils/setup.php
utils/specialphrases.php
utils/update.php
utils/warm.php
)
if (NOT COUNTRY_GRID_FILE)
message(FATAL_ERROR "\nYou need to download the country_osm_grid first:\n"
" wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
endif()
foreach (script_source ${CUSTOMSCRIPTS})
configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
${PROJECT_BINARY_DIR}/${script_source})
endforeach()
set(CUSTOMSCRIPTS
check_import_finished.php
country_languages.php
export.php
query.php
setup.php
update.php
warm.php
)
foreach (script_source ${WEBSITESCRIPTS})
configure_file(${PROJECT_SOURCE_DIR}/cmake/website.tmpl
${PROJECT_BINARY_DIR}/${script_source})
endforeach()
configure_file(${PROJECT_SOURCE_DIR}/settings/defaults.php
${PROJECT_BINARY_DIR}/settings/settings.php)
set(WEBPATHS css images js)
foreach (wp ${WEBPATHS})
execute_process(
COMMAND ln -sf ${PROJECT_SOURCE_DIR}/website/${wp} ${PROJECT_BINARY_DIR}/website/
)
endforeach()
foreach (script_source ${CUSTOMSCRIPTS})
configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
${PROJECT_BINARY_DIR}/utils/${script_source})
endforeach()
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
${PROJECT_BINARY_DIR}/nominatim)
endif()
#-----------------------------------------------------------------------------
#
# Tests
#
#-----------------------------------------------------------------------------
if (NOT ONLY_DOCS)
if (BUILD_TESTS)
include(CTest)
set(TEST_BDD db osm2pgsql api)
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND lettuce features/${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
find_program(PYTHON_BEHAVE behave)
find_program(PYLINT NAMES pylint3 pylint)
find_program(PYTEST NAMES pytest py.test-3 py.test)
find_program(PHPCS phpcs)
find_program(PHPUNIT phpunit)
add_test(NAME php
COMMAND phpunit ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests-php)
if (PYTHON_BEHAVE)
message(STATUS "Using Python behave binary ${PYTHON_BEHAVE}")
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND ${PYTHON_BEHAVE} ${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
else()
message(WARNING "behave not found. BDD tests disabled." )
endif()
if (PHPUNIT)
message(STATUS "Using phpunit binary ${PHPUNIT}")
add_test(NAME php
COMMAND ${PHPUNIT} ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
else()
message(WARNING "phpunit not found. PHP unit tests disabled." )
endif()
if (PHPCS)
message(STATUS "Using phpcs binary ${PHPCS}")
add_test(NAME phpcs
COMMAND ${PHPCS} --report-width=120 --colors lib website utils
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "phpcs not found. PHP linting tests disabled." )
endif()
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
COMMAND ${PYLINT} nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")
endif()
if (PYTEST)
message(STATUS "Using pytest binary ${PYTEST}")
add_test(NAME pytest
COMMAND ${PYTEST} test/python
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pytest not found. Python tests disabled." )
endif()
endif()
#-----------------------------------------------------------------------------
# Postgres module
#-----------------------------------------------------------------------------
if (NOT ONLY_DOCS)
if (BUILD_MODULE)
add_subdirectory(module)
add_subdirectory(nominatim)
endif()
add_subdirectory(docs)
#-----------------------------------------------------------------------------
# Documentation
#-----------------------------------------------------------------------------
if (BUILD_DOCS)
add_subdirectory(docs)
endif()
#-----------------------------------------------------------------------------
# Manual page
#-----------------------------------------------------------------------------
if (BUILD_MANPAGE)
add_subdirectory(manual)
endif()
#-----------------------------------------------------------------------------
# Installation
#-----------------------------------------------------------------------------
include(GNUInstallDirs)
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
if (BUILD_IMPORTER)
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
install(PROGRAMS ${PROJECT_BINARY_DIR}/installed.bin
DESTINATION ${CMAKE_INSTALL_BINDIR}
RENAME nominatim)
install(DIRECTORY nominatim
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
FILES_MATCHING PATTERN "*.py"
PATTERN __pycache__ EXCLUDE)
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
install(FILES data/country_name.sql
${COUNTRY_GRID_FILE}
data/words.sql
DESTINATION ${NOMINATIM_DATADIR})
endif()
if (BUILD_OSM2PGSQL)
if (${CMAKE_VERSION} VERSION_LESS 3.13)
# Installation of subdirectory targets was only introduced in 3.13.
# So just copy the osm2pgsql file for older versions.
install(PROGRAMS ${PROJECT_BINARY_DIR}/osm2pgsql/osm2pgsql
DESTINATION ${NOMINATIM_LIBDIR})
else()
install(TARGETS osm2pgsql RUNTIME DESTINATION ${NOMINATIM_LIBDIR})
endif()
endif()
if (BUILD_MODULE)
install(PROGRAMS ${PROJECT_BINARY_DIR}/module/nominatim.so
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
if (BUILD_API)
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase-settings.json
settings/import-admin.style
settings/import-street.style
settings/import-address.style
settings/import-full.style
settings/import-extratags.style
DESTINATION ${NOMINATIM_CONFIGDIR})

View File

@@ -7,38 +7,6 @@ Please always open a separate issue for each problem. In particular, do
not add your bugs to closed issues. They may looks similar to you but
often are completely different from the maintainer's point of view.
### When Reporting Bad Search Results...
Please make sure to add the following information:
* the URL of the query that produces the bad result
* the result you are getting
* the expected result, preferably a link to the OSM object you want to find,
otherwise an address that is as precise as possible
To get the link to the OSM object, you can try the following:
* go to https://openstreetmap.org
* zoom to the area of the map where you expect the result and
zoom in as much as possible
* click on the question mark on the right side of the map,
then with the queston cursor on the map where your object is located
* find the object of interest in the list that appears on the left side
* click on the object and report the URL back that the browser shows
### When Reporting Problems with your Installation...
Please add the following information to your issue:
* hardware configuration: RAM size, CPUs, kind and size of disks
* Operating system (also mention if you are running on a cloud service)
* Postgres and Postgis version
* list of settings you changed in your Postgres configuration
* Nominatim version (release version or,
if you run from the git repo, the output of `git rev-parse HEAD`)
* (if applicable) exact command line of the command that was causing the issue
## Workflow for Pull Requests
We love to get pull requests from you. We operate the "Fork & Pull" model
@@ -81,22 +49,18 @@ are in process of consolidating the style. The following rules apply:
* for PHP variables use CamelCase with a prefixing letter indicating the type
(i - integer, f - float, a - array, s - string, o - object)
The coding style is enforced with PHPCS and can be tested with:
The coding style is enforced with PHPCS and pylint. It can be tested with:
```
phpcs --report-width=120 --colors .
phpcs --report-width=120 --colors .
pylint3 --extension-pkg-whitelist=osmium nominatim
```
## Testing
Before submitting a pull request make sure that the following tests pass:
Before submitting a pull request make sure that the tests pass:
```
cd test/bdd
behave -DBUILDDIR=<builddir> db osm2pgsql
```
```
cd test/php
phpunit ./
cd build
make test
```

126
ChangeLog
View File

@@ -1,3 +1,129 @@
3.7.2
* fix database check for reverse-only imports
* do not error out in status API result when import date is missing
* add array_key_last function for PHP < 7.3 (thanks to @woodpack)
* fix more url when server name is unknown (thanks to @mogita)
* commit changes to replication log table
3.7.1
* fix smaller issues with special phrases import
* add index to speed up continued indexing during import
* fix index on location_property_tiger(parent_place_id)
* make sure Python code is backward-compatible with Python 3.5
* various documentation fixes
3.7.0
* switch to dotenv for configuration file
* introduce 'make install' (reorganising most of the code)
* introduce nominatim tool as replacement for various php scripts
* introduce project directories and allow multiple installations from same build
* clean up BDD tests: drop nose, reorganise step code
* simplify test database for API BDD tests and autoinstall database
* port most of the code for command-line tools to Python
(thanks to @darkshredder and @AntoJvlt)
* add tests for all tooling
* replace pyosmium-get-changes with custom internal implementation using
pyosmium
* improve search for queries with housenumber and partial terms
* add database versioning
* use jinja2 for preprocessing SQL files
* introduce automatic migrations
* reverse fix preference of interpolations over housenumbers
* parallelize indexing of postcodes
* add non-key indexes to speed up housenumber + street searches
* switch housenumber field in placex to save transliterated names
3.6.0
* add full support for searching by and displaying of addr:* tags
* improve address output for large-area objects
* better use of country names from OSM data for search and display
* better debug output for reverse call
* add support for addr:place links without an place equivalent in OSM
* improve finding postcodes with normalisation artefacts
* batch object to index for rank 30, avoiding a wrap-around of transaction
IDs in PostgreSQL
* introduce dynamic address rank computation for administrative boundaries
depending on linked objects and their place in the admin level hierarchy
* add country-specific address ranking for Indonesia, Russia, Belgium and
the Netherlands (thanks @hendrikmoree)
* make sure wikidata/wikipedia tags are imported for all styles
* make POIs searchable by name and housenumber (thanks @joy-yyd)
* reverse geocoding now ignores places without an address rank (rivers etc.)
* installation of a webserver is no longer mandatory, for development
use the php internal webserver via 'make serve
* reduce the influence of place nodes in addresses
* drop support for the unspecific is_in tag
* various minor tweaks to supplied styles
* move HTML web frontend into its own project
* move scripts for processing external data sources into separate directories
* introduce separate configuration for website (thanks @krahulreddy)
* update documentation, in particular, clean up development docs
* update osm2pgsql to 1.4.0
3.5.2
* ensure that wikipedia tags are imported for all styles
* reinstate verbosity for indexing during updates
* make house number reappear in display name on named POIs
* introduce batch processing in indexer to avoid transaction ID overrun
* increase splitting for large geometries to improve indexing speed
* remove deprecated get_magic_quotes_gpc() function
* make sure that all postcodes have an entry in word and are thus searchable
* remove use of ST_Covers in conjunction woth ST_Intersects,
causes bad query planning and slow updates in Postgis3
* update osm2pgsql
3.5.1
* disable jit and parallel processing in PostgreSQL for osm2pgsql
* update libosmium to 2.15.6 (fixes an issue with processing hanging
on large multipolygons)
3.5.0
* structured select on HTML search page
* new PHP Nominatim\Shell class to wrap shell escaping
* remove polygon parameter from all API calls
* improve handling of postcode areas
* reorganise place linking algorithm, now using wikidata tag as well
* remove linkees from search_name and larger_area tables
* introduce country-specific address ranks
* reorganise rank address computation
* cleanup of partition function
* improve parenting for large POIs
* add support for Postgresql 12 and Postgis 3
* add earlier cleanup when --drop is given, to reduce memory usage
* remove use of place_id in URLs
* replace C nominatim indexer with a simpler Python implementation
* split up the huge sql/functions.sql file
* move osm2pgsql tests to osm2pgsql
* add new extratags style which imports all tags from OSM
* add new script for checking the import after completion
* update osm2pgsql, reducing memory usage
* use new wikipedia importance and add processing of wikidata tags
* add search form for details page
* use ExtraDataPath for country_grid table
* remove short_name from list of names to be displayed
* split up CMakeFile, so that all parts can be built separately
* update installation instructions for CentOS and Ubuntu
* add script for importing/updating multiple country extracts
* various documentation improvements
3.4.2
* fix security bug in /details endpoint where user input was not
properly sanitized
3.4.1
* update osm2pgsql to fix hans during updates and lost address numbers
during updates
3.4.0
* increase required version for PostgreSQL(9.3), PostGIS(2.2) and PHP(7.0)

View File

@@ -1,4 +1,4 @@
[![Build Status](https://travis-ci.org/openstreetmap/Nominatim.svg?branch=master)](https://travis-ci.org/openstreetmap/Nominatim)
[![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
Nominatim
=========
@@ -19,12 +19,19 @@ https://nominatim.org/release-docs/develop/ .
Installation
============
The latest stable release can be downloaded from https://nominatim.org.
There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation).
**Nominatim is a complex piece of software and runs in a complex environment.
Installing and running Nominatim is something for experienced system
administrators only who can do some trouble-shooting themselves. We are sorry,
but we can not provide installation support. We are all doing this in our free
time and there is just so much of that time to go around. Do not open issues in
our bug tracker if you need help. Use the discussions forum
or ask for help on [help.openstreetmap.org](https://help.openstreetmap.org/).**
Detailed installation instructions for the development version can be
found at [nominatim.org](https://nominatim.org/release-docs/develop/admin/Installation)
as well.
The latest stable release can be downloaded from https://nominatim.org.
There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/).
[Detailed installation instructions for current master](https://nominatim.org/release-docs/develop/admin/Installation)
can be found at nominatim.org as well.
A quick summary of the necessary steps:
@@ -34,12 +41,15 @@ A quick summary of the necessary steps:
cd build
cmake ..
make
sudo make install
2. Get OSM data and import:
2. Create a project directory, get OSM data and import:
./build/utils/setup.php --osm-file <your planet file> --all
mkdir nominatim-project
cd nominatim-project
nominatim import --osm-file <your planet file>
3. Point your webserver to the ./build/website directory.
3. Point your webserver to the nominatim-project/website directory.
License
@@ -51,13 +61,14 @@ The source code is available under a GPLv2 license.
Contributing
============
Contributions are welcome. For details see [contribution guide](CONTRIBUTING.md).
Both bug reports and pull requests are welcome.
Contributions, bugreport and pull requests are welcome.
For details see [contribution guide](CONTRIBUTING.md).
Mailing list
============
Questions and help
==================
For questions you can join the geocoding mailing list, see
https://lists.openstreetmap.org/listinfo/geocoding
For questions, community help and discussions you can use the
[Github discussions forum](https://github.com/osm-search/Nominatim/discussions)
or join the
[geocoding mailing list](https://lists.openstreetmap.org/listinfo/geocoding).

View File

@@ -141,7 +141,7 @@ No. Long running Nominatim installations will differ once new import features (o
bug fixes) get added since those usually only get applied to new/changed data.
Also this document skips the optional Wikipedia data import which affects ranking
of search results. See [Nominatim installation](http://nominatim.org/release-docs/latest/Installation) for details.
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?
@@ -160,9 +160,9 @@ You can configure/download other Vagrant boxes from [https://app.vagrantup.com/b
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
pgsql://postgres@your-server.com:5432/nominatim_it
pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
No data import necessary or restarting necessary.
No data import or restarting necessary.
If the Postgres installation is behind a firewall, you can try

88
Vagrantfile vendored
View File

@@ -4,18 +4,65 @@
Vagrant.configure("2") do |config|
# Apache webserver
config.vm.network "forwarded_port", guest: 80, host: 8089
config.vm.network "forwarded_port", guest: 8088, host: 8088
# If true, then any SSH connections made will enable agent forwarding.
config.ssh.forward_agent = true
# Never sync the current directory to /vagrant.
config.vm.synced_folder ".", "/vagrant", disabled: true
checkout = "yes"
if ENV['CHECKOUT'] != 'y' then
config.vm.synced_folder ".", "/home/vagrant/Nominatim"
checkout = "no"
checkout = "no"
end
config.vm.provider "virtualbox" do |vb, override|
vb.gui = false
vb.memory = 2048
vb.customize ["setextradata", :id, "VBoxInternal2/SharedFoldersEnableSymlinksCreate//vagrant","0"]
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim"
end
end
config.vm.provider "libvirt" do |lv, override|
lv.memory = 2048
lv.nested = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
end
end
config.vm.define "ubuntu", primary: true do |sub|
sub.vm.box = "bento/ubuntu-18.04"
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu-apache" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu-nginx" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu18" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
@@ -23,48 +70,41 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu18nginx" do |sub|
sub.vm.box = "bento/ubuntu-18.04"
config.vm.define "ubuntu18-apache" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18-nginx.sh"
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout]
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu16" do |sub|
sub.vm.box = "bento/ubuntu-16.04"
config.vm.define "ubuntu18-nginx" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-16.sh"
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout]
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "travis" do |sub|
sub.vm.box = "bento/ubuntu-14.04"
config.vm.define "centos7" do |sub|
sub.vm.box = "centos/7"
sub.vm.provision :shell do |s|
s.path = "vagrant/install-on-travis-ci.sh"
s.path = "vagrant/Install-on-Centos-7.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "centos" do |sub|
sub.vm.box = "centos/7"
sub.vm.box = "generic/centos8"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-7.sh"
s.path = "vagrant/Install-on-Centos-8.sh"
s.privileged = false
s.args = "yes"
s.args = [checkout]
end
sub.vm.synced_folder ".", "/home/vagrant/Nominatim", disabled: true
sub.vm.synced_folder ".", "/vagrant", disabled: true
end
config.vm.provider "virtualbox" do |vb|
vb.gui = false
vb.memory = 2048
vb.customize ["setextradata", :id, "VBoxInternal2/SharedFoldersEnableSymlinksCreate//vagrant","0"]
end
end

View File

@@ -1,4 +1,14 @@
#!@PHP_BIN@ -Cq
<?php
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/@script_source@');
require('@CMAKE_SOURCE_DIR@/lib-php/dotenv_loader.php');
@define('CONST_Default_ModulePath', '@CMAKE_BINARY_DIR@/module');
@define('CONST_Default_Osm2pgsql', '@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql');
@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@/data');
@define('CONST_SqlDir', '@CMAKE_SOURCE_DIR@/lib-sql');
@define('CONST_ConfigDir', '@CMAKE_SOURCE_DIR@/settings');
loadDotEnv();
$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
require_once('@CMAKE_SOURCE_DIR@/lib-php/admin/@script_source@');

17
cmake/tool-installed.tmpl Normal file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env python3
import sys
import os
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
data_dir='@NOMINATIM_DATADIR@',
config_dir='@NOMINATIM_CONFIGDIR@',
phpcgi_path='@PHPCGI_BIN@'))

17
cmake/tool.tmpl Executable file
View File

@@ -0,0 +1,17 @@
#!/usr/bin/env python3
import sys
import os
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
data_dir='@CMAKE_SOURCE_DIR@/data',
config_dir='@CMAKE_SOURCE_DIR@/settings',
phpcgi_path='@PHPCGI_BIN@'))

View File

@@ -1,3 +0,0 @@
<?php
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/@script_source@');

View File

@@ -1,77 +0,0 @@
# Fallback Country Boundaries
Each place is assigned a `country_code` and partition. Partitions derive from `country_code`.
Nominatim imports two pre-generated files
* `data/country_name.sql` (country code, name, default language, partition)
* `data/country_osm_grid.sql` (country code, geometry)
before creating places in the database. This helps with fast lookups and missing data (e.g. if the data the user wants to import doesn't contain any country places).
The number of countries in the world can change (South Sudan created 2011, Germany reunification), so can their boundaries. This document explain how the pre-generated files can be updated.
## Country code
Each place is assigned a two letter country_code based on its location, e.g. `gb` for Great Britain. Or `NULL` if no suitable country is found (usually it's in open water then).
In `sql/functions.sql: get_country_code(geometry)` the place's center is checked against
1. country places already imported from the user's data file. Places are imported by rank low-to-high. Lowest rank 2 is countries so most places should be matched. Still the data file might be incomplete.
2. if unmatched: OSM grid boundaries
3. if still unmatched: OSM grid boundaries, but allow a small distance
## Partitions
Each place is assigned partition, which is a number 0..250. 0 is fallback/other.
During place indexing (`sql/functions.sql: placex_insert()`) a place is assigned the partition based on its country code (`sql/functions.sql: get_partition(country_code)`). It checks in the `country_name` table.
Most countries have their own partition, some share a partition. Thus partition counts vary greatly.
Several database tables are split by partition to allow queries to run against less indices and improve caching.
* `location_area_large_<partition>`
* `search_name_<partition>`
* `location_road_<partition>`
## Data files
### data/country_name.sql
Export from existing database table plus manual changes. `country_default_language_code` most taken from [https://wiki.openstreetmap.org/wiki/Nominatim/Country_Codes](), see `utils/country_languages.php`.
### data/country_osm_grid.sql
`country_grid.sql` merges territories by country. Then uses `function.sql: quad_split_geometry` to split each country into multiple [Quadtree](https://en.wikipedia.org/wiki/Quadtree) polygons for faster point-in-polygon lookups.
To visualize one country as geojson feature collection, e.g. for loading into [geojson.io](http://geojson.io/):
```
-- http://www.postgresonline.com/journal/archives/267-Creating-GeoJSON-Feature-Collections-with-JSON-and-PostGIS-functions.html
SELECT row_to_json(fc)
FROM (
SELECT 'FeatureCollection' As type, array_to_json(array_agg(f)) As features
FROM (
SELECT 'Feature' As type,
ST_AsGeoJSON(lg.geometry)::json As geometry,
row_to_json((country_code, area)) As properties
FROM country_osm_grid As lg where country_code='mx'
) As f
) As fc;
```
`cat /tmp/query.sql | psql -At nominatim > /tmp/mexico.quad.geojson`
![mexico](mexico.quad.png)

View File

@@ -1,33 +0,0 @@
-- Script to build a calculated country grid from existing tables
DROP TABLE IF EXISTS tmp_country_osm_grid;
CREATE TABLE tmp_country_osm_grid as select country_name.country_code,st_union(placex.geometry) as geometry from country_name,
placex
where (lower(placex.country_code) = country_name.country_code)
and placex.rank_search < 16 and st_area(placex.geometry) > 0
group by country_name.country_code;
ALTER TABLE tmp_country_osm_grid add column area double precision;
UPDATE tmp_country_osm_grid set area = st_area(geometry::geography);
-- compare old and new
select country_code, round, round(log(area)) from (select distinct country_code,round(log(area)) from country_osm_grid order by country_code) as x
left outer join tmp_country_osm_grid using (country_code) where area is null or round(log(area)) != round;
DROP TABLE IF EXISTS new_country_osm_grid;
CREATE TABLE new_country_osm_grid as select country_code,area,quad_split_geometry(geometry,0.5,20) as geometry from tmp_country_osm_grid;
CREATE INDEX new_idx_country_osm_grid_geometry ON new_country_osm_grid USING GIST (geometry);
-- Sometimes there are problems calculating area due to invalid data - optionally recalc
UPDATE new_country_osm_grid set area = sum from (select country_code,sum(case when st_area(geometry::geography) = 'NaN' THEN 0 ELSE st_area(geometry::geography) END)
from new_country_osm_grid group by country_code) as x where x.country_code = new_country_osm_grid.country_code;
-- compare old and new
select country_code, x.round, y.round from (select distinct country_code,round(log(area)) from country_osm_grid order by country_code) as x
left outer join (select distinct country_code,round(log(area)) from new_country_osm_grid order by country_code) as y
using (country_code) where x.round != y.round;
-- Flip the new table in
BEGIN;
DROP TABLE IF EXISTS country_osm_grid;
ALTER TABLE new_country_osm_grid rename to country_osm_grid;
ALTER INDEX new_idx_country_osm_grid_geometry RENAME TO idx_country_osm_grid_geometry;
COMMIT;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 320 KiB

View File

@@ -1,56 +0,0 @@
# GB Postcodes
The server [importing instructions](https://www.nominatim.org/release-docs/latest/admin/Import-and-Update/) allow optionally download [`gb_postcode_data.sql.gz`](https://www.nominatim.org/data/gb_postcode_data.sql.gz). This document explains how the file got created.
## GB vs UK
GB (Great Britain) is more correct as the Ordnance Survey dataset doesn't contain postcodes from Northern Ireland.
## Importing separately after the initial import
If you forgot to download the file, or have a new version, you can import it separately:
1. Import the downloaded `gb_postcode_data.sql.gz` file.
2. Run the SQL query `SELECT count(getorcreate_postcode_id(postcode)) FROM gb_postcode;`. This will update the search index.
3. Run `utils/setup.php --calculate-postcodes` from the build directory. This will copy data form the `gb_postcode` table to the `location_postcodes` table.
## Converting Code-Point Open data
1. Download from [Code-Point® Open](https://www.ordnancesurvey.co.uk/business-and-government/products/code-point-open.html). It requires an email address where a download link will be send to.
2. `unzip codepo_gb.zip`
Unpacked you'll see a directory of CSV files.
$ more codepo_gb/Data/CSV/n.csv
"N1 0AA",10,530626,183961,"E92000001","E19000003","E18000007","","E09000019","E05000368"
"N1 0AB",10,530559,183978,"E92000001","E19000003","E18000007","","E09000019","E05000368"
The coordinates are "Northings" and "Eastings" in [OSGB 1936](http://epsg.io/1314) projection. They can be projected to WGS84 like this
SELECT ST_AsText(ST_Transform(ST_SetSRID('POINT(530626 183961)'::geometry,27700), 4326));
POINT(-0.117872733220225 51.5394424719303)
[-0.117872733220225 51.5394424719303 on OSM map](https://www.openstreetmap.org/?mlon=-0.117872733220225&mlat=51.5394424719303&zoom=16)
3. Create database, import CSV files, add geometry column, dump into file
DBNAME=create_gb_postcode_file
createdb $DBNAME
echo 'CREATE EXTENSION postgis' | psql $DBNAME
cat data/gb_postcode_table.sql | psql $DBNAME
cat codepo_gb/Data/CSV/*.csv | ./data-sources/gb-postcodes/convert_codepoint.php | psql $DBNAME
cat codepo_gb/Doc/licence.txt | iconv -f iso-8859-1 -t utf-8 | dos2unix | sed 's/^/-- /g' > gb_postcode_data.sql
pg_dump -a -t gb_postcode $DBNAME | grep -v '^--' >> gb_postcode_data.sql
gzip -9 -f gb_postcode_data.sql
ls -lah gb_postcode_data.*
# dropdb $DBNAME

View File

@@ -1,37 +0,0 @@
#!/usr/bin/env php
<?php
echo <<< EOT
ALTER TABLE gb_postcode ADD COLUMN easting bigint;
ALTER TABLE gb_postcode ADD COLUMN northing bigint;
TRUNCATE gb_postcode;
COPY gb_postcode (id, postcode, easting, northing) FROM stdin;
EOT;
$iCounter = 0;
while ($sLine = fgets(STDIN)) {
$aColumns = str_getcsv($sLine);
// insert space before the third last position
// https://stackoverflow.com/a/9144834
$postcode = $aColumns[0];
$postcode = preg_replace('/\s*(...)$/', ' $1', $postcode);
echo join("\t", array($iCounter, $postcode, $aColumns[2], $aColumns[3]))."\n";
$iCounter = $iCounter + 1;
}
echo <<< EOT
\.
UPDATE gb_postcode SET geometry=ST_Transform(ST_SetSRID(CONCAT('POINT(', easting, ' ', northing, ')')::geometry, 27700), 4326);
ALTER TABLE gb_postcode DROP COLUMN easting;
ALTER TABLE gb_postcode DROP COLUMN northing;
EOT;

View File

@@ -1,26 +0,0 @@
# US TIGER address data
Convert [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)/Line dataset of the US Census Bureau to SQL files which can be imported by Nominatim. The created tables in the Nominatim database are separate from OpenStreetMap tables and get queried at search time separately.
The dataset gets updated once per year. Downloading is prone to be slow (can take a full day) and converting them can take hours as well.
Replace '2019' with the current year throughout.
1. Install the GDAL library and python bindings and the unzip tool
# Ubuntu:
sudo apt-get install python3-gdal unzip
2. Get the TIGER 2019 data. You will need the EDGES files
(3,233 zip files, 11GB total).
wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2019/EDGES/
3. Convert the data into SQL statements. Adjust the file paths in the scripts as needed
cd data-sources/us-tiger
./convert.sh <input-path> <output-path>
4. Maybe: package the created files
tar -czf tiger2019-nominatim-preprocessed.tar.gz tiger

View File

@@ -1,48 +0,0 @@
#!/bin/bash
INPATH=$1
OUTPATH=$2
if [[ ! -d "$INPATH" ]]; then
echo "input path does not exist"
exit 1
fi
if [[ ! -d "$OUTPATH" ]]; then
echo "output path does not exist"
exit 1
fi
INREGEX='_([0-9]{5})_edges.zip'
WORKPATH="$OUTPATH/tmp-workdir/"
mkdir -p "$WORKPATH"
INFILES=($INPATH/*.zip)
echo "Found ${#INFILES[*]} files."
for F in ${INFILES[*]}; do
# echo $F
if [[ "$F" =~ $INREGEX ]]; then
COUNTYID=${BASH_REMATCH[1]}
SHAPEFILE="$WORKPATH/$(basename $F '.zip').shp"
SQLFILE="$OUTPATH/$COUNTYID.sql"
unzip -o -q -d "$WORKPATH" "$F"
if [[ ! -e "$SHAPEFILE" ]]; then
echo "Unzip failed. $SHAPEFILE not found."
exit 1
fi
./tiger_address_convert.py "$SHAPEFILE" "$SQLFILE"
rm $WORKPATH/*
fi
done
OUTFILES=($OUTPATH/*.sql)
echo "Wrote ${#OUTFILES[*]} files."
rmdir $WORKPATH

View File

@@ -1,620 +0,0 @@
#!/usr/bin/python3
# Tiger road data to OSM conversion script
# Creates Karlsruhe-style address ways beside the main way
# based on the Massachusetts GIS script by christopher schmidt
#BUGS:
# On very tight curves, a loop may be generated in the address way.
# It would be nice if the ends of the address ways were not pulled back from dead ends
# Ways that include these mtfccs should not be uploaded
# H1100 Connector
# H3010 Stream/River
# H3013 Braided Stream
# H3020 Canal, Ditch or Aqueduct
# L4130 Point-to-Point Line
# L4140 Property/Parcel Line (Including PLSS)
# P0001 Nonvisible Linear Legal/Statistical Boundary
# P0002 Perennial Shoreline
# P0003 Intermittent Shoreline
# P0004 Other non-visible bounding Edge (e.g., Census water boundary, boundary of an areal feature)
ignoremtfcc = [ "H1100", "H3010", "H3013", "H3020", "L4130", "L4140", "P0001", "P0002", "P0003", "P0004" ]
# Sets the distance that the address ways should be from the main way, in feet.
address_distance = 30
# Sets the distance that the ends of the address ways should be pulled back from the ends of the main way, in feet
address_pullback = 45
import sys, os.path, json
try:
from osgeo import ogr
from osgeo import osr
except:
import ogr
import osr
# https://www.census.gov/geo/reference/codes/cou.html
# tiger_county_fips.json was generated from the following:
# wget https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt
# cat national_county.txt | perl -F, -naE'($F[0] ne 'AS') && $F[3] =~ s/ ((city|City|County|District|Borough|City and Borough|Municipio|Municipality|Parish|Island|Census Area)(?:, |\Z))+//; say qq( "$F[1]$F[2]": "$F[3], $F[0]",)'
json_fh = open(os.path.dirname(sys.argv[0]) + "/tiger_county_fips.json")
county_fips_data = json.load(json_fh)
def parse_shp_for_geom_and_tags( filename ):
#ogr.RegisterAll()
dr = ogr.GetDriverByName("ESRI Shapefile")
poDS = dr.Open( filename )
if poDS == None:
raise "Open failed."
poLayer = poDS.GetLayer( 0 )
fieldNameList = []
layerDefinition = poLayer.GetLayerDefn()
for i in range(layerDefinition.GetFieldCount()):
fieldNameList.append(layerDefinition.GetFieldDefn(i).GetName())
# sys.stderr.write(",".join(fieldNameList))
poLayer.ResetReading()
ret = []
poFeature = poLayer.GetNextFeature()
while poFeature:
tags = {}
# WAY ID
tags["tiger:way_id"] = int( poFeature.GetField("TLID") )
# FEATURE IDENTIFICATION
mtfcc = poFeature.GetField("MTFCC");
if mtfcc != None:
if mtfcc == "L4010": #Pipeline
tags["man_made"] = "pipeline"
if mtfcc == "L4020": #Powerline
tags["power"] = "line"
if mtfcc == "L4031": #Aerial Tramway/Ski Lift
tags["aerialway"] = "cable_car"
if mtfcc == "L4110": #Fence Line
tags["barrier"] = "fence"
if mtfcc == "L4125": #Cliff/Escarpment
tags["natural"] = "cliff"
if mtfcc == "L4165": #Ferry Crossing
tags["route"] = "ferry"
if mtfcc == "R1011": #Railroad Feature (Main, Spur, or Yard)
tags["railway"] = "rail"
ttyp = poFeature.GetField("TTYP")
if ttyp != None:
if ttyp == "S":
tags["service"] = "spur"
if ttyp == "Y":
tags["service"] = "yard"
tags["tiger:ttyp"] = ttyp
if mtfcc == "R1051": #Carline, Streetcar Track, Monorail, Other Mass Transit Rail)
tags["railway"] = "light_rail"
if mtfcc == "R1052": #Cog Rail Line, Incline Rail Line, Tram
tags["railway"] = "incline"
if mtfcc == "S1100":
tags["highway"] = "primary"
if mtfcc == "S1200":
tags["highway"] = "secondary"
if mtfcc == "S1400":
tags["highway"] = "residential"
if mtfcc == "S1500":
tags["highway"] = "track"
if mtfcc == "S1630": #Ramp
tags["highway"] = "motorway_link"
if mtfcc == "S1640": #Service Drive usually along a limited access highway
tags["highway"] = "service"
if mtfcc == "S1710": #Walkway/Pedestrian Trail
tags["highway"] = "path"
if mtfcc == "S1720":
tags["highway"] = "steps"
if mtfcc == "S1730": #Alley
tags["highway"] = "service"
tags["service"] = "alley"
if mtfcc == "S1740": #Private Road for service vehicles (logging, oil, fields, ranches, etc.)
tags["highway"] = "service"
tags["access"] = "private"
if mtfcc == "S1750": #Private Driveway
tags["highway"] = "service"
tags["access"] = "private"
tags["service"] = "driveway"
if mtfcc == "S1780": #Parking Lot Road
tags["highway"] = "service"
tags["service"] = "parking_aisle"
if mtfcc == "S1820": #Bike Path or Trail
tags["highway"] = "cycleway"
if mtfcc == "S1830": #Bridle Path
tags["highway"] = "bridleway"
tags["tiger:mtfcc"] = mtfcc
# FEATURE NAME
if poFeature.GetField("FULLNAME"):
#capitalizes the first letter of each word
name = poFeature.GetField( "FULLNAME" )
tags["name"] = name
#Attempt to guess highway grade
if name[0:2] == "I-":
tags["highway"] = "motorway"
if name[0:3] == "US ":
tags["highway"] = "primary"
if name[0:3] == "US-":
tags["highway"] = "primary"
if name[0:3] == "Hwy":
if tags["highway"] != "primary":
tags["highway"] = "secondary"
# TIGER 2017 no longer contains this field
if 'DIVROAD' in fieldNameList:
divroad = poFeature.GetField("DIVROAD")
if divroad != None:
if divroad == "Y" and "highway" in tags and tags["highway"] == "residential":
tags["highway"] = "tertiary"
tags["tiger:separated"] = divroad
statefp = poFeature.GetField("STATEFP")
countyfp = poFeature.GetField("COUNTYFP")
if (statefp != None) and (countyfp != None):
county_name = county_fips_data.get(statefp + '' + countyfp)
if county_name:
tags["tiger:county"] = county_name
# tlid = poFeature.GetField("TLID")
# if tlid != None:
# tags["tiger:tlid"] = tlid
lfromadd = poFeature.GetField("LFROMADD")
if lfromadd != None:
tags["tiger:lfromadd"] = lfromadd
rfromadd = poFeature.GetField("RFROMADD")
if rfromadd != None:
tags["tiger:rfromadd"] = rfromadd
ltoadd = poFeature.GetField("LTOADD")
if ltoadd != None:
tags["tiger:ltoadd"] = ltoadd
rtoadd = poFeature.GetField("RTOADD")
if rtoadd != None:
tags["tiger:rtoadd"] = rtoadd
zipl = poFeature.GetField("ZIPL")
if zipl != None:
tags["tiger:zip_left"] = zipl
zipr = poFeature.GetField("ZIPR")
if zipr != None:
tags["tiger:zip_right"] = zipr
if mtfcc not in ignoremtfcc:
# COPY DOWN THE GEOMETRY
geom = []
rawgeom = poFeature.GetGeometryRef()
for i in range( rawgeom.GetPointCount() ):
geom.append( (rawgeom.GetX(i), rawgeom.GetY(i)) )
ret.append( (geom, tags) )
poFeature = poLayer.GetNextFeature()
return ret
# ====================================
# to do read .prj file for this data
# Change the Projcs_wkt to match your datas prj file.
# ====================================
projcs_wkt = \
"""GEOGCS["GCS_North_American_1983",
DATUM["D_North_American_1983",
SPHEROID["GRS_1980",6378137,298.257222101]],
PRIMEM["Greenwich",0],
UNIT["Degree",0.017453292519943295]]"""
from_proj = osr.SpatialReference()
from_proj.ImportFromWkt( projcs_wkt )
# output to WGS84
to_proj = osr.SpatialReference()
to_proj.SetWellKnownGeogCS( "EPSG:4326" )
tr = osr.CoordinateTransformation( from_proj, to_proj )
import math
def length(segment, nodelist):
'''Returns the length (in feet) of a segment'''
first = True
distance = 0
lat_feet = 364613 #The approximate number of feet in one degree of latitude
for point in segment:
pointid, (lat, lon) = nodelist[ round_point( point ) ]
if first:
first = False
else:
#The approximate number of feet in one degree of longitute
lrad = math.radians(lat)
lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
distance += math.sqrt(((lat - previous[0])*lat_feet)**2 + ((lon - previous[1])*lon_feet)**2)
previous = (lat, lon)
return distance
def addressways(waylist, nodelist, first_id):
id = first_id
lat_feet = 364613 #The approximate number of feet in one degree of latitude
distance = float(address_distance)
ret = []
for waykey, segments in waylist.items():
waykey = dict(waykey)
rsegments = []
lsegments = []
for segment in segments:
lsegment = []
rsegment = []
lastpoint = None
# Don't pull back the ends of very short ways too much
seglength = length(segment, nodelist)
if seglength < float(address_pullback) * 3.0:
pullback = seglength / 3.0
else:
pullback = float(address_pullback)
if "tiger:lfromadd" in waykey:
lfromadd = waykey["tiger:lfromadd"]
else:
lfromadd = None
if "tiger:ltoadd" in waykey:
ltoadd = waykey["tiger:ltoadd"]
else:
ltoadd = None
if "tiger:rfromadd" in waykey:
rfromadd = waykey["tiger:rfromadd"]
else:
rfromadd = None
if "tiger:rtoadd" in waykey:
rtoadd = waykey["tiger:rtoadd"]
else:
rtoadd = None
if rfromadd != None and rtoadd != None:
right = True
else:
right = False
if lfromadd != None and ltoadd != None:
left = True
else:
left = False
if left or right:
first = True
firstpointid, firstpoint = nodelist[ round_point( segment[0] ) ]
finalpointid, finalpoint = nodelist[ round_point( segment[len(segment) - 1] ) ]
for point in segment:
pointid, (lat, lon) = nodelist[ round_point( point ) ]
#The approximate number of feet in one degree of longitute
lrad = math.radians(lat)
lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
#Calculate the points of the offset ways
if lastpoint != None:
#Skip points too close to start
if math.sqrt((lat * lat_feet - firstpoint[0] * lat_feet)**2 + (lon * lon_feet - firstpoint[1] * lon_feet)**2) < pullback:
#Preserve very short ways (but will be rendered backwards)
if pointid != finalpointid:
continue
#Skip points too close to end
if math.sqrt((lat * lat_feet - finalpoint[0] * lat_feet)**2 + (lon * lon_feet - finalpoint[1] * lon_feet)**2) < pullback:
#Preserve very short ways (but will be rendered backwards)
if (pointid != firstpointid) and (pointid != finalpointid):
continue
X = (lon - lastpoint[1]) * lon_feet
Y = (lat - lastpoint[0]) * lat_feet
if Y != 0:
theta = math.pi/2 - math.atan( X / Y)
Xp = math.sin(theta) * distance
Yp = math.cos(theta) * distance
else:
Xp = 0
if X > 0:
Yp = -distance
else:
Yp = distance
if Y > 0:
Xp = -Xp
else:
Yp = -Yp
if first:
first = False
dX = - (Yp * (pullback / distance)) / lon_feet #Pull back the first point
dY = (Xp * (pullback / distance)) / lat_feet
if left:
lpoint = (lastpoint[0] + (Yp / lat_feet) - dY, lastpoint[1] + (Xp / lon_feet) - dX)
lsegment.append( (id, lpoint) )
id += 1
if right:
rpoint = (lastpoint[0] - (Yp / lat_feet) - dY, lastpoint[1] - (Xp / lon_feet) - dX)
rsegment.append( (id, rpoint) )
id += 1
else:
#round the curves
if delta[1] != 0:
theta = abs(math.atan(delta[0] / delta[1]))
else:
theta = math.pi / 2
if Xp != 0:
theta = theta - abs(math.atan(Yp / Xp))
else: theta = theta - math.pi / 2
r = 1 + abs(math.tan(theta/2))
if left:
lpoint = (lastpoint[0] + (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] + (Xp + delta[1]) * r / (lon_feet * 2))
lsegment.append( (id, lpoint) )
id += 1
if right:
rpoint = (lastpoint[0] - (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] - (Xp + delta[1]) * r / (lon_feet * 2))
rsegment.append( (id, rpoint) )
id += 1
delta = (Yp, Xp)
lastpoint = (lat, lon)
#Add in the last node
dX = - (Yp * (pullback / distance)) / lon_feet
dY = (Xp * (pullback / distance)) / lat_feet
if left:
lpoint = (lastpoint[0] + (Yp + delta[0]) / (lat_feet * 2) + dY, lastpoint[1] + (Xp + delta[1]) / (lon_feet * 2) + dX )
lsegment.append( (id, lpoint) )
id += 1
if right:
rpoint = (lastpoint[0] - Yp / lat_feet + dY, lastpoint[1] - Xp / lon_feet + dX)
rsegment.append( (id, rpoint) )
id += 1
#Generate the tags for ways and nodes
zipr = ''
zipl = ''
name = ''
county = ''
if "tiger:zip_right" in waykey:
zipr = waykey["tiger:zip_right"]
if "tiger:zip_left" in waykey:
zipl = waykey["tiger:zip_left"]
if "name" in waykey:
name = waykey["name"]
if "tiger:county" in waykey:
county = waykey["tiger:county"]
if "tiger:separated" in waykey: # No longer set in Tiger-2017
separated = waykey["tiger:separated"]
else:
separated = "N"
#Write the nodes of the offset ways
if right:
rlinestring = [];
for i, point in rsegment:
rlinestring.append( "%f %f" % (point[1], point[0]) )
if left:
llinestring = [];
for i, point in lsegment:
llinestring.append( "%f %f" % (point[1], point[0]) )
if right:
rsegments.append( rsegment )
if left:
lsegments.append( lsegment )
rtofromint = right #Do the addresses convert to integers?
ltofromint = left #Do the addresses convert to integers?
if right:
try: rfromint = int(rfromadd)
except:
print("Non integer address: %s" % rfromadd)
rtofromint = False
try: rtoint = int(rtoadd)
except:
print("Non integer address: %s" % rtoadd)
rtofromint = False
if left:
try: lfromint = int(lfromadd)
except:
print("Non integer address: %s" % lfromadd)
ltofromint = False
try: ltoint = int(ltoadd)
except:
print("Non integer address: %s" % ltoadd)
ltofromint = False
if right:
id += 1
interpolationtype = "all";
if rtofromint:
if (rfromint % 2) == 0 and (rtoint % 2) == 0:
if separated == "Y": #Doesn't matter if there is another side
interpolationtype = "even";
elif ltofromint and (lfromint % 2) == 1 and (ltoint % 2) == 1:
interpolationtype = "even";
elif (rfromint % 2) == 1 and (rtoint % 2) == 1:
if separated == "Y": #Doesn't matter if there is another side
interpolationtype = "odd";
elif ltofromint and (lfromint % 2) == 0 and (ltoint % 2) == 0:
interpolationtype = "odd";
ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
( ",".join(rlinestring), sql_quote(rfromadd), sql_quote(rtoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipr) ) )
if left:
id += 1
interpolationtype = "all";
if ltofromint:
if (lfromint % 2) == 0 and (ltoint % 2) == 0:
if separated == "Y":
interpolationtype = "even";
elif rtofromint and (rfromint % 2) == 1 and (rtoint % 2) == 1:
interpolationtype = "even";
elif (lfromint % 2) == 1 and (ltoint % 2) == 1:
if separated == "Y":
interpolationtype = "odd";
elif rtofromint and (rfromint %2 ) == 0 and (rtoint % 2) == 0:
interpolationtype = "odd";
ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
( ",".join(llinestring), sql_quote(lfromadd), sql_quote(ltoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipl) ) )
return ret
def sql_quote( string ):
return "'" + string.replace("'", "''") + "'"
def unproject( point ):
pt = tr.TransformPoint( point[0], point[1] )
return (pt[1], pt[0])
def round_point( point, accuracy=8 ):
return tuple( [ round(x,accuracy) for x in point ] )
def compile_nodelist( parsed_gisdata, first_id=1 ):
nodelist = {}
i = first_id
for geom, tags in parsed_gisdata:
if len( geom )==0:
continue
for point in geom:
r_point = round_point( point )
if r_point not in nodelist:
nodelist[ r_point ] = (i, unproject( point ))
i += 1
return (i, nodelist)
def adjacent( left, right ):
left_left = round_point(left[0])
left_right = round_point(left[-1])
right_left = round_point(right[0])
right_right = round_point(right[-1])
return ( left_left == right_left or
left_left == right_right or
left_right == right_left or
left_right == right_right )
def glom( left, right ):
left = list( left )
right = list( right )
left_left = round_point(left[0])
left_right = round_point(left[-1])
right_left = round_point(right[0])
right_right = round_point(right[-1])
if left_left == right_left:
left.reverse()
return left[0:-1] + right
if left_left == right_right:
return right[0:-1] + left
if left_right == right_left:
return left[0:-1] + right
if left_right == right_right:
right.reverse()
return left[0:-1] + right
raise 'segments are not adjacent'
def glom_once( segments ):
if len(segments)==0:
return segments
unsorted = list( segments )
x = unsorted.pop(0)
while len( unsorted ) > 0:
n = len( unsorted )
for i in range(0, n):
y = unsorted[i]
if adjacent( x, y ):
y = unsorted.pop(i)
x = glom( x, y )
break
# Sorted and unsorted lists have no adjacent segments
if len( unsorted ) == n:
break
return x, unsorted
def glom_all( segments ):
unsorted = segments
chunks = []
while unsorted != []:
chunk, unsorted = glom_once( unsorted )
chunks.append( chunk )
return chunks
def compile_waylist( parsed_gisdata ):
waylist = {}
#Group by tiger:way_id
for geom, tags in parsed_gisdata:
way_key = tags.copy()
way_key = ( way_key['tiger:way_id'], tuple( [(k,v) for k,v in way_key.items()] ) )
if way_key not in waylist:
waylist[way_key] = []
waylist[way_key].append( geom )
ret = {}
for (way_id, way_key), segments in waylist.items():
ret[way_key] = glom_all( segments )
return ret
def shape_to_sql( shp_filename, sql_filename ):
print("parsing shpfile %s" % shp_filename)
parsed_features = parse_shp_for_geom_and_tags( shp_filename )
print("compiling nodelist")
i, nodelist = compile_nodelist( parsed_features )
print("compiling waylist")
waylist = compile_waylist( parsed_features )
print("preparing address ways")
sql_lines = addressways(waylist, nodelist, i)
print("writing %s" % sql_filename)
fp = open( sql_filename, "w" )
fp.write( "\n".join( sql_lines ) )
fp.close()
if __name__ == '__main__':
import sys, os.path
if len(sys.argv) < 3:
print("%s input.shp output.sql" % sys.argv[0])
sys.exit()
shp_filename = sys.argv[1]
sql_filename = sys.argv[2]
shape_to_sql(shp_filename, sql_filename)

File diff suppressed because it is too large Load Diff

View File

@@ -1,58 +0,0 @@
## Add Wikipedia and Wikidata to Nominatim
OSM contributors frequently tag items with links to Wikipedia and Wikidata. Nominatim can use the page ranking of Wikipedia pages to help indicate the relative importance of osm features. This is done by calculating an importance score between 0 and 1 based on the number of inlinks to an article for a location. If two places have the same name and one is more important than the other, the wikipedia score often points to the correct place.
These scripts extract and prepare both Wikipedia page rank and Wikidata links for use in Nominatim.
#### Create a new postgres DB for Processing
Due to the size of initial and intermediate tables, processing can be done in an external database:
```
CREATE DATABASE wikiprocessingdb;
```
---
Wikipedia
---
Processing these data requires a large amount of disk space (~1TB) and considerable time (>24 hours).
#### Import & Process Wikipedia tables
This step downloads and converts [Wikipedia](https://dumps.wikimedia.org/) page data SQL dumps to postgreSQL files which can be imported and processed with pagelink information from Wikipedia language sites to calculate importance scores.
- The script will processes data from whatever set of Wikipedia languages are specified in the initial languages array
- Note that processing the top 40 Wikipedia languages can take over a day, and will add nearly 1TB to the processing database. The final output tables will be approximately 11GB and 2GB in size
To download, convert, and import the data, then process summary statistics and compute importance scores, run:
```
./wikipedia_import.sh
```
---
Wikidata
---
This script downloads and processes Wikidata to enrich the previously created Wikipedia tables for use in Nominatim.
#### Import & Process Wikidata
This step downloads and converts [Wikidata](https://dumps.wikimedia.org/wikidatawiki/) page data SQL dumps to postgreSQL files which can be processed and imported into Nominatim database. Also utilizes Wikidata Query Service API to discover and include place types.
- Script presumes that the user has already processed Wikipedia tables as specified above
- Script requires wikidata_place_types.txt and wikidata_place_type_levles.csv
- script requires the [jq json parser](https://stedolan.github.io/jq/)
- Script processes data from whatever set of Wikipedia languages are specified in the initial languages array
- Script queries Wikidata Query Service API and imports all instances of place types listed in wikidata_place_types.txt
- Script updates wikipedia_articles table with extracted wikidata
By including Wikidata in the wikipedia_articles table, new connections can be made on the fly from the Nominatim placex table to wikipedia_article importance scores.
To download, convert, and import the data, then process required items, run:
```
./wikidata_import.sh
```

View File

@@ -1,95 +0,0 @@
#!/bin/bash
psqlcmd() {
psql wikiprocessingdb
}
mysql2pgsqlcmd() {
./mysql2pgsql.perl /dev/stdin /dev/stdout
}
# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
# get a few wikidata dump tables
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-geo_tags.sql.gz
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-page.sql.gz
wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-wb_items_per_site.sql.gz
# import wikidata tables
gzip -dc wikidatawiki-latest-geo_tags.sql.gz | mysql2pgsqlcmd | psqlcmd
gzip -dc wikidatawiki-latest-page.sql.gz | mysql2pgsqlcmd | psqlcmd
gzip -dc wikidatawiki-latest-wb_items_per_site.sql.gz | mysql2pgsqlcmd | psqlcmd
# get wikidata places from wikidata query API
while read F ; do
wget "https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT ?item WHERE{?item wdt:P31*/wdt:P279*wd:$F;}" -O $F.json
jq -r '.results | .[] | .[] | [.item.value] | @csv' $F.json >> $F.txt
awk -v qid=$F '{print $0 ","qid}' $F.txt | sed -e 's!"http://www.wikidata.org/entity/!!' | sed 's/"//g' >> $F.csv
cat $F.csv >> wikidata_place_dump.csv
rm $F.json $F.txt $F.csv
done < wikidata_place_types.txt
# import wikidata places
echo "CREATE TABLE wikidata_place_dump (item text, instance_of text);" | psqlcmd
echo "COPY wikidata_place_dump (item, instance_of) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_dump.csv' DELIMITER ',' CSV;" | psqlcmd
echo "CREATE TABLE wikidata_place_type_levels (place_type text, level integer);" | psqlcmd
echo "COPY wikidata_place_type_levels (place_type, level) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv' DELIMITER ',' CSV HEADER;" | psqlcmd
# create derived tables
echo "CREATE TABLE geo_earth_primary AS SELECT gt_page_id, gt_lat, gt_lon FROM geo_tags WHERE gt_globe = 'earth' AND gt_primary = 1 AND NOT( gt_lat < -90 OR gt_lat > 90 OR gt_lon < -180 OR gt_lon > 180 OR gt_lat=0 OR gt_lon=0) ;" | psqlcmd
echo "CREATE TABLE geo_earth_wikidata AS SELECT DISTINCT geo_earth_primary.gt_page_id, geo_earth_primary.gt_lat, geo_earth_primary.gt_lon, page.page_title, page.page_namespace FROM geo_earth_primary LEFT OUTER JOIN page ON (geo_earth_primary.gt_page_id = page.page_id) ORDER BY geo_earth_primary.gt_page_id;" | psqlcmd
echo "ALTER TABLE wikidata_place_dump ADD COLUMN ont_level integer, ADD COLUMN lat numeric(11,8), ADD COLUMN lon numeric(11,8);" | psqlcmd
echo "UPDATE wikidata_place_dump SET ont_level = wikidata_place_type_levels.level FROM wikidata_place_type_levels WHERE wikidata_place_dump.instance_of = wikidata_place_type_levels.place_type;" | psqlcmd
echo "CREATE TABLE wikidata_places AS SELECT DISTINCT ON (item) item, instance_of, MAX(ont_level) AS ont_level, lat, lon FROM wikidata_place_dump GROUP BY item, instance_of, ont_level, lat, lon ORDER BY item;" | psqlcmd
echo "UPDATE wikidata_places SET lat = geo_earth_wikidata.gt_lat, lon = geo_earth_wikidata.gt_lon FROM geo_earth_wikidata WHERE wikidata_places.item = geo_earth_wikidata.page_title" | psqlcmd
# process language pages
echo "CREATE TABLE wikidata_pages (item text, instance_of text, lat numeric(11,8), lon numeric(11,8), ips_site_page text, language text );" | psqlcmd
for i in "${language[@]}"
do
echo "CREATE TABLE wikidata_${i}_pages as select wikidata_places.item, wikidata_places.instance_of, wikidata_places.lat, wikidata_places.lon, wb_items_per_site.ips_site_page FROM wikidata_places LEFT JOIN wb_items_per_site ON (CAST (( LTRIM(wikidata_places.item, 'Q')) AS INTEGER) = wb_items_per_site.ips_item_id) WHERE ips_site_id = '${i}wiki' AND LEFT(wikidata_places.item,1) = 'Q' order by wikidata_places.item;" | psqlcmd
echo "ALTER TABLE wikidata_${i}_pages ADD COLUMN language text;" | psqlcmd
echo "UPDATE wikidata_${i}_pages SET language = '${i}';" | psqlcmd
echo "INSERT INTO wikidata_pages SELECT item, instance_of, lat, lon, ips_site_page, language FROM wikidata_${i}_pages;" | psqlcmd
done
echo "ALTER TABLE wikidata_pages ADD COLUMN wp_page_title text;" | psqlcmd
echo "UPDATE wikidata_pages SET wp_page_title = REPLACE(ips_site_page, ' ', '_');" | psqlcmd
echo "ALTER TABLE wikidata_pages DROP COLUMN ips_site_page;" | psqlcmd
# add wikidata to wikipedia_article table
echo "UPDATE wikipedia_article SET lat = wikidata_pages.lat, lon = wikidata_pages.lon, wd_page_title = wikidata_pages.item, instance_of = wikidata_pages.instance_of FROM wikidata_pages WHERE wikipedia_article.language = wikidata_pages.language AND wikipedia_article.title = wikidata_pages.wp_page_title;" | psqlcmd
echo "CREATE TABLE wikipedia_article_slim AS SELECT * FROM wikipedia_article WHERE wikidata_id IS NOT NULL;" | psqlcmd
echo "ALTER TABLE wikipedia_article RENAME TO wikipedia_article_full;" | psqlcmd
echo "ALTER TABLE wikipedia_article_slim RENAME TO wikipedia_article;" | psqlcmd
# clean up intermediate tables
echo "DROP TABLE wikidata_place_dump;" | psqlcmd
echo "DROP TABLE geo_earth_primary;" | psqlcmd
for i in "${language[@]}"
do
echo "DROP TABLE wikidata_${i}_pages;" | psqlcmd
done

View File

@@ -1,77 +0,0 @@
#!/bin/bash
psqlcmd() {
psql wikiprocessingdb
}
mysql2pgsqlcmd() {
./mysql2pgsql.perl /dev/stdin /dev/stdout
}
# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
# create wikipedia calculation tables
echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double precision, lon double precision);" | psqlcmd
echo "CREATE TABLE wikipedia_article (language text NOT NULL, title text NOT NULL, langcount integer, othercount integer, totalcount integer, lat double precision, lon double precision, importance double precision, title_en text, osm_type character(1), osm_id bigint );" | psqlcmd
echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );" | psqlcmd
# download individual wikipedia language tables
for i in "${language[@]}"
do
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
done
# import individual wikipedia language tables
for i in "${language[@]}"
do
gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | mysql2pgsqlcmd | psqlcmd
gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | mysql2pgsqlcmd | psqlcmd
done
# process language tables and associated pagelink counts
for i in "${language[@]}"
do
echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | psqlcmd
echo "alter table ${i}pagelinkcount add column othercount integer;" | psqlcmd
echo "update ${i}pagelinkcount set othercount = 0;" | psqlcmd
for j in "${language[@]}"
do
echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | psqlcmd
done
echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | psqlcmd
done
# calculate importance score for each wikipedia page
echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | psqlcmd
# clean up intermediate tables to conserve space
for i in "${language[@]}"
do
echo "DROP TABLE ${i}pagelinks;" | psqlcmd
echo "DROP TABLE ${i}page;" | psqlcmd
echo "DROP TABLE ${i}langlinks;" | psqlcmd
echo "DROP TABLE ${i}redirect;" | psqlcmd
echo "DROP TABLE ${i}pagelinkcount;" | psqlcmd
done

View File

@@ -1,951 +0,0 @@
#!/usr/bin/perl -w
# mysql2pgsql
# MySQL to PostgreSQL dump file converter
#
# For usage: perl mysql2pgsql.perl --help
#
# ddl statments are changed but none or only minimal real data
# formatting are done.
# data consistency is up to the DBA.
#
# (c) 2004-2007 Jose M Duarte and Joseph Speigle ... gborg
#
# (c) 2000-2004 Maxim Rudensky <fonin@omnistaronline.com>
# (c) 2000 Valentine Danilchuk <valdan@ziet.zhitomir.ua>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. All advertising materials mentioning features or use of this software
# must display the following acknowledgement:
# This product includes software developed by the Max Rudensky
# and its contributors.
# 4. Neither the name of the author nor the names of its contributors
# may be used to endorse or promote products derived from this software
# without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
use Getopt::Long;
use POSIX;
use strict;
use warnings;
# main sections
# -------------
# 1 variable declarations
# 2 subroutines
# 3 get commandline options and specify help statement
# 4 loop through file and process
# 5. print_plpgsql function prototype
#################################################################
# 1. variable declarations
#################################################################
# command line options
my( $ENC_IN, $ENC_OUT, $PRESERVE_CASE, $HELP, $DEBUG, $SCHEMA, $LOWERCASE, $CHAR2VARCHAR, $NODROP, $SEP_FILE, $opt_debug, $opt_help, $opt_schema, $opt_preserve_case, $opt_char2varchar, $opt_nodrop, $opt_sepfile, $opt_enc_in, $opt_enc_out );
# variables for constructing pre-create-table entities
my $pre_create_sql=''; # comments, 'enum' constraints preceding create table statement
my $auto_increment_seq= ''; # so we can easily substitute it if we need a default value
my $create_sql=''; # all the datatypes in the create table section
my $post_create_sql=''; # create indexes, foreign keys, table comments
my $function_create_sql = ''; # for the set (function,trigger) and CURRENT_TIMESTAMP ( function,trigger )
# constraints
my ($type, $column_valuesStr, @column_values, $value );
my %constraints=(); # holds values constraints used to emulate mysql datatypes (e.g. year, set)
# datatype conversion variables
my ( $index,$seq);
my ( $column_name, $col, $quoted_column);
my ( @year_holder, $year, $constraint_table_name);
my $table=""; # table_name for create sql statements
my $table_no_quotes=""; # table_name for create sql statements
my $sl = '^\s+\w+\s+'; # matches the column name
my $tables_first_timestamp_column= 1; # decision to print warnings about default_timestamp not being in postgres
my $mysql_numeric_datatypes = "TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT|REAL|DOUBLE|FLOAT|DECIMAL|NUMERIC";
my $mysql_datetime_datatypes = "|DATE|TIME|TIMESTAMP|DATETIME|YEAR";
my $mysql_text_datatypes = "CHAR|VARCHAR|BINARY|VARBINARY|TINYBLOB|BLOB|MEDIUMBLOB|LONGBLOB|TINYTEXT|TEXT|MEDIUMTEXT|LONGTEXT|ENUM|SET";
my $mysql_datatypesStr = $mysql_numeric_datatypes . "|". $mysql_datetime_datatypes . "|". $mysql_text_datatypes ;
# handling INSERT INTO statements
my $rowRe = qr{
\( # opening parens
( # (start capture)
(?: # (start group)
' # string start
[^'\\]* # up to string-end or backslash (escape)
(?: # (start group)
\\. # gobble escaped character
[^'\\]* # up to string-end of backslash
)* # (end group, repeat zero or more)
' # string end
| # (OR)
.*? # everything else (not strings)
)* # (end group, repeat zero or more)
) # (end capture)
\) # closing parent
}x;
my ($insert_table, $valueString);
#
########################################################
# 2. subroutines
#
# get_identifier
# print_post_create_sql()
# quote_and_lc()
# make_plpgsql($table,$column_name) -- at end of file
########################################################
# returns an identifier with the given suffix doing controlled
# truncation if necessary
sub get_identifier($$$) {
my ($table, $col, $suffix) = @_;
my $name = '';
$table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
# in the case of multiple columns
my @cols = split(/,/,$col);
$col =~ s/,//g;
# in case all columns together too long we have to truncate them
if (length($col) > 55) {
my $totaltocut = length($col)-55;
my $tocut = ceil($totaltocut / @cols);
@cols = map {substr($_,0,abs(length($_)-$tocut))} @cols;
$col="";
foreach (@cols){
$col.=$_;
}
}
my $max_table_length = 63 - length("_${col}_$suffix");
if (length($table) > $max_table_length) {
$table = substr($table, length($table) - $max_table_length, $max_table_length);
}
return quote_and_lc("${table}_${col}_${suffix}");
}
#
#
# called when we encounter next CREATE TABLE statement
# also called at EOF to print out for last table
# prints comments, indexes, foreign key constraints (the latter 2 possibly to a separate file)
sub print_post_create_sql() {
my ( @create_idx_comments_constraints_commandsArr, $stmts, $table_field_combination);
my %stmts;
# loop to check for duplicates in $post_create_sql
# Needed because of duplicate key declarations ( PRIMARY KEY and KEY), auto_increment columns
@create_idx_comments_constraints_commandsArr = split(';\n?', $post_create_sql);
if ($SEP_FILE) {
open(SEP_FILE, ">>:encoding($ENC_OUT)", $SEP_FILE) or die "Unable to open $SEP_FILE for output: $!\n";
}
foreach (@create_idx_comments_constraints_commandsArr) {
if (m/CREATE INDEX "*(\S+)"*\s/i) { # CREATE INDEX korean_english_wordsize_idx ON korean_english USING btree (wordsize);
$table_field_combination = $1;
# if this particular table_field_combination was already used do not print the statement:
if ($SEP_FILE) {
print SEP_FILE "$_;\n" if !defined($stmts{$table_field_combination});
} else {
print OUT "$_;\n" if !defined($stmts{$table_field_combination});
}
$stmts{$table_field_combination} = 1;
}
elsif (m/COMMENT/i) { # COMMENT ON object IS 'text'; but comment may be part of table name so use 'elsif'
print OUT "$_;\n"
} else { # foreign key constraint or comments (those preceded by -- )
if ($SEP_FILE) {
print SEP_FILE "$_;\n";
} else {
print OUT "$_;\n"
}
}
}
if ($SEP_FILE) {
close SEP_FILE;
}
$post_create_sql='';
# empty %constraints for next " create table" statement
}
# quotes a string or a multicolumn string (comma separated)
# and optionally lowercase (if LOWERCASE is set)
# lowercase .... if user wants default postgres behavior
# quotes .... to preserve keywords and to preserve case when case-sensitive tables are to be used
sub quote_and_lc($)
{
my $col = shift;
if ($LOWERCASE) {
$col = lc($col);
}
if ($col =~ m/,/) {
my @cols = split(/,\s?/, $col);
@cols = map {"\"$_\""} @cols;
return join(', ', @cols);
} else {
return "\"$col\"";
}
}
########################################################
# 3. get commandline options and maybe print help
########################################################
GetOptions("help", "debug"=> \$opt_debug, "schema=s" => \$SCHEMA, "preserve_case" => \$opt_preserve_case, "char2varchar" => \$opt_char2varchar, "nodrop" => \$opt_nodrop, "sepfile=s" => \$opt_sepfile, "enc_in=s" => \$opt_enc_in, "enc_out=s" => \$opt_enc_out );
$HELP = $opt_help || 0;
$DEBUG = $opt_debug || 0;
$PRESERVE_CASE = $opt_preserve_case || 0;
if ($PRESERVE_CASE == 1) { $LOWERCASE = 0; }
else { $LOWERCASE = 1; }
$CHAR2VARCHAR = $opt_char2varchar || 0;
$NODROP = $opt_nodrop || 0;
$SEP_FILE = $opt_sepfile || 0;
$ENC_IN = $opt_enc_in || 'utf8';
$ENC_OUT = $opt_enc_out || 'utf8';
if (($HELP) || ! defined($ARGV[0]) || ! defined($ARGV[1])) {
print "\n\nUsage: perl $0 {--help --debug --preserve_case --char2varchar --nodrop --schema --sepfile --enc_in --enc_out } mysql.sql pg.sql\n";
print "\t* OPTIONS WITHOUT ARGS\n";
print "\t--help: prints this message \n";
print "\t--debug: output the commented-out mysql line above the postgres line in pg.sql \n";
print "\t--preserve_case: prevents automatic case-lowering of column and table names\n";
print "\t\tIf you want to preserve case, you must set this flag. For example,\n";
print "\t\tIf your client application quotes table and column-names and they have cases in them, set this flag\n";
print "\t--char2varchar: converts all char fields to varchar\n";
print "\t--nodrop: strips out DROP TABLE statements\n";
print "\t\totherise harmless warnings are printed by psql when the dropped table does not exist\n";
print "\n\t* OPTIONS WITH ARGS\n";
print "\t--schema: outputs a line into the postgres sql file setting search_path \n";
print "\t--sepfile: output foreign key constraints and indexes to a separate file so that it can be\n";
print "\t\timported after large data set is inserted from another dump file\n";
print "\t--enc_in: encoding of mysql in file (default utf8) \n";
print "\t--enc_out: encoding of postgres out file (default utf8) \n";
print "\n\t* REQUIRED ARGUMENTS\n";
if (defined ($ARGV[0])) {
print "\tmysql.sql ($ARGV[0])\n";
} else {
print "\tmysql.sql (undefined)\n";
}
if (defined ($ARGV[1])) {
print "\tpg.sql ($ARGV[1])\n";
} else {
print "\tpg.sql (undefined)\n";
}
print "\n";
exit 1;
}
########################################################
# 4. process through mysql_dump.sql file
# in a big loop
########################################################
# open in and out files
open(IN,"<:encoding($ENC_IN)", $ARGV[0]) || die "can't open mysql dump file $ARGV[0]";
open(OUT,">:encoding($ENC_OUT)", $ARGV[1]) || die "can't open pg dump file $ARGV[1]";
# output header
print OUT "--\n";
print OUT "-- Generated from mysql2pgsql.perl\n";
print OUT "-- http://gborg.postgresql.org/project/mysql2psql/\n";
print OUT "-- (c) 2001 - 2007 Jose M. Duarte, Joseph Speigle\n";
print OUT "--\n";
print OUT "\n";
print OUT "-- warnings are printed for drop tables if they do not exist\n";
print OUT "-- please see http://archives.postgresql.org/pgsql-novice/2004-10/msg00158.php\n\n";
print OUT "-- ##############################################################\n";
if ($SCHEMA ) {
print OUT "set search_path='" . $SCHEMA . "'\\g\n" ;
}
# loop through mysql file on a per-line basis
while(<IN>) {
############## flow #########################
# (the lines are directed to different string variables at different times)
#
# handle drop table , unlock, connect statements
# if ( start of create table) {
# print out post_create table (indexes, foreign key constraints, comments from previous table)
# add drop table statement if !$NODROP to pre_create_sql
# next;
# }
# else if ( inside create table) {
# add comments in this portion to create_sql
# if ( end of create table) {
# delete mysql-unique CREATE TABLE commands
# print pre_create_sql
# print the constraint tables for set and year datatypes
# print create_sql
# print function_create_sql (this is for the enum columns only)
# next;
# }
# do substitutions
# -- NUMERIC DATATYPES
# -- CHARACTER DATATYPES
# -- DATE AND TIME DATATYPES
# -- KEY AND UNIQUE CREATIONS
# and append them to create_sql
# } else {
# print inserts on-the-spot (this script only changes default timestamp of 0000-00-00)
# }
# LOOP until EOF
#
########################################################
if (!/^\s*insert into/i) { # not inside create table so don't worry about data corruption
s/`//g; # '`pgsql uses no backticks to denote table name (CREATE TABLE `sd`) or around field
# and table names like mysql
# doh! we hope all dashes and special chars are caught by the regular expressions :)
}
if (/^\s*USE\s*([^;]*);/) {
print OUT "\\c ". $1;
next;
}
if (/^(UN)?LOCK TABLES/i || /drop\s+table/i ) {
# skip
# DROP TABLE is added when we see the CREATE TABLE
next;
}
if (/(create\s+table\s+)([-_\w]+)\s/i) { # example: CREATE TABLE `english_english`
print_post_create_sql(); # for last table
$tables_first_timestamp_column= 1; # decision to print warnings about default_timestamp not being in postgres
$create_sql = '';
$table_no_quotes = $2 ;
$table=quote_and_lc($2);
if ( !$NODROP ) { # always print drop table if user doesn't explicitly say not to
# to drop a table that is referenced by a view or a foreign-key constraint of another table,
# CASCADE must be specified. (CASCADE will remove a dependent view entirely, but in the
# in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)
# (source: 8.1.3 docs, section "drop table")
warn "table $table will be dropped CASCADE\n";
$pre_create_sql .= "DROP TABLE $table CASCADE;\n"; # custom dumps may be missing the 'dump' commands
}
s/(create\s+table\s+)([-_\w]+)\s/$1 $table /i;
if ($DEBUG) {
$create_sql .= '-- ' . $_;
}
$create_sql .= $_;
next;
}
if ($create_sql ne "") { # we are inside create table statement so lets process datatypes
# print out comments or empty lines in context
if ($DEBUG) {
$create_sql .= '-- ' . $_;
}
if (/^#/ || /^$/ || /^\s*--/) {
s/^#/--/; # Two hyphens (--) is the SQL-92 standard indicator for comments
$create_sql.=$_;
next;
}
if (/\).*;/i) { # end of create table squence
s/INSERT METHOD[=\s+][^;\s]+//i;
s/PASSWORD=[^;\s]+//i;
s/ROW_FORMAT=(?:DEFAULT|DYNAMIC|FIXED|COMPRESSED|REDUNDANT|COMPACT)+//i;
s/KEY_BLOCK_SIZE=8//i;
s/DELAY KEY WRITE=[^;\s]+//i;
s/INDEX DIRECTORY[=\s+][^;\s]+//i;
s/DATA DIRECTORY=[^;\s]+//i;
s/CONNECTION=[^;\s]+//i;
s/CHECKSUM=[^;\s]+//i;
s/Type=[^;\s]+//i; # ISAM , # older versions
s/COLLATE=[^;\s]+//i; # table's collate
s/COLLATE\s+[^;\s]+//i; # table's collate
# possible AUTO_INCREMENT starting index, it is used in mysql 5.0.26, not sure since which version
if (/AUTO_INCREMENT=(\d+)/i) {
# should take < ---- ) ENGINE=MyISAM AUTO_INCREMENT=16 DEFAULT CHARSET=latin1;
# and should ouput ---> CREATE SEQUENCE "rhm_host_info_id_seq" START WITH 16;
my $start_value = $1;
print $auto_increment_seq . "--\n";
# print $pre_create_sql . "--\n";
$pre_create_sql =~ s/(CREATE SEQUENCE $auto_increment_seq )/$1 START WITH $start_value /;
}
s/AUTO_INCREMENT=\d+//i;
s/PACK_KEYS=\d//i; # mysql 5.0.22
s/DEFAULT CHARSET=[^;\s]+//i; # my mysql version is 4.1.11
s/ENGINE\s*=\s*[^;\s]+//i; # my mysql version is 4.1.11
s/ROW_FORMAT=[^;\s]+//i; # my mysql version is 5.0.22
s/KEY_BLOCK_SIZE=8//i;
s/MIN_ROWS=[^;\s]+//i;
s/MAX_ROWS=[^;\s]+//i;
s/AVG_ROW_LENGTH=[^;\s]+//i;
if (/COMMENT='([^']*)'/) { # ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='must be country zones';
$post_create_sql.="COMMENT ON TABLE $table IS '$1'\;"; # COMMENT ON table_name IS 'text';
s/COMMENT='[^']*'//i;
}
$create_sql =~ s/,$//g; # strip last , inside create table
# make sure we end in a comma, as KEY statments are turned
# into post_create_sql indices
# they often are the last line so leaving a 'hanging comma'
my @array = split("\n", $create_sql);
for (my $a = $#array; $a >= 0; $a--) { #loop backwards
if ($a == $#array && $array[$a] =~ m/,\s*$/) { # for last line
$array[$a] =~ s/,\s*$//;
next;
}
if ($array[$a] !~ m/create table/i) { # i.e. if there was more than one column in table
if ($a != $#array && $array[$a] !~ m/,\s*$/ ) { # for second to last
$array[$a] =~ s/$/,/;
last;
}
elsif ($a != $#array && $array[$a] =~ m/,\s*$/ ) { # for second to last
last;
}
}
}
$create_sql = join("\n", @array) . "\n";
$create_sql .= $_;
# put comments out first
print OUT $pre_create_sql;
# create separate table to reference and to hold mysql's possible set data-type
# values. do that table's creation before create table
# definition
foreach $column_name (keys %constraints) {
$type=$constraints{$column_name}{'type'};
$column_valuesStr = $constraints{$column_name}{'values'};
$constraint_table_name = get_identifier(${table},${column_name} ,"constraint_table");
if ($type eq 'set') {
print OUT qq~DROP TABLE $constraint_table_name CASCADE\\g\n~ ;
print OUT qq~create table $constraint_table_name ( set_values varchar UNIQUE)\\g\n~ ;
$function_create_sql .= make_plpgsql($table,$column_name);
} elsif ($type eq 'year') {
print OUT qq~DROP TABLE $constraint_table_name CASCADE\\g\n~ ;
print OUT qq~create table $constraint_table_name ( year_values varchar UNIQUE)\\g\n~ ;
}
@column_values = split /,/, $column_valuesStr;
foreach $value (@column_values) {
print OUT qq~insert into $constraint_table_name values ( $value )\\g\n~; # ad ' for ints and varchars
}
}
$create_sql =~ s/double double/double precision/g;
# print create table and reset create table vars
# when moving from each "create table" to "insert" part of dump
print OUT $create_sql;
print OUT $function_create_sql;
$pre_create_sql="";
$auto_increment_seq="";
$create_sql="";
$function_create_sql='';
%constraints=();
# the post_create_sql for this table is output at the beginning of the next table def
# in case we want to make indexes after doing inserting
next;
}
if (/^\s*(\w+)\s+.*COMMENT\s*'([^']*)'/) { #`zone_country_id` int(11) COMMENT 'column comment here',
$quoted_column=quote_and_lc($1);
$post_create_sql.="COMMENT ON COLUMN $table"."."." $quoted_column IS '$2'\;"; # COMMENT ON table_name.column_name IS 'text';
s/COMMENT\s*'[^']*'//i;
}
# NUMERIC DATATYPES
#
# auto_increment -> sequences
# UNSIGNED conversions
# TINYINT
# SMALLINT
# MEDIUMINT
# INT, INTEGER
# BIGINT
#
# DOUBLE [PRECISION], REAL
# DECIMAL(M,D), NUMERIC(M,D)
# FLOAT(p)
# FLOAT
s/(\w*int)\(\d+\)/$1/g; # hack of the (n) stuff for e.g. mediumint(2) int(3)
if (/^(\s*)(\w+)\s*.*numeric.*auto_increment/i) { # int,auto_increment -> serial
$seq = get_identifier($table, $2, 'seq');
$quoted_column=quote_and_lc($2);
# Smash datatype to int8 and autogenerate the sequence.
s/^(\s*)(\w+)\s*.*NUMERIC(.*)auto_increment([^,]*)/$1 $quoted_column serial8 $4/ig;
$create_sql.=$_;
next;
}
if (/^\s*(\w+)\s+.*int.*auto_increment/i) { # example: data_id mediumint(8) unsigned NOT NULL auto_increment,
$seq = get_identifier($table, $1, 'seq');
$quoted_column=quote_and_lc($1);
s/(\s*)(\w+)\s+.*int.*auto_increment([^,]*)/$1 $quoted_column serial8 $3/ig;
$create_sql.=$_;
next;
}
# convert UNSIGNED to CHECK constraints
if (m/^(\s*)(\w+)\s+((float|double precision|double|real|decimal|numeric))(.*)unsigned/i) {
$quoted_column = quote_and_lc($2);
s/^(\s*)(\w+)\s+((float|double precision|double|real|decimal|numeric))(.*)unsigned/$1 $quoted_column $3 $4 CHECK ($quoted_column >= 0)/i;
}
# example: `wordsize` tinyint(3) unsigned default NULL,
if (m/^(\s+)(\w+)\s+(\w+)\s+unsigned/i) {
$quoted_column=quote_and_lc($2);
s/^(\s+)(\w+)\s+(\w+)\s+unsigned/$1 $quoted_column $3 CHECK ($quoted_column >= 0)/i;
}
if (m/^(\s*)(\w+)\s+(bigint.*)unsigned/) {
$quoted_column=quote_and_lc($2);
# see http://archives.postgresql.org/pgsql-general/2005-07/msg01178.php
# and see http://www.postgresql.org/docs/8.2/interactive/datatype-numeric.html
# see http://dev.mysql.com/doc/refman/5.1/en/numeric-types.html max size == 20 digits
s/^(\s*)(\w+)\s+bigint(.*)unsigned/$1 $quoted_column NUMERIC (20,0) CHECK ($quoted_column >= 0)/i;
}
# int type conversion
# TINYINT (signed) -128 to 127 (unsigned) 0 255
# SMALLINT A small integer. The signed range is -32768 to 32767. The unsigned range is 0 to 65535.
# MEDIUMINT A medium-sized integer. The signed range is -8388608 to 8388607. The unsigned range is 0 to 16777215.
# INT A normal-size integer. The signed range is -2147483648 to 2147483647. The unsigned range is 0 to 4294967295.
# BIGINT The signed range is -9223372036854775808 to 9223372036854775807. The unsigned range is 0 to 18446744073709551615
# for postgres see http://www.postgresql.org/docs/8.2/static/datatype-numeric.html#DATATYPE-INT
s/^(\s+"*\w+"*\s+)tinyint/$1 smallint/i;
s/^(\s+"*\w+"*\s+)mediumint/$1 integer/i;
# the floating point types
# double -> double precision
# double(n,m) -> double precision
# float - no need for conversion
# float(n) - no need for conversion
# float(n,m) -> double precision
s/(^\s*\w+\s+)double(\(\d+,\d+\))?/$1float/i;
s/float(\(\d+,\d+\))/float/i;
#
# CHARACTER TYPES
#
# set
# enum
# binary(M), VARBINARy(M), tinyblob, tinytext,
# bit
# char(M), varchar(M)
# blob -> text
# mediumblob
# longblob, longtext
# text -> text
# mediumtext
# longtext
# mysql docs: A BLOB is a binary large object that can hold a variable amount of data.
# set
# For example, a column specified as SET('one', 'two') NOT NULL can have any of these values:
# ''
# 'one'
# 'two'
# 'one,two'
if (/(\w*)\s+set\(((?:['"]\w+['"]\s*,*)+(?:['"]\w+['"])*)\)(.*)$/i) { # example: `au_auth` set('r','w','d') NOT NULL default '',
$column_name = $1;
$constraints{$column_name}{'values'} = $2; # 'abc','def', ...
$constraints{$column_name}{'type'} = "set"; # 'abc','def', ...
$_ = qq~ $column_name varchar , ~;
$column_name = quote_and_lc($1);
$create_sql.=$_;
next;
}
if (/(\S*)\s+enum\(((?:['"][^'"]+['"]\s*,)+['"][^'"]+['"])\)(.*)$/i) { # enum handling
# example: `test` enum('?','+','-') NOT NULL default '?'
# $2 is the values of the enum 'abc','def', ...
$quoted_column=quote_and_lc($1);
# "test" NOT NULL default '?' CONSTRAINT test_test_constraint CHECK ("test" IN ('?','+','-'))
$_ = qq~ $quoted_column varchar CHECK ($quoted_column IN ( $2 ))$3\n~; # just assume varchar?
$create_sql.=$_;
next;
}
# Take care of "binary" option for char and varchar
# (pre-4.1.2, it indicated a byte array; from 4.1.2, indicates
# a binary collation)
s/(?:var)?char(?:\(\d+\))? (?:byte|binary)/text/i;
if (m/(?:var)?binary\s*\(\d+\)/i) { # c varBINARY(3) in Mysql
warn "WARNING in table '$table' '$_': binary type is converted to bytea (unsized) for Postgres\n";
}
s/(?:var)?binary(?:\(\d+\))?/text/i; # c varBINARY(3) in Mysql
s/bit(?:\(\d+\))?/bytea/i; # bit datatype -> bytea
# large datatypes
s/\w*blob/bytea/gi;
s/tinytext/text/gi;
s/mediumtext/text/gi;
s/longtext/text/gi;
# char -> varchar -- if specified as a command line option
# PostgreSQL would otherwise pad with spaces as opposed
# to MySQL! Your user interface may depend on this!
if ($CHAR2VARCHAR) {
s/(^\s+\S+\s+)char/${1}varchar/gi;
}
# nuke column's collate and character set
s/(\S+)\s+character\s+set\s+\w+/$1/gi;
s/(\S+)\s+collate\s+\w+/$1/gi;
#
# DATE AND TIME TYPES
#
# date time
# year
# datetime
# timestamp
# date time
# these are the same types in postgres, just do the replacement of 0000-00-00 date
if (m/default '(\d+)-(\d+)-(\d+)([^']*)'/i) { # we grab the year, month and day
# NOTE: times of 00:00:00 are possible and are okay
my $time = '';
my $year=$1;
my $month= $2;
my $day = $3;
if ($4) {
$time = $4;
}
if ($year eq "0000") { $year = '1970'; }
if ($month eq "00") { $month = '01'; }
if ($day eq "00") { $day = '01'; }
s/default '[^']+'/default '$year-$month-$day$time'/i; # finally we replace with $datetime
}
# convert mysql's year datatype to a constraint
if (/(\w*)\s+year\(4\)(.*)$/i) { # can be integer OR string 1901-2155
$constraint_table_name = get_identifier($table,$1 ,"constraint_table");
$column_name=quote_and_lc($1);
@year_holder = ();
$year='';
for (1901 .. 2155) {
$year = "'$_'";
unless ($year =~ /2155/) { $year .= ','; }
push( @year_holder, $year);
}
$constraints{$column_name}{'values'} = join('','',@year_holder); # '1901','1902', ...
$constraints{$column_name}{'type'} = "year";
$_ = qq~ $column_name varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
$create_sql.=$_;
next;
} elsif (/(\w*)\s+year\(2\)(.*)$/i) { # same for a 2-integer string
$constraint_table_name = get_identifier($table,$1 ,"constraint_table");
$column_name=quote_and_lc($1);
@year_holder = ();
$year='';
for (1970 .. 2069) {
$year = "'$_'";
if ($year =~ /2069/) { next; }
push( @year_holder, $year);
}
push( @year_holder, '0000');
$constraints{$column_name}{'values'} = join(',',@year_holder); # '1971','1972', ...
$constraints{$column_name}{'type'} = "year"; # 'abc','def', ...
$_ = qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
$create_sql.=$_;
next;
}
# datetime
# Default on a dump from MySQL 5.0.22 is in the same form as datetime so let it flow down
# to the timestamp section and deal with it there
s/(${sl})datetime /$1timestamp without time zone /i;
# change not null datetime field to null valid ones
# (to support remapping of "zero time" to null
# s/($sl)datetime not null/$1timestamp without time zone/i;
# timestamps
#
# nuke datetime representation (not supported in PostgreSQL)
# change default time of 0000-00-00 to 1970-01-01
# we may possibly need to create a trigger to provide
# equal functionality with ON UPDATE CURRENT TIMESTAMP
if (m/${sl}timestamp/i) {
if ( m/ON UPDATE CURRENT_TIMESTAMP/i ) { # the ... default CURRENT_TIMESTAMP only applies for blank inserts, not updates
s/ON UPDATE CURRENT_TIMESTAMP//i ;
m/^\s*(\w+)\s+timestamp/i ;
# automatic trigger creation
$table_no_quotes =~ s/"//g;
$function_create_sql .= " CREATE OR REPLACE FUNCTION update_". $table_no_quotes . "() RETURNS trigger AS '
BEGIN
NEW.$1 := CURRENT_TIMESTAMP;
RETURN NEW;
END;
' LANGUAGE 'plpgsql';
-- before INSERT is handled by 'default CURRENT_TIMESTAMP'
CREATE TRIGGER add_current_date_to_".$table_no_quotes." BEFORE UPDATE ON ". $table . " FOR EACH ROW EXECUTE PROCEDURE
update_".$table_no_quotes."();\n";
}
if ($tables_first_timestamp_column && m/DEFAULT NULL/i) {
# DEFAULT NULL is the same as DEFAULT CURRENT_TIMESTAMP for the first TIMESTAMP column. (MYSQL manual)
s/($sl)(timestamp\s+)default null/$1 $2 DEFAULT CURRENT_TIMESTAMP/i;
}
$tables_first_timestamp_column= 0;
if (m/${sl}timestamp\s*\(\d+\)/i) { # fix for timestamps with width spec not handled (ID: 1628)
warn "WARNING for in table '$table' '$_': your default timestamp width is being ignored for table $table \n";
s/($sl)timestamp(?:\(\d+\))/$1datetime/i;
}
} # end timestamp section
# KEY AND UNIQUE CREATIONS
#
# unique
if ( /^\s+unique\s+\(([^(]+)\)/i ) { # example UNIQUE `name` (`name`), same as UNIQUE KEY
# POSTGRESQL: treat same as mysql unique
$quoted_column = quote_and_lc($1);
s/\s+unique\s+\(([^(]+)\)/ unique ($quoted_column) /i;
$create_sql.=$_;
next;
} elsif ( /^\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/i ) { # example UNIQUE KEY `name` (`name`)
# MYSQL: unique key: allows null=YES, allows duplicates=NO (*)
# ... new ... UNIQUE KEY `unique_fullname` (`fullname`) in my mysql v. Ver 14.12 Distrib 5.1.7-beta
# POSTGRESQL: treat same as mysql unique
# just quote columns
$quoted_column = quote_and_lc($2);
s/\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/ unique ($quoted_column) /i;
$create_sql.=$_;
# the index corresponding to the 'key' is automatically created
next;
}
# keys
if ( /^\s+fulltext key\s+/i) { # example: FULLTEXT KEY `commenttext` (`commenttext`)
# that is key as a word in the first check for a match
# the tsvector datatype is made for these types of things
# example mysql file:
# what is tsvector datatype?
# http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html
warn "dba must do fulltext key transformation for $table\n";
next;
}
if ( /^(\s+)constraint (\S+) foreign key \((\S+)\) references (\S+) \((\S+)\)(.*)/i ) {
$quoted_column =quote_and_lc($3);
$col=quote_and_lc($5);
$post_create_sql .= "ALTER TABLE $table ADD FOREIGN KEY ($quoted_column) REFERENCES " . quote_and_lc($4) . " ($col);\n";
next;
}
if ( /^\s*primary key\s*\(([^)]+)\)([,\s]+)/i ) { # example PRIMARY KEY (`name`)
# MYSQL: primary key: allows null=NO , allows duplicates=NO
# POSTGRESQL: When an index is declared unique, multiple table rows with equal indexed values will not be
# allowed. Null values are not considered equal.
# POSTGRESQL quote's source: 8.1.3 docs section 11.5 "unique indexes"
# so, in postgres, we need to add a NOT NULL to the UNIQUE constraint
# and, primary key (mysql) == primary key (postgres) so that we *really* don't need change anything
$quoted_column = quote_and_lc($1);
s/(\s*)primary key\s+\(([^)]+)\)([,\s]+)/$1 primary key ($quoted_column)$3/i;
# indexes are automatically created for unique columns
$create_sql.=$_;
next;
} elsif (m/^\s+key\s[-_\s\w]+\((.+)\)/i ) { # example: KEY `idx_mod_english_def_word` (`word`),
# regular key: allows null=YES, allows duplicates=YES
# MYSQL: KEY is normally a synonym for INDEX. http://dev.mysql.com/doc/refman/5.1/en/create-table.html
#
# * MySQL: ALTER TABLE {$table} ADD KEY $column ($column)
# * PostgreSQL: CREATE INDEX {$table}_$column_idx ON {$table}($column) // Please note the _idx "extension"
# PRIMARY KEY (`postid`),
# KEY `ownerid` (`ownerid`)
# create an index for everything which has a key listed for it.
my $col = $1;
# TODO we don't have a translation for the substring syntax in text columns in MySQL (e.g. "KEY my_idx (mytextcol(20))")
# for now just getting rid of the brackets and numbers (the substring specifier):
$col=~s/\(\d+\)//g;
$quoted_column = quote_and_lc($col);
if ($col =~ m/,/) {
$col = s/,/_/;
}
$index = get_identifier($table, $col, 'idx');
$post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column)\;";
# just create index do not add to create table statement
next;
}
# handle 'key' declared at end of column
if (/\w+.*primary key/i) { # mysql: key is normally just a synonym for index
# just leave as is ( postgres has primary key type)
} elsif (/(\w+\s+(?:$mysql_datatypesStr)\s+.*)key/i) { # mysql: key is normally just a synonym for index
# I can't find a reference for 'key' in a postgres command without using the word 'primary key'
s/$1key/$1/i ;
$index = get_identifier($table, $1, 'idx');
$quoted_column =quote_and_lc($1);
$post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column) \;";
$create_sql.=$_;
}
# do we really need this anymore?
# remap colums with names of existing system attribute
if (/"oid"/i) {
s/"oid"/"_oid"/g;
print STDERR "WARNING: table $table uses column \"oid\" which is renamed to \"_oid\"\nYou should fix application manually! Press return to continue.";
my $wait=<STDIN>;
}
s/oid/_oid/i if (/key/i && /oid/i); # fix oid in key
# FINAL QUOTING OF ALL COLUMNS
# quote column names which were not already quoted
# perhaps they were not quoted because they were not explicitly handled
if (!/^\s*"(\w+)"(\s+)/i) {
/^(\s*)(\w+)(\s+)(.*)$/i ;
$quoted_column= quote_and_lc($2);
s/^(\s*)(\w+)(\s+)(.*)$/$1 $quoted_column $3 $4 /;
}
$create_sql.=$_;
# END of if ($create_sql ne "") i.e. were inside create table statement so processed datatypes
}
# add "not in create table" comments or empty lines to pre_create_sql
elsif (/^#/ || /^$/ || /^\s*--/) {
s/^#/--/; # Two hyphens (--) is the SQL-92 standard indicator for comments
$pre_create_sql .= $_ ; # printed above create table statement
next;
}
elsif (/^\s*insert into/i) { # not inside create table and doing insert
# fix mysql's zero/null value for timestamps
s/'0000-00-00/'1970-01-01/gi;
# commented out to fix bug "Field contents interpreted as a timestamp", what was the point of this line anyway?
#s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/;
#---- fix data in inserted data: (from MS world)
s!\x96!-!g; # --
s!\x93!"!g; # ``
s!\x94!"!g; # ''
s!\x85!... !g; # \ldots
s!\x92!`!g;
print OUT $pre_create_sql; # print comments preceding the insert section
$pre_create_sql="";
$auto_increment_seq = "";
s/'((?:[^'\\]++|\\.)*+)'(?=[),])/E'$1'/g;
# for the E'' see http://www.postgresql.org/docs/8.2/interactive/release-8-1.html
s!\\\\!\\\\\\\\!g; # replace \\ with ]\\\\
# split 'extended' INSERT INTO statements to something PostgreSQL can understand
( $insert_table, $valueString) = $_ =~ m/^INSERT\s+INTO\s+['`"]*(.*?)['`"]*\s+VALUES\s*(.*)/i;
$insert_table = quote_and_lc($insert_table);
s/^INSERT INTO.*?\);//i; # hose the statement which is to be replaced whether a run-on or not
# guarantee table names are quoted
print OUT qq(INSERT INTO $insert_table VALUES $valueString \n);
} else {
print OUT $_ ; # example: /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
}
# keep looping and get next line of IN file
} # END while(<IN>)
print_post_create_sql(); # in case there is extra from the last table
#################################################################
# 5. print_plgsql function prototype
# emulate the set datatype with the following plpgsql function
# looks ugly so putting at end of file
#################################################################
#
sub make_plpgsql {
my ($table,$column_name) = ($_[0],$_[1]);
$table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
my $constraint_table = get_identifier($table,$column_name ,"constraint_table");
return "
-- this function is called by the insert/update trigger
-- it checks if the INSERT/UPDATE for the 'set' column
-- contains members which comprise a valid mysql set
-- this TRIGGER function therefore acts like a constraint
-- provided limited functionality for mysql's set datatype
-- just verifies and matches for string representations of the set at this point
-- though the set datatype uses bit comparisons, the only supported arguments to our
-- set datatype are VARCHAR arguments
-- to add a member to the set add it to the ".$table."_".$column_name." table
CREATE OR REPLACE FUNCTION check_".$table."_".$column_name."_set( ) RETURNS TRIGGER AS \$\$\n
DECLARE
----
arg_str VARCHAR ;
argx VARCHAR := '';
nobreak INT := 1;
rec_count INT := 0;
psn INT := 0;
str_in VARCHAR := NEW.$column_name;
----
BEGIN
----
IF str_in IS NULL THEN RETURN NEW ; END IF;
arg_str := REGEXP_REPLACE(str_in, '\\',\\'', ','); -- str_in is CONSTANT
arg_str := REGEXP_REPLACE(arg_str, '^\\'', '');
arg_str := REGEXP_REPLACE(arg_str, '\\'\$', '');
-- RAISE NOTICE 'arg_str %',arg_str;
psn := POSITION(',' in arg_str);
IF psn > 0 THEN
psn := psn - 1; -- minus-1 from comma position
-- RAISE NOTICE 'psn %',psn;
argx := SUBSTRING(arg_str FROM 1 FOR psn); -- get one set member
psn := psn + 2; -- go to first starting letter
arg_str := SUBSTRING(arg_str FROM psn); -- hack it off
ELSE
psn := 0; -- minus-1 from comma position
argx := arg_str;
END IF;
-- RAISE NOTICE 'argx %',argx;
-- RAISE NOTICE 'new arg_str: %',arg_str;
WHILE nobreak LOOP
EXECUTE 'SELECT count(*) FROM $constraint_table WHERE set_values = ' || quote_literal(argx) INTO rec_count;
IF rec_count = 0 THEN RAISE EXCEPTION 'one of the set values was not found';
END IF;
IF psn > 0 THEN
psn := psn - 1; -- minus-1 from comma position
-- RAISE NOTICE 'psn %',psn;
argx := SUBSTRING(arg_str FROM 1 FOR psn); -- get one set member
psn := psn + 2; -- go to first starting letter
arg_str := SUBSTRING(arg_str FROM psn); -- hack it off
psn := POSITION(',' in arg_str);
ELSE nobreak = 0;
END IF;
-- RAISE NOTICE 'next argx % and next arg_str %', argx, arg_str;
END LOOP;
RETURN NEW;
----
END;
\$\$ LANGUAGE 'plpgsql' VOLATILE;
drop trigger set_test ON $table;
-- make a trigger for each set field
-- make trigger and hard-code in column names
-- see http://archives.postgresql.org/pgsql-interfaces/2005-02/msg00020.php
CREATE TRIGGER set_test
BEFORE INSERT OR UPDATE ON $table FOR EACH ROW
EXECUTE PROCEDURE check_".$table."_".$column_name."_set();\n";
} # end sub make_plpgsql();

View File

@@ -1,199 +0,0 @@
place_type,level
Q9842,4
Q9430,3
Q928830,4
Q9259,1
Q91028,5
Q8514,2
Q8502,2
Q83405,3
Q82794,2
Q820477,1
Q811979,1
Q8072,2
Q79007,2
Q786014,3
Q75848,2
Q75520,2
Q728937,4
Q7275,2
Q719456,3
Q7075,3
Q697295,4
Q6852233,2
Q682943,3
Q665487,5
Q655686,3
Q643589,5
Q641226,2
Q631305,2
Q6256,2
Q6023295,2
Q5773747,5
Q56061,1
Q55659167,4
Q55488,4
Q55465477,3
Q54050,2
Q532,3
Q53060,2
Q52177058,4
Q515716,5
Q5153984,4
Q515,3
Q5144960,5
Q5119,4
Q5119,4
Q5107,2
Q5084,4
Q5031071,4
Q5003624,2
Q4989906,1
Q4976993,3
Q486972,1
Q486972,2
Q483110,3
Q4830453,4
Q47521,3
Q473972,1
Q46831,2
Q46614560,5
Q44782,3
Q44613,4
Q44539,4
Q44494,2
Q44377,2
Q4421,2
Q43501,2
Q4286337,3
Q42523,3
Q41176,2
Q40357,3
Q4022,4
Q40080,2
Q39816,2
Q39715,3
Q39614,1
Q3957,3
Q3947,4
Q3914,3
Q38723,2
Q38720,3
Q3623867,5
Q35666,2
Q355304,3
Q35509,2
Q35112127,3
Q34985575,4
Q34876,5
Q34763,2
Q34627,4
Q3455524,3
Q34442,4
Q33837,2
Q33506,3
Q32815,4
Q3257686,2
Q3240715,2
Q3191695,5
Q3153117,2
Q30198,2
Q30139652,3
Q294422,3
Q2870166,3
Q27686,3
Q274153,3
Q271669,1
Q2659904,2
Q24529780,2
Q24354,3
Q2354973,4
Q23442,2
Q23413,3
Q23397,3
Q2327515,4
Q2311958,5
Q22927291,6
Q22698,1
Q2175765,4
Q205495,4
Q204832,3
Q2042028,2
Q202216,6
Q1970725,3
Q194203,5
Q194195,2
Q190429,2
Q185187,3
Q185113,2
Q183366,2
Q1799794,1
Q1788454,4
Q1785071,3
Q1777138,3
Q177634,2
Q177380,2
Q174814,4
Q174782,2
Q17350442,2
Q17343829,3
Q17334923,0
Q17018380,3
Q16970,4
Q16917,3
Q16831714,4
Q165,3
Q160742,4
Q159719,3
Q159334,4
Q15640612,5
Q15324,2
Q15284,5
Q15243209,6
Q152081,1
Q15195406,4
Q1500350,5
Q149621,5
Q14757767,4
Q14350,3
Q1410668,3
Q1394476,3
Q1377575,2
Q1353183,3
Q134447,4
Q133215,3
Q133056,2
Q13221722,3
Q13220204,2
Q1311958,4
Q1303167,3
Q130003,3
Q12518,2
Q12516,3
Q1248784,3
Q123705,3
Q12323,3
Q12284,4
Q12280,4
Q121359,2
Q1210950,2
Q11755880,3
Q11707,3
Q11315,3
Q11303,3
Q1115575,4
Q1107656,1
Q10864048,1
Q1076486,2
Q105731,3
Q105190,3
Q1048525,3
Q102496,5
Q28872924,1
Q15617994,1
Q159313,2
Q24398318,3
Q327333,2
Q43229,1
Q860861,1
Q4989906,1
1 place_type level
2 Q9842 4
3 Q9430 3
4 Q928830 4
5 Q9259 1
6 Q91028 5
7 Q8514 2
8 Q8502 2
9 Q83405 3
10 Q82794 2
11 Q820477 1
12 Q811979 1
13 Q8072 2
14 Q79007 2
15 Q786014 3
16 Q75848 2
17 Q75520 2
18 Q728937 4
19 Q7275 2
20 Q719456 3
21 Q7075 3
22 Q697295 4
23 Q6852233 2
24 Q682943 3
25 Q665487 5
26 Q655686 3
27 Q643589 5
28 Q641226 2
29 Q631305 2
30 Q6256 2
31 Q6023295 2
32 Q5773747 5
33 Q56061 1
34 Q55659167 4
35 Q55488 4
36 Q55465477 3
37 Q54050 2
38 Q532 3
39 Q53060 2
40 Q52177058 4
41 Q515716 5
42 Q5153984 4
43 Q515 3
44 Q5144960 5
45 Q5119 4
46 Q5119 4
47 Q5107 2
48 Q5084 4
49 Q5031071 4
50 Q5003624 2
51 Q4989906 1
52 Q4976993 3
53 Q486972 1
54 Q486972 2
55 Q483110 3
56 Q4830453 4
57 Q47521 3
58 Q473972 1
59 Q46831 2
60 Q46614560 5
61 Q44782 3
62 Q44613 4
63 Q44539 4
64 Q44494 2
65 Q44377 2
66 Q4421 2
67 Q43501 2
68 Q4286337 3
69 Q42523 3
70 Q41176 2
71 Q40357 3
72 Q4022 4
73 Q40080 2
74 Q39816 2
75 Q39715 3
76 Q39614 1
77 Q3957 3
78 Q3947 4
79 Q3914 3
80 Q38723 2
81 Q38720 3
82 Q3623867 5
83 Q35666 2
84 Q355304 3
85 Q35509 2
86 Q35112127 3
87 Q34985575 4
88 Q34876 5
89 Q34763 2
90 Q34627 4
91 Q3455524 3
92 Q34442 4
93 Q33837 2
94 Q33506 3
95 Q32815 4
96 Q3257686 2
97 Q3240715 2
98 Q3191695 5
99 Q3153117 2
100 Q30198 2
101 Q30139652 3
102 Q294422 3
103 Q2870166 3
104 Q27686 3
105 Q274153 3
106 Q271669 1
107 Q2659904 2
108 Q24529780 2
109 Q24354 3
110 Q2354973 4
111 Q23442 2
112 Q23413 3
113 Q23397 3
114 Q2327515 4
115 Q2311958 5
116 Q22927291 6
117 Q22698 1
118 Q2175765 4
119 Q205495 4
120 Q204832 3
121 Q2042028 2
122 Q202216 6
123 Q1970725 3
124 Q194203 5
125 Q194195 2
126 Q190429 2
127 Q185187 3
128 Q185113 2
129 Q183366 2
130 Q1799794 1
131 Q1788454 4
132 Q1785071 3
133 Q1777138 3
134 Q177634 2
135 Q177380 2
136 Q174814 4
137 Q174782 2
138 Q17350442 2
139 Q17343829 3
140 Q17334923 0
141 Q17018380 3
142 Q16970 4
143 Q16917 3
144 Q16831714 4
145 Q165 3
146 Q160742 4
147 Q159719 3
148 Q159334 4
149 Q15640612 5
150 Q15324 2
151 Q15284 5
152 Q15243209 6
153 Q152081 1
154 Q15195406 4
155 Q1500350 5
156 Q149621 5
157 Q14757767 4
158 Q14350 3
159 Q1410668 3
160 Q1394476 3
161 Q1377575 2
162 Q1353183 3
163 Q134447 4
164 Q133215 3
165 Q133056 2
166 Q13221722 3
167 Q13220204 2
168 Q1311958 4
169 Q1303167 3
170 Q130003 3
171 Q12518 2
172 Q12516 3
173 Q1248784 3
174 Q123705 3
175 Q12323 3
176 Q12284 4
177 Q12280 4
178 Q121359 2
179 Q1210950 2
180 Q11755880 3
181 Q11707 3
182 Q11315 3
183 Q11303 3
184 Q1115575 4
185 Q1107656 1
186 Q10864048 1
187 Q1076486 2
188 Q105731 3
189 Q105190 3
190 Q1048525 3
191 Q102496 5
192 Q28872924 1
193 Q15617994 1
194 Q159313 2
195 Q24398318 3
196 Q327333 2
197 Q43229 1
198 Q860861 1
199 Q4989906 1

View File

@@ -1,195 +0,0 @@
Q9842
Q9430
Q928830
Q9259
Q91028
Q8514
Q8502
Q83405
Q82794
Q820477
Q811979
Q8072
Q79007
Q786014
Q75848
Q75520
Q728937
Q7275
Q719456
Q7075
Q697295
Q6852233
Q682943
Q665487
Q655686
Q643589
Q641226
Q631305
Q6256
Q6023295
Q5773747
Q56061
Q55659167
Q55488
Q55465477
Q54050
Q532
Q53060
Q52177058
Q515716
Q5153984
Q515
Q5144960
Q5119
Q5107
Q5084
Q5031071
Q5003624
Q4989906
Q4976993
Q486972
Q483110
Q4830453
Q47521
Q473972
Q46831
Q46614560
Q44782
Q44613
Q44539
Q44494
Q44377
Q4421
Q43501
Q4286337
Q42523
Q41176
Q40357
Q4022
Q40080
Q39816
Q39715
Q39614
Q3957
Q3947
Q3914
Q38723
Q38720
Q3623867
Q35666
Q355304
Q35509
Q35112127
Q34985575
Q34876
Q34763
Q34627
Q3455524
Q34442
Q33837
Q33506
Q32815
Q3257686
Q3240715
Q3191695
Q3153117
Q30198
Q30139652
Q294422
Q2870166
Q27686
Q274153
Q271669
Q2659904
Q24529780
Q24354
Q2354973
Q23442
Q23413
Q23397
Q2327515
Q2311958
Q22927291
Q22698
Q2175765
Q205495
Q204832
Q2042028
Q202216
Q1970725
Q194203
Q194195
Q190429
Q185187
Q185113
Q183366
Q1799794
Q1788454
Q1785071
Q1777138
Q177634
Q177380
Q174814
Q174782
Q17350442
Q17343829
Q17334923
Q17018380
Q16970
Q16917
Q16831714
Q165
Q160742
Q159719
Q159334
Q15640612
Q15324
Q15284
Q15243209
Q152081
Q15195406
Q1500350
Q149621
Q14757767
Q14350
Q1410668
Q1394476
Q1377575
Q1353183
Q134447
Q133215
Q133056
Q13221722
Q13220204
Q1311958
Q1303167
Q130003
Q12518
Q12516
Q1248784
Q123705
Q12323
Q12284
Q12280
Q121359
Q1210950
Q11755880
Q11707
Q11315
Q11303
Q1115575
Q1107656
Q10864048
Q1076486
Q105731
Q105190
Q1048525
Q102496
Q28872924
Q15617994
Q159313
Q24398318
Q327333
Q43229
Q860861

View File

@@ -1,200 +0,0 @@
## Wikidata place types and related OSM Tags
Wikidata does not have any official ontologies, however the [DBpedia project](https://wiki.dbpedia.org/) has created an [ontology](https://wiki.dbpedia.org/services-resources/ontology) that covered [place types](http://mappings.dbpedia.org/server/ontology/classes/#Place). The table below used the DBpedia place ontology as a starting point, and is provided as a cross-reference to the relevant OSM tags.
The Wikidata place types listed in the table below can be used in conjunction with the [Wikidata Query Service](https://query.wikidata.org/) to retrieve instances of those place types from the Wikidata knowledgebase.
```
SELECT ?item ?lat ?lon
WHERE {
?item wdt:P31*/wdt:P279*wd:Q9430; wdt:P625 ?pt.
?item p:P625?loc.
?loc psv:P625?cnode.
?cnode wikibase:geoLatitude?lat.
?cnode wikibase:geoLongitude?lon.
}
```
An example json return for all instances of the Wikidata item "Q9430" (Ocean) can be seen at [json](https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT?item?lat?lon%20WHERE{?item%20wdt:P31*/wdt:P279*wd:Q9430;wdt:P625?pt.?item%20p:P625?loc.?loc%20psv:P625?cnode.?cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
**NOTE** the OSM tags listed are those listed in the wikidata entries, and not all the possible matches for tags within OSM.
title | concept | OSM Tag |
-----------|---------------------------------------|------------------|
[Q17334923](https://www.wikidata.org/entity/Q17334923) | Location | |
[Q811979](https://www.wikidata.org/entity/Q811979) | Architectural Structure | |
[Q194195](https://www.wikidata.org/entity/Q194195) | Amusement park |
[Q204832](https://www.wikidata.org/entity/Q204832) | Roller coaster | [attraction=roller_coaster](https://wiki.openstreetmap.org/wiki/Tag:attraction=roller_coaster) |
[Q2870166](https://www.wikidata.org/entity/Q2870166) | Water ride | |
[Q641226](https://www.wikidata.org/entity/Q641226) | Arena | [amenity=events_centre](https://wiki.openstreetmap.org/wiki/Tag:amenity=events_centre) |
[Q41176](https://www.wikidata.org/entity/Q41176) | Building | [building=yes](https://wiki.openstreetmap.org/wiki/Key:building) |
[Q1303167](https://www.wikidata.org/entity/Q1303167) | Barn | [building=barn](https://wiki.openstreetmap.org/wiki/Tag:building=barn) |
[Q655686](https://www.wikidata.org/entity/Q655686) | Commercial building | [building=commercial](https://wiki.openstreetmap.org/wiki/Tag:building=commercial) |
[Q4830453](https://www.wikidata.org/entity/Q4830453) | Business | |
[Q7075](https://www.wikidata.org/entity/Q7075) | Library | [amenity=library](https://wiki.openstreetmap.org/wiki/Tag:amenity=library) |
[Q133215](https://www.wikidata.org/entity/Q133215) | Casino | [amenity=casino](https://wiki.openstreetmap.org/wiki/Tag:amenity=casino) |
[Q23413](https://www.wikidata.org/entity/Q23413) | Castle | [historic=castle](https://wiki.openstreetmap.org/wiki/Tag:historic=castle) |
[Q83405](https://www.wikidata.org/entity/Q83405) | Factory | |
[Q53060](https://www.wikidata.org/entity/Q53060) | Gate | [barrier=gate](https://wiki.openstreetmap.org/wiki/Tag:barrier=gate) |cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
[Q11755880](https://www.wikidata.org/entity/Q11755880) | Residential Building | [building=residential](https://wiki.openstreetmap.org/wiki/Tag:building=residential) |
[Q3947](https://www.wikidata.org/entity/Q3947) | House | [building=house](https://wiki.openstreetmap.org/wiki/Tag:building=house) |
[Q35112127](https://www.wikidata.org/entity/Q35112127) | Historic Building | |
[Q5773747](https://www.wikidata.org/entity/Q5773747) | Historic house | |
[Q38723](https://www.wikidata.org/entity/Q38723) | Higher Education Institution |
[Q3914](https://www.wikidata.org/entity/Q3914) | School | [amenity=school](https://wiki.openstreetmap.org/wiki/Tag:amenity=school) |
[Q9842](https://www.wikidata.org/entity/Q9842) | Primary school | |
[Q159334](https://www.wikidata.org/entity/Q159334) | Secondary school | |
[Q16917](https://www.wikidata.org/entity/Q16917) | Hospital | [amenity=hospital](https://wiki.openstreetmap.org/wiki/Tag:amenity=hospital), [healthcare=hospital](https://wiki.openstreetmap.org/wiki/Tag:healthcare=hospital), [building=hospital](https://wiki.openstreetmap.org/wiki/Tag:building=hospital) |
[Q27686](https://www.wikidata.org/entity/Q27686) | Hotel | [tourism=hotel](https://wiki.openstreetmap.org/wiki/Tag:tourism=hotel), [building=hotel](https://wiki.openstreetmap.org/wiki/Tag:building=hotel) |
[Q33506](https://www.wikidata.org/entity/Q33506) | Museum | [tourism=museum](https://wiki.openstreetmap.org/wiki/Tag:tourism=museum) |
[Q40357](https://www.wikidata.org/entity/Q40357) | Prison | [amenity=prison](https://wiki.openstreetmap.org/wiki/Tag:amenity=prison) |
[Q24398318](https://www.wikidata.org/entity/Q24398318) | Religious Building | |
[Q160742](https://www.wikidata.org/entity/Q160742) | Abbey | |
[Q16970](https://www.wikidata.org/entity/Q16970) | Church (building) | [building=church](https://wiki.openstreetmap.org/wiki/Tag:building=church) |
[Q44613](https://www.wikidata.org/entity/Q44613) | Monastery | [amenity=monastery](https://wiki.openstreetmap.org/wiki/Tag:amenity=monastery) |
[Q32815](https://www.wikidata.org/entity/Q32815) | Mosque | [building=mosque](https://wiki.openstreetmap.org/wiki/Tag:building=mosque) |
[Q697295](https://www.wikidata.org/entity/Q697295) | Shrine | [building=shrine](https://wiki.openstreetmap.org/wiki/Tag:building=shrine) |
[Q34627](https://www.wikidata.org/entity/Q34627) | Synagogue | [building=synagogue](https://wiki.openstreetmap.org/wiki/Tag:building=synagogue) |
[Q44539](https://www.wikidata.org/entity/Q44539) | Temple | [building=temple](https://wiki.openstreetmap.org/wiki/Tag:building=temple) |
[Q11707](https://www.wikidata.org/entity/Q11707) | Restaurant | [amenity=restaurant](https://wiki.openstreetmap.org/wiki/Tag:amenity=restaurant) |
[Q11315](https://www.wikidata.org/entity/Q11315) | Shopping mall | [shop=mall](https://wiki.openstreetmap.org/wiki/Tag:shop=mall), [shop=shopping_centre](https://wiki.openstreetmap.org/wiki/Tag:shop=shopping_centre) |
[Q11303](https://www.wikidata.org/entity/Q11303) | Skyscraper | |
[Q17350442](https://www.wikidata.org/entity/Q17350442) | Venue | |
[Q41253](https://www.wikidata.org/entity/Q41253) | Movie Theater | [amenity=cinema](https://wiki.openstreetmap.org/wiki/Tag:amenity=cinema) |
[Q483110](https://www.wikidata.org/entity/Q483110) | Stadium | [leisure=stadium](https://wiki.openstreetmap.org/wiki/Tag:leisure=stadium), [building=stadium](https://wiki.openstreetmap.org/wiki/Tag:building=stadium) |
[Q24354](https://www.wikidata.org/entity/Q24354) | Theater (structure) | [amenity=theatre](https://wiki.openstreetmap.org/wiki/Tag:amenity=theatre) |
[Q121359](https://www.wikidata.org/entity/Q121359) | Infrastructure | |
[Q1248784](https://www.wikidata.org/entity/Q1248784) | Airport | |
[Q12323](https://www.wikidata.org/entity/Q12323) | Dam | [waterway=dam](https://wiki.openstreetmap.org/wiki/Tag:waterway=dam) |
[Q1353183](https://www.wikidata.org/entity/Q1353183) | Launch pad | |
[Q105190](https://www.wikidata.org/entity/Q105190) | Levee | [man_made=dyke](https://wiki.openstreetmap.org/wiki/Tag:man_made=dyke) |
[Q105731](https://www.wikidata.org/entity/Q105731) | Lock (water navigation) | [lock=yes](https://wiki.openstreetmap.org/wiki/Key:lock) |
[Q44782](https://www.wikidata.org/entity/Q44782) | Port | |
[Q159719](https://www.wikidata.org/entity/Q159719) | Power station | [power=plant](https://wiki.openstreetmap.org/wiki/Tag:power=plant) |
[Q174814](https://www.wikidata.org/entity/Q174814) | Electrical substation | |
[Q134447](https://www.wikidata.org/entity/Q134447) | Nuclear power plant | [plant:source=nuclear](https://wiki.openstreetmap.org/wiki/Tag:plant:source=nuclear) |
[Q786014](https://www.wikidata.org/entity/Q786014) | Rest area | [highway=rest_area](https://wiki.openstreetmap.org/wiki/Tag:highway=rest_area), [highway=services](https://wiki.openstreetmap.org/wiki/Tag:highway=services) |
[Q12280](https://www.wikidata.org/entity/Q12280) | Bridge | [bridge=* ](https://wiki.openstreetmap.org/wiki/Key:bridge), [man_made=bridge](https://wiki.openstreetmap.org/wiki/Tag:man_made=bridge) |
[Q728937](https://www.wikidata.org/entity/Q728937) | Railroad Line | [railway=rail](https://wiki.openstreetmap.org/wiki/Tag:railway=rail) |
[Q1311958](https://www.wikidata.org/entity/Q1311958) | Railway Tunnel | |
[Q34442](https://www.wikidata.org/entity/Q34442) | Road | [highway=* ](https://wiki.openstreetmap.org/wiki/Key:highway), [route=road](https://wiki.openstreetmap.org/wiki/Tag:route=road) |
[Q1788454](https://www.wikidata.org/entity/Q1788454) | Road junction | |
[Q44377](https://www.wikidata.org/entity/Q44377) | Tunnel | [tunnel=* ](https://wiki.openstreetmap.org/wiki/Key:tunnel) |
[Q5031071](https://www.wikidata.org/entity/Q5031071) | Canal tunnel | |
[Q719456](https://www.wikidata.org/entity/Q719456) | Station | [public_transport=station](https://wiki.openstreetmap.org/wiki/Tag:public_transport=station) |
[Q205495](https://www.wikidata.org/entity/Q205495) | Filling station | [amenity=fuel](https://wiki.openstreetmap.org/wiki/Tag:amenity=fuel) |
[Q928830](https://www.wikidata.org/entity/Q928830) | Metro station | [station=subway](https://wiki.openstreetmap.org/wiki/Tag:station=subway) |
[Q55488](https://www.wikidata.org/entity/Q55488) | Train station | [railway=station](https://wiki.openstreetmap.org/wiki/Tag:railway=station) |
[Q2175765](https://www.wikidata.org/entity/Q2175765) | Tram stop | [railway=tram_stop](https://wiki.openstreetmap.org/wiki/Tag:railway=tram_stop), [public_transport=stop_position](https://wiki.openstreetmap.org/wiki/Tag:public_transport=stop_position) |
[Q6852233](https://www.wikidata.org/entity/Q6852233) | Military building | |
[Q44494](https://www.wikidata.org/entity/Q44494) | Mill (grinding) | |
[Q185187](https://www.wikidata.org/entity/Q185187) | Watermill | [man_made=watermill](https://wiki.openstreetmap.org/wiki/Tag:man_made=watermill) |
[Q38720](https://www.wikidata.org/entity/Q38720) | Windmill | [man_made=windmill](https://wiki.openstreetmap.org/wiki/Tag:man_made=windmill) |
[Q4989906](https://www.wikidata.org/entity/Q4989906) | Monument | [historic=monument](https://wiki.openstreetmap.org/wiki/Tag:historic=monument) |
[Q5003624](https://www.wikidata.org/entity/Q5003624) | Memorial | [historic=memorial](https://wiki.openstreetmap.org/wiki/Tag:historic=memorial) |
[Q271669](https://www.wikidata.org/entity/Q271669) | Landform | |
[Q190429](https://www.wikidata.org/entity/Q190429) | Depression (geology) | |
[Q17018380](https://www.wikidata.org/entity/Q17018380) | Bight (geography) | |
[Q54050](https://www.wikidata.org/entity/Q54050) | Hill | |
[Q1210950](https://www.wikidata.org/entity/Q1210950) | Channel (geography) | |
[Q23442](https://www.wikidata.org/entity/Q23442) | Island | [place=island](https://wiki.openstreetmap.org/wiki/Tag:place=island) |
[Q42523](https://www.wikidata.org/entity/Q42523) | Atoll | |
[Q34763](https://www.wikidata.org/entity/Q34763) | Peninsula | |
[Q355304](https://www.wikidata.org/entity/Q355304) | Watercourse | |
[Q30198](https://www.wikidata.org/entity/Q30198) | Marsh | [wetland=marsh](https://wiki.openstreetmap.org/wiki/Tag:wetland=marsh) |
[Q75520](https://www.wikidata.org/entity/Q75520) | Plateau | |
[Q2042028](https://www.wikidata.org/entity/Q2042028) | Ravine | |
[Q631305](https://www.wikidata.org/entity/Q631305) | Rock formation | |
[Q12516](https://www.wikidata.org/entity/Q12516) | Pyramid | |
[Q1076486](https://www.wikidata.org/entity/Q1076486) | Sports venue | |
[Q682943](https://www.wikidata.org/entity/Q682943) | Cricket field | [sport=cricket](https://wiki.openstreetmap.org/wiki/Tag:sport=cricket) |
[Q1048525](https://www.wikidata.org/entity/Q1048525) | Golf course | [leisure=golf_course](https://wiki.openstreetmap.org/wiki/Tag:leisure=golf_course) |
[Q1777138](https://www.wikidata.org/entity/Q1777138) | Race track | [highway=raceway](https://wiki.openstreetmap.org/wiki/Tag:highway=raceway) |
[Q130003](https://www.wikidata.org/entity/Q130003) | Ski resort | |
[Q174782](https://www.wikidata.org/entity/Q174782) | Town square | [place=square](https://wiki.openstreetmap.org/wiki/Tag:place=square) |
[Q12518](https://www.wikidata.org/entity/Q12518) | Tower | [building=tower](https://wiki.openstreetmap.org/wiki/Tag:building=tower), [man_made=tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=tower) |
[Q39715](https://www.wikidata.org/entity/Q39715) | Lighthouse | [man_made=lighthouse](https://wiki.openstreetmap.org/wiki/Tag:man_made=lighthouse) |
[Q274153](https://www.wikidata.org/entity/Q274153) | Water tower | [building=water_tower](https://wiki.openstreetmap.org/wiki/Tag:building=water_tower), [man_made=water_tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=water_tower) |
[Q43501](https://www.wikidata.org/entity/Q43501) | Zoo | [tourism=zoo](https://wiki.openstreetmap.org/wiki/Tag:tourism=zoo) |
[Q39614](https://www.wikidata.org/entity/Q39614) | Cemetery | [amenity=grave_yard](https://wiki.openstreetmap.org/wiki/Tag:amenity=grave_yard), [landuse=cemetery](https://wiki.openstreetmap.org/wiki/Tag:landuse=cemetery) |
[Q152081](https://www.wikidata.org/entity/Q152081) | Concentration camp | |
[Q1107656](https://www.wikidata.org/entity/Q1107656) | Garden | [leisure=garden](https://wiki.openstreetmap.org/wiki/Tag:leisure=garden) |
[Q820477](https://www.wikidata.org/entity/Q820477) | Mine | |
[Q33837](https://www.wikidata.org/entity/Q33837) | Archipelago | [place=archipelago](https://wiki.openstreetmap.org/wiki/Tag:place=archipelago) |
[Q40080](https://www.wikidata.org/entity/Q40080) | Beach | [natural=beach](https://wiki.openstreetmap.org/wiki/Tag:natural=beach) |
[Q15324](https://www.wikidata.org/entity/Q15324) | Body of water | [natural=water](https://wiki.openstreetmap.org/wiki/Tag:natural=water) |
[Q23397](https://www.wikidata.org/entity/Q23397) | Lake | [water=lake](https://wiki.openstreetmap.org/wiki/Tag:water=lake) |
[Q9430](https://www.wikidata.org/entity/Q9430) | Ocean | |
[Q165](https://www.wikidata.org/entity/Q165) | Sea | |
[Q47521](https://www.wikidata.org/entity/Q47521) | Stream | |
[Q12284](https://www.wikidata.org/entity/Q12284) | Canal | [waterway=canal](https://wiki.openstreetmap.org/wiki/Tag:waterway=canal) |
[Q4022](https://www.wikidata.org/entity/Q4022) | River | [waterway=river](https://wiki.openstreetmap.org/wiki/Tag:waterway=river), [type=waterway](https://wiki.openstreetmap.org/wiki/Relation:waterway) |
[Q185113](https://www.wikidata.org/entity/Q185113) | Cape | [natural=cape](https://wiki.openstreetmap.org/wiki/Tag:natural=cape) |
[Q35509](https://www.wikidata.org/entity/Q35509) | Cave | [natural=cave_entrance](https://wiki.openstreetmap.org/wiki/Tag:natural=cave_entrance) |
[Q8514](https://www.wikidata.org/entity/Q8514) | Desert | |
[Q4421](https://www.wikidata.org/entity/Q4421) | Forest | [natural=wood](https://wiki.openstreetmap.org/wiki/Tag:natural=wood) |
[Q35666](https://www.wikidata.org/entity/Q35666) | Glacier | [natural=glacier](https://wiki.openstreetmap.org/wiki/Tag:natural=glacier) |
[Q177380](https://www.wikidata.org/entity/Q177380) | Hot spring | |
[Q8502](https://www.wikidata.org/entity/Q8502) | Mountain | [natural=peak](https://wiki.openstreetmap.org/wiki/Tag:natural=peak) |
[Q133056](https://www.wikidata.org/entity/Q133056) | Mountain pass | |
[Q46831](https://www.wikidata.org/entity/Q46831) | Mountain range | |
[Q39816](https://www.wikidata.org/entity/Q39816) | Valley | [natural=valley](https://wiki.openstreetmap.org/wiki/Tag:natural=valley) |
[Q8072](https://www.wikidata.org/entity/Q8072) | Volcano | [natural=volcano](https://wiki.openstreetmap.org/wiki/Tag:natural=volcano) |
[Q43229](https://www.wikidata.org/entity/Q43229) | Organization | |
[Q327333](https://www.wikidata.org/entity/Q327333) | Government agency | [office=government](https://wiki.openstreetmap.org/wiki/Tag:office=government)|
[Q22698](https://www.wikidata.org/entity/Q22698) | Park | [leisure=park](https://wiki.openstreetmap.org/wiki/Tag:leisure=park) |
[Q159313](https://www.wikidata.org/entity/Q159313) | Urban agglomeration | |
[Q177634](https://www.wikidata.org/entity/Q177634) | Community | |
[Q5107](https://www.wikidata.org/entity/Q5107) | Continent | [place=continent](https://wiki.openstreetmap.org/wiki/Tag:place=continent) |
[Q6256](https://www.wikidata.org/entity/Q6256) | Country | [place=country](https://wiki.openstreetmap.org/wiki/Tag:place=country) |
[Q75848](https://www.wikidata.org/entity/Q75848) | Gated community | |
[Q3153117](https://www.wikidata.org/entity/Q3153117) | Intercommunality | |
[Q82794](https://www.wikidata.org/entity/Q82794) | Region | |
[Q56061](https://www.wikidata.org/entity/Q56061) | Administrative division | [boundary=administrative](https://wiki.openstreetmap.org/wiki/Tag:boundary=administrative) |
[Q665487](https://www.wikidata.org/entity/Q665487) | Diocese | |
[Q4976993](https://www.wikidata.org/entity/Q4976993) | Parish | [boundary=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:boundary=civil_parish) |
[Q194203](https://www.wikidata.org/entity/Q194203) | Arrondissements of France | |
[Q91028](https://www.wikidata.org/entity/Q91028) | Arrondissements of Belgium | |
[Q3623867](https://www.wikidata.org/entity/Q3623867) | Arrondissements of Benin | |
[Q2311958](https://www.wikidata.org/entity/Q2311958) | Canton (country subdivision) | [political_division=canton](https://wiki.openstreetmap.org/wiki/FR:Cantons_in_France) |
[Q643589](https://www.wikidata.org/entity/Q643589) | Department | |
[Q202216](https://www.wikidata.org/entity/Q202216) | Overseas department and region | |
[Q149621](https://www.wikidata.org/entity/Q149621) | District | [place=district](https://wiki.openstreetmap.org/wiki/Tag:place=district) |
[Q15243209](https://www.wikidata.org/wiki/Q15243209) | Historic district | |
[Q5144960](https://www.wikidata.org/entity/Q5144960) | Microregion | |
[Q15284](https://www.wikidata.org/entity/Q15284) | Municipality | |
[Q515716](https://www.wikidata.org/entity/Q515716) | Prefecture | |
[Q34876](https://www.wikidata.org/entity/Q34876) | Province | |
[Q3191695](https://www.wikidata.org/entity/Q3191695) | Regency (Indonesia) | |
[Q1970725](https://www.wikidata.org/entity/Q1970725) | Natural region | |
[Q486972](https://www.wikidata.org/entity/Q486972) | Human settlement | |
[Q515](https://www.wikidata.org/entity/Q515) | City | [place=city](https://wiki.openstreetmap.org/wiki/Tag:place=city) |
[Q5119](https://www.wikidata.org/entity/Q5119) | Capital city | [capital=yes](https://wiki.openstreetmap.org/wiki/Key:capital) |
[Q4286337](https://www.wikidata.org/entity/Q4286337) | City district | |
[Q1394476](https://www.wikidata.org/entity/Q1394476) | Civil township | |
[Q1115575](https://www.wikidata.org/entity/Q1115575) | Civil parish | [designation=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:designation=civil_parish) |
[Q5153984](https://www.wikidata.org/entity/Q5153984) | Commune-level subdivisions | |
[Q123705](https://www.wikidata.org/entity/Q123705) | Neighbourhood | [place=neighbourhood](https://wiki.openstreetmap.org/wiki/Tag:place=neighbourhood) |
[Q1500350](https://www.wikidata.org/entity/Q1500350) | Townships of China | |
[Q17343829](https://www.wikidata.org/entity/Q17343829) | Unincorporated Community | |
[Q3957](https://www.wikidata.org/entity/Q3957) | Town | [place=town](https://wiki.openstreetmap.org/wiki/Tag:place=town) |
[Q532](https://www.wikidata.org/entity/Q532) | Village | [place=village](https://wiki.openstreetmap.org/wiki/Tag:place=village) |
[Q5084](https://www.wikidata.org/entity/Q5084) | Hamlet | [place=hamlet](https://wiki.openstreetmap.org/wiki/Tag:place=hamlet) |
[Q7275](https://www.wikidata.org/entity/Q7275) | State | |
[Q79007](https://www.wikidata.org/entity/Q79007) | Street | |
[Q473972](https://www.wikidata.org/entity/Q473972) | Protected area | [boundary=protected_area](https://wiki.openstreetmap.org/wiki/Tag:boundary=protected_area) |
[Q1377575](https://www.wikidata.org/entity/Q1377575) | Wildlife refuge | |
[Q1410668](https://www.wikidata.org/entity/Q1410668) | National Wildlife Refuge | [protection_title=National Wildlife Refuge](ownership=national), [ownership=national](https://wiki.openstreetmap.org/wiki/Tag:ownership=national)|
[Q9259](https://www.wikidata.org/entity/Q9259) | World Heritage Site | |
---
### Future Work
The Wikidata improvements to Nominatim can be further enhanced by:
- continuing to add new Wikidata links to OSM objects
- increasing the number of place types accounted for in the wikipedia_articles table
- working to use place types in the wikipedia_article matching process

File diff suppressed because one or more lines are too long

View File

@@ -1,26 +0,0 @@
-- This data contains Ordnance Survey data © Crown copyright and database right 2010.
-- Code-Point Open contains Royal Mail data © Royal Mail copyright and database right 2010.
-- OS data may be used under the terms of the OS OpenData licence:
-- http://www.ordnancesurvey.co.uk/oswebsite/opendata/licence/docs/licence.pdf
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = off;
SET check_function_bodies = false;
SET client_min_messages = warning;
SET escape_string_warning = off;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
CREATE TABLE gb_postcode (
id integer,
postcode character varying(9),
geometry geometry,
CONSTRAINT enforce_dims_geometry CHECK ((st_ndims(geometry) = 2)),
CONSTRAINT enforce_srid_geometry CHECK ((st_srid(geometry) = 4326))
);

View File

@@ -1,16 +0,0 @@
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET check_function_bodies = false;
SET client_min_messages = warning;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
CREATE TABLE us_postcode (
postcode text,
x double precision,
y double precision
);

View File

@@ -5,23 +5,27 @@
configure_file(mkdocs.yml ../mkdocs.yml)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/appendix)
file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/data-sources)
set (DOC_SOURCES
admin
develop
api
index.md
extra.css
styles.css
)
foreach (src ${DOC_SOURCES})
execute_process(
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/${src} ${CMAKE_CURRENT_BINARY_DIR}/${src}
)
endforeach()
ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/admin ${CMAKE_CURRENT_BINARY_DIR}/admin
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/develop ${CMAKE_CURRENT_BINARY_DIR}/develop
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_BINARY_DIR}/api
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/index.md ${CMAKE_CURRENT_BINARY_DIR}/index.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/extra.css ${CMAKE_CURRENT_BINARY_DIR}/extra.css
COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/data-sources/overview.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/overview.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/us-tiger/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/US-Tiger.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/gb-postcodes/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/GB-Postcodes.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Country-Grid.md
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/mexico.quad.png ${CMAKE_CURRENT_BINARY_DIR}/data-sources/mexico.quad.png
COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/wikipedia-wikidata/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Wikipedia-Wikidata.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-8.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-8.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)

View File

@@ -0,0 +1,173 @@
# Advanced installations
This page contains instructions for setting up multiple countries in
your Nominatim database. It is assumed that you have already successfully
installed the Nominatim software itself, if not return to the
[installation page](Installation.md).
## Importing multiple regions
To import multiple regions in your database, you need to configure and run `utils/import_multiple_regions.sh` file. This script will set up the update directory which has the following structure:
```bash
update
   ├── europe
   │   ├── andorra
   │   │   └── sequence.state
   │   └── monaco
   │   └── sequence.state
   └── tmp
├── combined.osm.pbf
└── europe
├── andorra-latest.osm.pbf
└── monaco-latest.osm.pbf
```
The `sequence.state` files will contain the sequence ID, which will be used by pyosmium to get updates. The tmp folder is used for import dump.
### Configuring multiple regions
The file `import_multiple_regions.sh` needs to be edited as per your requirement:
1. List of countries. eg:
COUNTRIES="europe/monaco europe/andorra"
2. Path to Build directory. eg:
NOMINATIMBUILD="/srv/nominatim/build"
3. Path to Update directory. eg:
UPDATEDIR="/srv/nominatim/update"
4. Replication URL. eg:
BASEURL="https://download.geofabrik.de"
DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"
### Setting up multiple regions
!!! tip
If your database already exists and you want to add more countries,
replace the setting up part
`${SETUPFILE} --osm-file ${UPDATEDIR}/tmp/combined.osm.pbf --all 2>&1`
with `${UPDATEFILE} --import-file ${UPDATEDIR}/tmp/combined.osm.pbf --index --index-instances N 2>&1`
where N is the numbers of CPUs in your system.
Run the following command from your Nominatim directory after configuring the file.
bash ./utils/import_multiple_regions.sh
!!! danger "Important"
This file uses osmium-tool. It must be installed before executing the import script.
Installation instructions can be found [here](https://osmcode.org/osmium-tool/manual.html#installation).
### Updating multiple regions
To import multiple regions in your database, you need to configure and run ```utils/update_database.sh```.
This uses the update directory set up while setting up the DB.
### Configuring multiple regions
The file `update_database.sh` needs to be edited as per your requirement:
1. List of countries. eg:
COUNTRIES="europe/monaco europe/andorra"
2. Path to Build directory. eg:
NOMINATIMBUILD="/srv/nominatim/build"
3. Path to Update directory. eg:
UPDATEDIR="/srv/nominatim/update"
4. Replication URL. eg:
BASEURL="https://download.geofabrik.de"
DOWNCOUNTRYPOSTFIX="-updates"
5. Followup can be set according to your installation. eg: For Photon,
FOLLOWUP="curl http://localhost:2322/nominatim-update"
will handle the indexing.
### Updating the database
Run the following command from your Nominatim directory after configuring the file.
bash ./utils/update_database.sh
This will get diffs from the replication server, import diffs and index the database. The default replication server in the script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Importing Nominatim to an external PostgreSQL database
You can install Nominatim using a database that runs on a different server when
you have physical access to the file system on the other server. Nominatim
uses a custom normalization library that needs to be made accessible to the
PostgreSQL server. This section explains how to set up the normalization
library.
### Option 1: Compiling the library on the database server
The most sure way to get a working library is to compile it on the database
server. From the prerequisites you need at least cmake, gcc and the
PostgreSQL server package.
Clone or unpack the Nominatim source code, enter the source directory and
create and enter a build directory.
```sh
cd Nominatim
mkdir build
cd build
```
Now configure cmake to only build the PostgreSQL module and build it:
```
cmake -DBUILD_IMPORTER=off -DBUILD_API=off -DBUILD_TESTS=off -DBUILD_DOCS=off -DBUILD_OSM2PGSQL=off ..
make
```
When done, you find the normalization library in `build/module/nominatim.so`.
Copy it to a place where it is readable and executable by the PostgreSQL server
process.
### Option 2: Compiling the library on the import machine
You can also compile the normalization library on the machine from where you
run the import.
!!! important
You can only do this when the database server and the import machine have
the same architecture and run the same version of Linux. Otherwise there is
no guarantee that the compiled library is compatible with the PostgreSQL
server running on the database server.
Make sure that the PostgreSQL server package is installed on the machine
**with the same version as on the database server**. You do not need to install
the PostgreSQL server itself.
Download and compile Nominatim as per standard instructions. Once done, you find
the normalization library in `build/module/nominatim.so`. Copy the file to
the database server at a location where it is readable and executable by the
PostgreSQL server process.
### Running the import
On the client side you now need to configure the import to point to the
correct location of the library **on the database server**. Add the following
line to your your `.env` file:
```php
NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominatim.so resides>"
```
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](/admin/Import).

142
docs/admin/Deployment.md Normal file
View File

@@ -0,0 +1,142 @@
# Deploying Nominatim
The Nominatim API is implemented as a PHP application. The `website/` directory
in the project directory contains the configured website. You can serve this
in a production environment with any web server that is capable to run
PHP scripts.
This section gives a quick overview on how to configure Apache and Nginx to
serve Nominatim. It is not meant as a full system administration guide on how
to run a web service. Please refer to the documentation of
[Apache](http://httpd.apache.org/docs/current/) and
[Nginx](https://nginx.org/en/docs/)
for background information on configuring the services.
!!! Note
Throughout this page, we assume that your Nominatim project directory is
located in `/srv/nominatim-project` and that you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
to adapt the instructions in this case.
## Making the website directory accessible
You need to make sure that the `website` directory is accessible for the
web server user. You can check that the permissions are correct by accessing
on of the php files as the web server user:
``` sh
sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
```
If this shows a permission error, then you need to adapt the permissions of
each directory in the path so that it is executable for `www-data`.
If you have SELinux enabled, further adjustments may be necessary to give the
web server access. At a minimum the following SELinux labelling should be done
for Nominatim:
``` sh
sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/nominatim/lib/lib-php(/.*)?"
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
sudo restorecon -R -v /usr/local/lib/nominatim
sudo restorecon -R -v /srv/nominatim-project
```
## Nominatim with Apache
### Installing the required packages
With Apache you can use the PHP module to run Nominatim.
Under Ubuntu/Debian install them with:
``` sh
sudo apt install apache2 libapache2-mod-php
```
### Configuring Apache
Make sure your Apache configuration contains the required permissions for the
directory and create an alias:
``` apache
<Directory "/srv/nominatim-project/website">
Options FollowSymLinks MultiViews
AddType text/html .php
DirectoryIndex search.php
Require all granted
</Directory>
Alias /nominatim /srv/nominatim-project/website
```
After making changes in the apache config you need to restart apache.
The website should now be available on `http://localhost/nominatim`.
## Nominatim with Nginx
### Installing the required packages
Nginx has no built-in PHP interpreter. You need to use php-fpm as a deamon for
serving PHP cgi.
On Ubuntu/Debian install nginx and php-fpm with:
``` sh
sudo apt install nginx php-fpm
```
### Configure php-fpm and Nginx
By default php-fpm listens on a network socket. If you want it to listen to a
Unix socket instead, change the pool configuration
(`/etc/php/<php version>/fpm/pool.d/www.conf`) as follows:
``` ini
; Replace the tcp listener and add the unix socket
listen = /var/run/php-fpm.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
listen.group = www-data
listen.mode = 0666
```
Tell nginx that php files are special and to fastcgi_pass to the php-fpm
unix socket by adding the location definition to the default configuration.
``` nginx
root /srv/nominatim-project/website;
index search.php;
location / {
try_files $uri $uri/ @php;
}
location @php {
fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
fastcgi_param QUERY_STRING $args;
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_index index.php;
include fastcgi_params;
}
location ~ [^/]\.php(/|$) {
fastcgi_split_path_info ^(.+?\.php)(/.*)$;
if (!-f $document_root$fastcgi_script_name) {
return 404;
}
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
```
Restart the nginx and php-fpm services and the website should now be available
at `http://localhost/`.

View File

@@ -16,27 +16,44 @@ was killed. If it looks like this:
then you can resume with the following command:
```sh
./utils/setup.php --index --create-search-indices --create-country-names
nominatim import --continue indexing
```
If the reported rank is 26 or higher, you can also safely add `--index-noanalyse`.
### PostgreSQL crashed "invalid page in block"
Usually serious problem, can be a hardware issue, not all data written to disc
for example. Check PostgreSQL log file and search PostgreSQL issues/mailing
list for hints.
If it happened during index creation you can try rerunning the step with
```sh
nominatim import --continue indexing
```
Otherwise it's best to start the full setup from the beginning.
### PHP "open_basedir restriction in effect" warnings
PHP Warning: file_get_contents(): open_basedir restriction in effect.
You need to adjust the [open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir) setting
in your PHP configuration (`php.ini file`). By default this setting may look like this:
You need to adjust the
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
setting in your PHP configuration (`php.ini` file). By default this setting may
look like this:
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
Either add reported directories to the list or disable this setting temporarily by
dding ";" at the beginning of the line. Don't forget to enable this setting again
once you are done with the PHP command line operations.
Either add reported directories to the list or disable this setting temporarily
by adding ";" at the beginning of the line. Don't forget to enable this setting
again once you are done with the PHP command line operations.
### PHP timzeone warnings
### PHP timezeone warnings
The Apache log may contain lots of PHP warnings like this:
`PHP Warning: date_default_timezone_set() function.`
@@ -66,7 +83,7 @@ server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
and recompile (`cmake .. && make`).
## I see the error "ERROR: permission denied for language c"
### I see the error "ERROR: permission denied for language c"
`nominatim.so`, written in C, is required to be installed on the database
server. Some managed database (cloud) services like Amazon RDS do not allow
@@ -76,7 +93,7 @@ on a non-managed machine.
### I see the error: "function transliteration(text) does not exist"
Reinstall the nominatim functions with `setup.php --create--functions`
Reinstall the nominatim functions with `nominatim refresh --functions`
and check for any errors, e.g. a missing `nominatim.so` file.
### I see the error: "ERROR: mmap (remap) failed"
@@ -89,9 +106,15 @@ If you are using a flatnode file, then it may also be that the underlying
filesystem does not fully support 'mmap'. A notable candidate is virtualbox's
vboxfs.
### I see the error: "clang: Command not found" on CentOS
On CentOS 7 users reported `/opt/rh/llvm-toolset-7/root/usr/bin/clang: Command not found`.
Double-check clang is installed. Instead of `make` try running `make CLANG=true`.
### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) during the initial import of the database. It's
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168)
during the initial import of the database. It's
something PostgreSQL internal Nominatim doesn't control. And PostgreSQL forums
suggest it's threading related but definitely some kind of crash of a process.
Users reported either rebooting the server, different hardware or just trying
@@ -107,10 +130,11 @@ to get the full error message.
`could not connect to server: No such file or directory`
On CentOS v7 the PostgreSQL server is started with `systemd`.
Check if `/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`.
If so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security feature,
so use the [preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
On CentOS v7 the PostgreSQL server is started with `systemd`. Check if
`/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`. If
so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security
feature, so use the
[preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
However, you can solve this the quick and dirty way by commenting out that line and then run
@@ -118,14 +142,12 @@ However, you can solve this the quick and dirty way by commenting out that line
sudo systemctl restart httpd
### "must be an array or an object that implements Countable" warning in /usr/share/pear/DB.php
The warning started with PHP 7.2. Make sure you have at least [version 1.9.3 of PEAR DB](https://github.com/pear/DB/releases)
installed.
### Website reports "DB Error: insufficient permissions"
The user the webserver, e.g. Apache, runs under needs to have access to the Nominatim database. You can find the user like [this](https://serverfault.com/questions/125865/finding-out-what-user-apache-is-running-as), for default Ubuntu operating system for example it's `www-data`.
The user the webserver, e.g. Apache, runs under needs to have access to the
Nominatim database. You can find the user like
[this](https://serverfault.com/questions/125865/finding-out-what-user-apache-is-running-as),
for default Ubuntu operating system for example it's `www-data`.
1. Repeat the `createuser` step of the installation instructions.
@@ -150,7 +172,8 @@ Example error message
The PostgreSQL database, i.e. user `postgres`, needs to have access to that file.
The permission need to be read & executable by everybody, e.g.
The permission need to be read & executable by everybody, but not writeable
by everybody, e.g.
```
-rwxr-xr-x 1 nominatim nominatim 297984 build/module/nominatim.so
@@ -161,55 +184,31 @@ Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.
When running SELinux, make sure that the
[context is set up correctly](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
When you recently updated your operating system, updated PostgreSQL to
a new version or moved files (e.g. the build directory) you should
recreate `nominatim.so`. Try
```
cd build
rm -r module/
cmake $main_Nominatim_path && make
```
### Setup.php fails with "DB Error: extension not found"
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
See the installation instruction for a full list of required packages.
See the installation instructions for a full list of required packages.
### Setup.php reports "Cannot redeclare getDB()"
`Cannot redeclare getDB() (previously declared in /your/path/Nominatim/lib/db.php:4)`
The message is a bit misleading as PHP needs to load the file `DB.php` and
instead re-loads Nominatim's `db.php`. To solve this make sure you
have the [Pear module 'DB'](https://pear.php.net/package/DB/) installed.
sudo pear install DB
### I forgot to delete the flatnodes file before starting an import.
That's fine. For each import the flatnodes file get overwritten.
See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage]()
See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage](https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage)
for more information.
## Running your own instance
### Can I import multiple countries and keep them up to date?
You should use the extracts and updates from https://download.geofabrik.de.
For the initial import, download the countries you need and merge them.
See [OSM Help](https://help.openstreetmap.org/questions/48843/merging-two-or-more-geographical-areas-to-import-two-or-more-osm-files-in-nominatim)
for examples how to do that. Use the resulting single osm file when
running `setup.php`.
For updates you need to download the change files for each country
once per day and apply them **separately** using
./utils/update.php --import-diff <filename> --index
See [this issue](https://github.com/openstreetmap/Nominatim/issues/60#issuecomment-18679446)
for a script that runs the updates using osmosis.
### Can I import negative OSM ids into Nominatim?
See [this question of Stackoverflow](https://help.openstreetmap.org/questions/64662/nominatim-flatnode-with-negative-id).
### Missing XML or text declaration
The website might show: `XML Parsing Error: XML or text declaration not at start of entity Location.`
Make sure there are no spaces at the beginning of your `settings/local.php` file.

View File

@@ -1,276 +0,0 @@
# Importing and Updating the Database
The following instructions explain how to create a Nominatim database
from an OSM planet file and how to keep the database up to date. It
is assumed that you have already successfully installed the Nominatim
software itself, if not return to the [installation page](Installation.md).
## Configuration setup in settings/local.php
The Nominatim server can be customized via the file `settings/local.php`
in the build directory. Note that this is a PHP file, so it must always
start like this:
<?php
without any leading spaces.
There are lots of configuration settings you can tweak. Have a look
at `settings/default.php` for a full list. Most should have a sensible default.
#### Flatnode files
If you plan to import a large dataset (e.g. Europe, North America, planet),
you should also enable flatnode storage of node locations. With this
setting enabled, node coordinates are stored in a simple file instead
of the database. This will save you import time and disk storage.
Add to your `settings/local.php`:
@define('CONST_Osm2pgsql_Flatnode_File', '/path/to/flatnode.file');
Replace the second part with a suitable path on your system and make sure
the directory exists. There should be at least 40GB of free space.
## Downloading additional data
### Wikipedia rankings
Wikipedia can be used as an optional auxiliary data source to help indicate
the importance of OSM features. Nominatim will work without this information
but it will improve the quality of the results if this is installed.
This data is available as a binary download:
cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/wikipedia_article.sql.bin
wget https://www.nominatim.org/data/wikipedia_redirect.sql.bin
Combined the 2 files are around 1.5GB and add around 30GB to the install
size of Nominatim. They also increase the install time by an hour or so.
*NOTE:* you'll need to download the Wikipedia rankings before performing
the initial import of the data if you want the rankings applied to the
loaded data.
### Great Britain, USA postcodes
Nominatim can use postcodes from an external source to improve searches that
involve a GB or US postcode. This data can be optionally downloaded:
cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
wget https://www.nominatim.org/data/us_postcode_data.sql.gz
## Choosing the Data to Import
In its default setup Nominatim is configured to import the full OSM data
set for the entire planet. Such a setup requires a powerful machine with
at least 32GB of RAM and around 800GB of SSD hard disks. Depending on your
use case there are various ways to reduce the amount of data imported. This
section discusses these methods. They can also be combined.
### Using an extract
If you only need geocoding for a smaller region, then precomputed extracts
are a good way to reduce the database size and import time.
[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
They even have daily updates which can be used with the update process described
below. There are also
[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
Please be aware that some extracts are not cut exactly along the country
boundaries. As a result some parts of the boundary may be missing which means
that Nominatim cannot compute the areas for some administrative areas.
### Dropping Data Required for Dynamic Updates
About half of the data in Nominatim's database is not really used for serving
the API. It is only there to allow the data to be updated from the latest
changes from OSM. For many uses these dynamic updates are not really required.
If you don't plan to apply updates, the dynamic part of the database can be
safely dropped using the following command:
```
./utils/setup.php --drop
```
Note that you still need to provide for sufficient disk space for the initial
import. So this option is particularly interesting if you plan to transfer the
database or reuse the space later.
### Reverse-only Imports
If you only want to use the Nominatim database for reverse lookups or
if you plan to use the installation only for exports to a
[photon](https://photon.komoot.de/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
### Filtering Imported Data
Nominatim normally sets up a full search database containing administrative
boundaries, places, streets, addresses and POI data. There are also other
import styles available which only read selected data:
* **settings/import-admin.style**
Only import administrative boundaries and places.
* **settings/import-street.style**
Like the admin style but also adds streets.
* **settings/import-address.style**
Import all data necessary to compute addresses down to house number level.
* **settings/import-full.style**
Default style that also includes points of interest.
The style can be changed with the configuration `CONST_Import_Style`.
To give you an idea of the impact of using the different styles, the table
below gives rough estimates of the final database size after import of a
2018 planet and after using the `--drop` option. It also shows the time
needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. Note that
the given sizes are just an estimate meant for comparison of style requirements.
Your planet import is likely to be larger as the OSM data grows with time.
style | Import time | DB size | after drop
----------|--------------|------------|------------
admin | 5h | 190 GB | 20 GB
street | 42h | 400 GB | 180 GB
address | 59h | 500 GB | 260 GB
full | 80h | 575 GB | 300 GB
You can also customize the styles further. For an description of the
style format see [the development section](../develop/Import.md).
## Initial import of the data
**Important:** first try the import with a small extract, for example from
[Geofabrik](https://download.geofabrik.de).
Download the data to import and load the data with the following command
from the build directory:
```sh
./utils/setup.php --osm-file <data file> --all [--osm2pgsql-cache 28000] 2>&1 | tee setup.log
```
The `--osm2pgsql-cache` parameter is optional but strongly recommended for
planet imports. It sets the node cache size for the osm2pgsql import part
(see `-C` parameter in osm2pgsql help). As a rule of thumb, this should be
about the same size as the file you are importing but never more than
2/3 of RAM available. If your machine starts swapping reduce the size.
Computing word frequency for search terms can improve the performance of
forward geocoding in particular under high load as it helps PostgreSQL's query
planner to make the right decisions. To recompute word counts run:
```sh
./utils/update.php --recompute-word-counts
```
This will take a couple of hours for a full planet installation. You can
also defer that step to a later point in time when you realise that
performance becomes an issue. Just make sure that updates are stopped before
running this function.
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to enable these key phrases like this:
./utils/specialphrases.php --wiki-import > specialphrases.sql
psql -d nominatim -f specialphrases.sql
Note that this command downloads the phrases from the wiki link above.
## Installing Tiger housenumber data for the US
Nominatim is able to use the official [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)
address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER 2019 data and unpack it into the
data directory in your Nominatim sources:
cd Nominatim/data
wget https://nominatim.org/data/tiger2019-nominatim-preprocessed.tar.gz
tar xf tiger2019-nominatim-preprocessed.tar.gz
`data-source/us-tiger/README.md` explains how the data got preprocessed.
2. Import the data into your Nominatim database:
./utils/setup.php --import-tiger-data
3. Enable use of the Tiger data in your `settings/local.php` by adding:
@define('CONST_Use_US_Tiger_Data', true);
4. Apply the new settings:
```sh
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```
## Updates
There are many different ways to update your Nominatim database.
The following section describes how to keep it up-to-date with Pyosmium.
For a list of other methods see the output of `./utils/update.php --help`.
#### Installing the newest version of Pyosmium
It is recommended to install Pyosmium via pip. Make sure to use python3.
Run (as the same user who will later run the updates):
```sh
pip3 install --user osmium
```
Nominatim needs a tool called `pyosmium-get-updates` which comes with
Pyosmium. You need to tell Nominatim where to find it. Add the
following line to your `settings/local.php`:
@define('CONST_Pyosmium_Binary', '/home/user/.local/bin/pyosmium-get-changes');
The path above is fine if you used the `--user` parameter with pip.
Replace `user` with your user name.
#### Setting up the update process
Next the update needs to be initialised. By default Nominatim is configured
to update using the global minutely diffs.
If you want a different update source you will need to add some settings
to `settings/local.php`. For example, to use the daily country extracts
diffs for Ireland from Geofabrik add the following:
// base URL of the replication service
@define('CONST_Replication_Url', 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates');
// How often upstream publishes diffs
@define('CONST_Replication_Update_Interval', '86400');
// How long to sleep if no update found yet
@define('CONST_Replication_Recheck_Interval', '900');
To set up the update process now run the following command:
./utils/update.php --init-updates
It outputs the date where updates will start. Recheck that this date is
what you expect.
The `--init-updates` command needs to be rerun whenever the replication service
is changed.
#### Updating Nominatim
The following command will keep your database constantly up to date:
./utils/update.php --import-osmosis-all
(Note that even though the old name "import-osmosis-all" has been kept for compatibility reasons, Osmosis is not required to run this - it uses pyosmium behind the scenes.)
If you have imported multiple country extracts and want to keep them
up-to-date, have a look at the script in
[issue #60](https://github.com/openstreetmap/Nominatim/issues/60).

306
docs/admin/Import.md Normal file
View File

@@ -0,0 +1,306 @@
# Importing the Database
The following instructions explain how to create a Nominatim database
from an OSM planet file. It is assumed that you have already successfully
installed the Nominatim software itself and the `nominatim` tool can be found
in your `PATH`. If this is not the case, return to the
[installation page](Installation.md).
## Creating the project directory
Before you start the import, you should create a project directory for your
new database installation. This directory receives all data that is related
to a single Nominatim setup: configuration, extra data, etc. Create a project
directory apart from the Nominatim software and change into the directory:
```
mkdir ~/nominatim-planet
cd ~/nominatim-planet
```
In the following, we refer to the project directory as `$PROJECT_DIR`. To be
able to copy&paste instructions, you can export the appropriate variable:
```
export PROJECT_DIR=~/nominatim-planet
```
The Nominatim tool assumes per default that the current working directory is
the project directory but you may explicitly state a different directory using
the `--project-dir` parameter. The following instructions assume that you run
all commands from the project directory.
!!! tip "Migration Tip"
Nominatim used to be run directly from the build directory until version 3.6.
Essentially, the build directory functioned as the project directory
for the database installation. This setup still works and can be useful for
development purposes. It is not recommended anymore for production setups.
Create a project directory that is separate from the Nominatim software.
### Configuration setup in `.env`
The Nominatim server can be customized via an `.env` configuration file in the
project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
format which looks the same as variable settings in a standard shell environment.
You can also set the same configuration via environment variables. All
settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
variables.
There are lots of configuration settings you can tweak. Have a look
at `settings/env.default` for a full list. Most should have a sensible default.
#### Flatnode files
If you plan to import a large dataset (e.g. Europe, North America, planet),
you should also enable flatnode storage of node locations. With this
setting enabled, node coordinates are stored in a simple file instead
of the database. This will save you import time and disk storage.
Add to your `.env`:
NOMINATIM_FLATNODE_FILE="/path/to/flatnode.file"
Replace the second part with a suitable path on your system and make sure
the directory exists. There should be at least 75GB of free space.
## Downloading additional data
### Wikipedia/Wikidata rankings
Wikipedia can be used as an optional auxiliary data source to help indicate
the importance of OSM features. Nominatim will work without this information
but it will improve the quality of the results if this is installed.
This data is available as a binary download. Put it into your project directory:
cd $PROJECT_DIR
wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
The file is about 400MB and adds around 4GB to the Nominatim database.
!!! tip
If you forgot to download the wikipedia rankings, you can also add
importances after the import. Download the files, then run
`nominatim refresh --wiki-data --importance`. Updating importances for
a planet can take a couple of hours.
### Great Britain, USA postcodes
Nominatim can use postcodes from an external source to improve searches that
involve a GB or US postcode. This data can be optionally downloaded into the
project directory:
cd $PROJECT_DIR
wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
wget https://www.nominatim.org/data/us_postcode_data.sql.gz
## Choosing the data to import
In its default setup Nominatim is configured to import the full OSM data
set for the entire planet. Such a setup requires a powerful machine with
at least 64GB of RAM and around 900GB of SSD hard disks. Depending on your
use case there are various ways to reduce the amount of data imported. This
section discusses these methods. They can also be combined.
### Using an extract
If you only need geocoding for a smaller region, then precomputed OSM extracts
are a good way to reduce the database size and import time.
[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
They even have daily updates which can be used with the update process described
[in the next section](../Update). There are also
[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
Please be aware that some extracts are not cut exactly along the country
boundaries. As a result some parts of the boundary may be missing which means
that Nominatim cannot compute the areas for some administrative areas.
### Dropping Data Required for Dynamic Updates
About half of the data in Nominatim's database is not really used for serving
the API. It is only there to allow the data to be updated from the latest
changes from OSM. For many uses these dynamic updates are not really required.
If you don't plan to apply updates, you can run the import with the
`--no-updates` parameter. This will drop the dynamic part of the database as
soon as it is not required anymore.
You can also drop the dynamic part later using the following command:
```
nominatim freeze
```
Note that you still need to provide for sufficient disk space for the initial
import. So this option is particularly interesting if you plan to transfer the
database or reuse the space later.
### Reverse-only Imports
If you only want to use the Nominatim database for reverse lookups or
if you plan to use the installation only for exports to a
[photon](https://photon.komoot.de/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
### Filtering Imported Data
Nominatim normally sets up a full search database containing administrative
boundaries, places, streets, addresses and POI data. There are also other
import styles available which only read selected data:
* **settings/import-admin.style**
Only import administrative boundaries and places.
* **settings/import-street.style**
Like the admin style but also adds streets.
* **settings/import-address.style**
Import all data necessary to compute addresses down to house number level.
* **settings/import-full.style**
Default style that also includes points of interest.
* **settings/import-extratags.style**
Like the full style but also adds most of the OSM tags into the extratags
column.
The style can be changed with the configuration `NOMINATIM_IMPORT_STYLE`.
To give you an idea of the impact of using the different styles, the table
below gives rough estimates of the final database size after import of a
2020 planet and after using the `--drop` option. It also shows the time
needed for the import on a machine with 64GB RAM, 4 CPUS and NVME disks.
Note that the given sizes are just an estimate meant for comparison of
style requirements. Your planet import is likely to be larger as the
OSM data grows with time.
style | Import time | DB size | after drop
----------|--------------|------------|------------
admin | 4h | 215 GB | 20 GB
street | 22h | 440 GB | 185 GB
address | 36h | 545 GB | 260 GB
full | 54h | 640 GB | 330 GB
extratags | 54h | 650 GB | 340 GB
You can also customize the styles further.
A [description of the style format](../develop/Import.md#configuring-the-import)
can be found in the development section.
## Initial import of the data
!!! danger "Important"
First try the import with a small extract, for example from
[Geofabrik](https://download.geofabrik.de).
Download the data to import. Then issue the following command
from the **build directory** to start the import:
```sh
nominatim import --osm-file <data file> 2>&1 | tee setup.log
```
### Notes on full planet imports
Even on a perfectly configured machine
the import of a full planet takes around 2 days. Once you see messages
with `Rank .. ETA` appear, the indexing process has started. This part takes
the most time. There are 30 ranks to process. Rank 26 and 30 are the most complex.
They take each about a third of the total import time. If you have not reached
rank 26 after two days of import, it is worth revisiting your system
configuration as it may not be optimal for the import.
### Notes on memory usage
In the first step of the import Nominatim uses [osm2pgsql](https://osm2pgsql.org)
to load the OSM data into the PostgreSQL database. This step is very demanding
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
this point. PostgreSQL blocks at least the part of RAM that has been configured
with the `shared_buffers` parameter during
[PostgreSQL tuning](Installation#postgresql-tuning)
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
its internal data structures, potentially more when it has to process very large
relations. In addition it needs to maintain a cache for node locations. The size
of this cache can be configured with the parameter `--osm2pgsql-cache`.
When importing with a flatnode file, it is best to disable the node cache
completely and leave the memory for the flatnode file. Nominatim will do this
by default, so you do not need to configure anything in this case.
For imports without a flatnode file, set `--osm2pgsql-cache` approximately to
the size of the OSM pbf file you are importing. The size needs to be given in
MB. Make sure you leave enough RAM for PostgreSQL and osm2pgsql as mentioned
above. If the system starts swapping or you are getting out-of-memory errors,
reduce the cache size or even consider using a flatnode file.
### Testing the installation
Run this script to verify all required tables and indices got created successfully.
```sh
nominatim admin --check-database
```
Now you can try out your installation by running:
```sh
nominatim serve
```
This runs a small test server normally used for development. You can use it
to verify that your installation is working. Go to
`http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
## Tuning the database
Accurate word frequency information for search terms helps PostgreSQL's query
planner to make the right decisions. Recomputing them can improve the performance
of forward geocoding in particular under high load. To recompute word counts run:
```sh
nominatim refresh --word-counts
```
This will take a couple of hours for a full planet installation. You can
also defer that step to a later point in time when you realise that
performance becomes an issue. Just make sure that updates are stopped before
running this function.
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:
nominatim special-phrases --import-from-wiki
Note that this command downloads the phrases from the wiki link above. You
need internet access for the step.
## Installing Tiger housenumber data for the US
Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER 2020 data:
cd $PROJECT_DIR
wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.tar.gz
2. Import the data into your Nominatim database:
nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.tar.gz
3. Enable use of the Tiger data in your `.env` by adding:
echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
4. Apply the new settings:
nominatim refresh --functions
See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
information on how the data got preprocessed.

View File

@@ -4,8 +4,9 @@ This page contains generic installation instructions for Nominatim and its
prerequisites. There are also step-by-step instructions available for
the following operating systems:
* [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
* [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
* [Ubuntu 16.04](../appendix/Install-on-Ubuntu-16.md)
* [CentOS 8](../appendix/Install-on-Centos-8.md)
* [CentOS 7.2](../appendix/Install-on-Centos-7.md)
These OS-specific instructions can also be found in executable form
@@ -25,65 +26,80 @@ and can't offer support.
For compiling:
* [cmake](https://cmake.org/)
* [libxml2](http://xmlsoft.org/)
* a recent C++ compiler
Nominatim comes with its own version of osm2pgsql. See the
osm2pgsql README for additional dependencies required for compiling osm2pgsql.
For running tests:
* [behave](http://pythonhosted.org/behave/)
* [Psycopg2](https://initd.org/psycopg)
* [nose](https://nose.readthedocs.io)
* [phpunit](https://phpunit.de)
* [expat](https://libexpat.github.io/)
* [proj](https://proj.org/)
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
For running Nominatim:
* [PostgreSQL](https://www.postgresql.org) (9.3 or later)
* [PostGIS](https://postgis.org) (2.2 or later)
* [PostgreSQL](https://www.postgresql.org) (9.3+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+)
* [Python 3](https://www.python.org/) (3.5+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [PyICU](https://pypi.org/project/PyICU/)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
* [PEAR::DB](https://pear.php.net/package/DB)
* a webserver (apache or nginx are recommended)
* PHP-cgi (for running queries from the command line)
For running continuous updates:
* [pyosmium](https://osmcode.org/pyosmium/) (with Python 3)
* [pyosmium](https://osmcode.org/pyosmium/)
For dependencies for running tests and building documentation, see
the [Development section](../develop/Development-Environment.md).
### Hardware
A minimum of 2GB of RAM is required or installation will fail. For a full
planet import 32GB of RAM or more are strongly recommended.
planet import 64GB of RAM or more are strongly recommended. Do not report
out of memory problems if you have less than 64GB RAM.
For a full planet install you will need at least 700GB of hard disk space
(take into account that the OSM database is growing fast). SSD disks
will help considerably to speed up import and queries.
For a full planet install you will need at least 900GB of hard disk space.
Take into account that the OSM database is growing fast.
Fast disks are essential. Using NVME disks is recommended.
On a 6-core machine with 32GB RAM and SSDs the import of a full planet takes
a bit more than 2 days. Without SSDs 7-8 days are more realistic.
Even on a well configured machine the import of a full planet takes
around 2 days. On traditional spinning disks, 7-8 days are more realistic.
## Setup of the server
### PostgreSQL tuning
## Tuning the PostgreSQL database
You might want to tune your PostgreSQL installation so that the later steps
make best use of your hardware. You should tune the following parameters in
your `postgresql.conf` file.
shared_buffers (2GB)
maintenance_work_mem (10GB)
work_mem (50MB)
effective_cache_size (24GB)
shared_buffers = 2GB
maintenance_work_mem = (10GB)
autovacuum_work_mem = 2GB
work_mem = (50MB)
effective_cache_size = (24GB)
synchronous_commit = off
checkpoint_segments = 100 # only for postgresql <= 9.4
max_wal_size = 1GB # postgresql > 9.4
checkpoint_timeout = 10min
checkpoint_completion_target = 0.9
The numbers in brackets behind some parameters seem to work fine for
32GB RAM machine. Adjust to your setup.
64GB RAM machine. Adjust to your setup. A higher number for `max_wal_size`
means that PostgreSQL needs to run checkpoints less often but it does require
the additional space on your disk.
Autovacuum must not be switched off because it ensures that the
tables are frequently analysed. If your machine has very little memory,
you might consider setting:
autovacuum_max_workers = 1
and even reduce `autovacuum_work_mem` further. This will reduce the amount
of memory that autovacuum takes away from the import process.
For the initial import, you should also set:
@@ -91,68 +107,57 @@ For the initial import, you should also set:
full_page_writes = off
Don't forget to reenable them after the initial import or you risk database
corruption. Autovacuum must not be switched off because it ensures that the
tables are frequently analysed.
### Webserver setup
The `website/` directory in the build directory contains the configured
website. Include the directory into your webbrowser to serve php files
from there.
#### Configure for use with Apache
Make sure your Apache configuration contains the required permissions for the
directory and create an alias:
<Directory "/srv/nominatim/build/website">
Options FollowSymLinks MultiViews
AddType text/html .php
DirectoryIndex search.php
Require all granted
</Directory>
Alias /nominatim /srv/nominatim/build/website
`/srv/nominatim/build` should be replaced with the location of your
build directory.
After making changes in the apache config you need to restart apache.
The website should now be available on http://localhost/nominatim.
#### Configure for use with Nginx
Use php-fpm as a deamon for serving PHP cgi. Install php-fpm together with nginx.
By default php listens on a network socket. If you want it to listen to a
Unix socket instead, change the pool configuration (`pool.d/www.conf`) as
follows:
; Comment out the tcp listener and add the unix socket
;listen = 127.0.0.1:9000
listen = /var/run/php5-fpm.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
listen.group = www-data
listen.mode = 0666
Tell nginx that php files are special and to fastcgi_pass to the php-fpm
unix socket by adding the location definition to the default configuration.
root /srv/nominatim/build/website;
index search.php index.html;
location ~ [^/]\.php(/|$) {
fastcgi_split_path_info ^(.+?\.php)(/.*)$;
if (!-f $document_root$fastcgi_script_name) {
return 404;
}
fastcgi_pass unix:/var/run/php5-fpm.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
Restart the nginx and php5-fpm services and the website should now be available
at `http://localhost/`.
corruption.
Now continue with [importing the database](Import-and-Update.md).
## Downloading and building Nominatim
### Downloading the latest release
You can download the [latest release from nominatim.org](https://nominatim.org/downloads/).
The release contains all necessary files. Just unpack it.
### Downloading the latest development version
If you want to install latest development version from github, make sure to
also check out the osm2pgsql subproject:
```
git clone --recursive git://github.com/openstreetmap/Nominatim.git
```
The development version does not include the country grid. Download it separately:
```
wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
```
### Building Nominatim
The code must be built in a separate directory. Create the directory and
change into it.
```
mkdir build
cd build
```
Nominatim uses cmake and make for building. Assuming that you have created the
build at the same level as the Nominatim source directory run:
```
cmake ../Nominatim
make
sudo make install
```
Nominatim installs itself into `/usr/local` per default. To choose a different
installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
cmake command. Make sure that the `bin` directory is available in your path
in that case, e.g.
```
export PATH=<install root>/bin:$PATH
```
Now continue with [importing the database](Import.md).

View File

@@ -1,10 +1,154 @@
# Database Migrations
This page describes database migrations necessary to update existing databases
to newer versions of Nominatim.
Since version 3.7.0 Nominatim offers automatic migrations. Please follow
the following steps:
SQL statements should be executed from the PostgreSQL commandline. Execute
`psql nominatim` to enter command line mode.
* stop any updates that are potentially running
* update Nominatim to the newer version
* go to your project directory and run `nominatim admin --migrate`
* (optionally) restart updates
Below you find additional migrations and hints about other structural and
breaking changes. **Please read them before running the migration.**
!!! note
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
## 3.6.0 -> 3.7.0
### New location for data files
External data files for Wikipedia importance, postcodes etc. are no longer
expected to reside in the source tree by default. Instead they will be searched
in the project directory. If you have an automated setup script you must
either adapt the download location or explicitly set the location of the
files to the old place in your `.env`.
### Introducing `nominatim` command line tool
The various php utilities have been replaced with a single `nominatim`
command line tool. Make sure to adapt any scripts. There is no direct 1:1
matching between the old utilities and the commands of nominatim CLI. The
following list gives you a list of nominatim sub-commands that contain
functionality of each script:
* ./utils/setup.php: `import`, `freeze`, `refresh`
* ./utils/update.php: `replication`, `add-data`, `index`, `refresh`
* ./utils/specialphrases.php: `special-phrases`
* ./utils/check_import_finished.php: `admin`
* ./utils/warm.php: `admin`
* ./utils/export.php: `export`
Try `nominatim <command> --help` for more information about each subcommand.
`./utils/query.php` no longer exists in its old form. `nominatim search`
provides a replacement but returns different output.
### Switch to normalized house numbers
The housenumber column in the placex table uses now normalized version.
The automatic migration step will convert the column but this may take a
very long time. It is advisable to take the machine offline while doing that.
## 3.5.0 -> 3.6.0
### Change of layout of search_name_* tables
The table need a different index for nearest place lookup. Recreate the
indexes using the following shell script:
```bash
for table in `psql -d nominatim -c "SELECT tablename FROM pg_tables WHERE tablename LIKE 'search_name_%'" -tA | grep -v search_name_blank`;
do
psql -d nominatim -c "DROP INDEX idx_${table}_centroid_place; CREATE INDEX idx_${table}_centroid_place ON ${table} USING gist (centroid) WHERE ((address_rank >= 2) AND (address_rank <= 25)); DROP INDEX idx_${table}_centroid_street; CREATE INDEX idx_${table}_centroid_street ON ${table} USING gist (centroid) WHERE ((address_rank >= 26) AND (address_rank <= 27))";
done
```
### Removal of html output
The debugging UI is no longer directly provided with Nominatim. Instead we
now provide a simple Javascript application. Please refer to
[Setting up the Nominatim UI](../Setup-Nominatim-UI) for details on how to
set up the UI.
The icons served together with the API responses have been moved to the
nominatim-ui project as well. If you want to keep the `icon` field in the
response, you need to set `CONST_MapIcon_URL` to the URL of the `/mapicon`
directory of nominatim-ui.
### Change order during indexing
When reindexing places during updates, there is now a different order used
which needs a different database index. Create it with the following SQL command:
```sql
CREATE INDEX idx_placex_pendingsector_rank_address
ON placex
USING BTREE (rank_address, geometry_sector)
WHERE indexed_status > 0;
```
You can then drop the old index with:
```sql
DROP INDEX idx_placex_pendingsector;
```
### Unused index
This index has been unused ever since the query using it was changed two years ago. Saves about 12GB on a planet installation.
```sql
DROP INDEX idx_placex_geometry_reverse_lookupPoint;
```
### Switching to dotenv
As part of the work changing the configuration format, the configuration for
the website is now using a separate configuration file. To create the
configuration file, run the following command after updating:
```sh
./utils/setup.php --setup-website
```
## 3.4.0 -> 3.5.0
### New Wikipedia/Wikidata importance tables
The `wikipedia_*` tables have a new format that also includes references to
Wikidata. You need to update the computation functions and the tables as
follows:
* download the new Wikipedia tables as described in the import section
* reimport the tables: `./utils/setup.php --import-wikipedia-articles`
* update the functions: `./utils/setup.php --create-functions --enable-diff-updates`
* create a new lookup index:
```sql
CREATE INDEX idx_placex_wikidata
ON placex
USING BTREE ((extratags -> 'wikidata'))
WHERE extratags ? 'wikidata'
AND class = 'place'
AND osm_type = 'N'
AND rank_search < 26;
```
* compute importance: `./utils/update.php --recompute-importance`
The last step takes about 10 hours on the full planet.
Remove one function (it will be recreated in the next step):
```sql
DROP FUNCTION create_country(hstore,character varying);
```
Finally, update all SQL functions:
```sh
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```
## 3.3.0 -> 3.4.0
@@ -23,6 +167,12 @@ CREATE INDEX idx_location_area_country_geometry ON location_area_country USING G
CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
```
Finally, update all SQL functions:
```sh
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```
## 3.2.0 -> 3.3.0
### New database connection string (DSN) format
@@ -39,7 +189,7 @@ The new format is
### Natural Earth country boundaries no longer needed as fallback
```
```sql
DROP TABLE country_naturalearthdata;
```
@@ -65,27 +215,37 @@ following command:
The reverse algorithm has changed and requires new indexes. Run the following
SQL statements to create the indexes:
```
```sql
CREATE INDEX idx_placex_geometry_reverse_lookupPoint
ON placex USING gist (geometry)
WHERE (name is not null or housenumber is not null or rank_address between 26 and 27)
AND class not in ('railway','tunnel','bridge','man_made')
AND rank_address >= 26 AND indexed_status = 0 AND linked_place_id is null;
ON placex
USING gist (geometry)
WHERE (name IS NOT null or housenumber IS NOT null or rank_address BETWEEN 26 AND 27)
AND class NOT IN ('railway','tunnel','bridge','man_made')
AND rank_address >= 26
AND indexed_status = 0
AND linked_place_id IS null;
CREATE INDEX idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry)
WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
AND rank_address between 4 and 25
AND type != 'postcode'
AND name is not null
AND indexed_status = 0
AND linked_place_id is null;
CREATE INDEX idx_placex_geometry_reverse_placeNode
ON placex USING gist (geometry)
WHERE osm_type = 'N' AND rank_search between 5 and 25
AND class = 'place' AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
WHERE osm_type = 'N'
AND rank_search between 5 and 25
AND class = 'place'
AND type != 'postcode'
AND name is not null
AND indexed_status = 0
AND linked_place_id is null;
```
You also need to grant the website user access to the `country_osm_grid` table:
```
```sql
GRANT SELECT ON table country_osm_grid to "www-user";
```
@@ -93,7 +253,7 @@ Replace the `www-user` with the user name of your website server if necessary.
You can now drop the unused indexes:
```
```sql
DROP INDEX idx_placex_reverse_geometry;
```
@@ -122,8 +282,8 @@ CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry);
CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id);
CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode);
GRANT SELECT ON location_postcode TO "www-data";
drop type if exists nearfeaturecentr cascade;
create type nearfeaturecentr as (
DROP TYPE IF EXISTS nearfeaturecentr CASCADE;
CREATE TYPE nearfeaturecentr AS (
place_id BIGINT,
keywords int[],
rank_address smallint,

View File

@@ -0,0 +1,185 @@
# Setting up the Nominatim UI
Nominatim is a search API, it does not provide a website interface on its
own. [nominatim-ui](https://github.com/osm-search/nominatim-ui) offers a
small website for testing your setup and inspecting the database content.
This section provides a quick start how to use nominatim-ui with your
installation. For more details, please also have a look at the
[README of nominatim-ui](https://github.com/osm-search/nominatim-ui/blob/master/README.md).
## Installing nominatim-ui
nominatim-ui does not need any special installation, just download, configure
and run it.
Clone the source from github:
git clone https://github.com/osm-search/nominatim-ui
Copy the example configuration into the right place:
cd nominatim-ui
cp dist/config.example.js dist/config.js
Now adapt the configuration to your needs. You need at least
to change the `Nominatim_API_Endpoint` to point to your Nominatim installation.
Then you can just test it locally by spinning up a webserver in the `dist`
directory. For example, with Python:
cd nominatim-ui/dist
python3 -m http.server 8765
The website is now available at `http://localhost:8765`.
## Forwarding searches to nominatim-ui
Nominatim used to provide the search interface directly by itself when
`format=html` was requested. For all endpoints except for `/reverse` and
`/lookup` this even used to be the default.
The following section describes how to set up Apache or nginx, so that your
users are forwarded to nominatim-ui when they go to URL that formerly presented
the UI.
### Setting up forwarding in Nginx
First of all make nominatim-ui available under `/ui` on your webserver:
``` nginx
server {
# Here is the Nominatim setup as described in the Installation section
location /ui/ {
alias <full path to the nominatim-ui directory>/dist/;
index index.html;
}
}
```
Now we need to find out if a URL should be forwarded to the UI. Add the
following `map` commands *outside* the server section:
``` nginx
# Inspect the format parameter in the query arguments. We are interested
# if it is set to html or something else or if it is missing completely.
map $args $format {
default default;
~(^|&)format=html(&|$) html;
~(^|&)format= other;
}
# Determine from the URI and the format parameter above if forwarding is needed.
map $uri/$format $forward_to_ui {
default 1; # The default is to forward.
~^/ui 0; # If the URI point to the UI already, we are done.
~/other$ 0; # An explicit non-html format parameter. No forwarding.
~/reverse.*/default 0; # Reverse and lookup assume xml format when
~/lookup.*/default 0; # no format parameter is given. No forwarding.
}
```
The `$forward_to_ui` parameter can now be used to conditionally forward the
calls:
```
# When no endpoint is given, default to search.
# Need to add a rewrite so that the rewrite rules below catch it correctly.
rewrite ^/$ /search;
location @php {
# fastcgi stuff..
if ($forward_to_ui) {
rewrite ^(/[^/]*) https://yourserver.com/ui$1.html redirect;
}
}
location ~ [^/]\.php(/|$) {
# fastcgi stuff..
if ($forward_to_ui) {
rewrite (.*).php https://yourserver.com/ui$1.html redirect;
}
}
```
!!! warning
Be aware that the rewrite commands are slightly different for URIs with and
without the .php suffix.
Reload nginx and the UI should be available.
### Setting up forwarding in Apache
First of all make nominatim-ui available in the `ui/` subdirectory where
Nominatim is installed. For example, given you have set up an alias under
`nominatim` like this:
``` apache
Alias /nominatim /home/vagrant/build/website
```
you need to insert the following rules for nominatim-ui before that alias:
```
<Directory "/home/vagrant/nominatim-ui/dist">
DirectoryIndex search.html
Require all granted
</Directory>
Alias /nominatim/ui /home/vagrant/nominatim-ui/dist
```
Replace `/home/vagrant/nominatim-ui` with the directory where you have cloned
nominatim-ui.
!!! important
The alias for nominatim-ui must come before the alias for the Nominatim
website directory.
To set up forwarding, the Apache rewrite module is needed. Enable it with:
``` sh
sudo a2enmod rewrite
```
Then add rewrite rules to the `Directory` directive of the Nominatim website
directory like this:
``` apache
<Directory "/home/vagrant/build/website">
Options FollowSymLinks MultiViews
AddType text/html .php
Require all granted
RewriteEngine On
# This must correspond to the URL where nominatim can be found.
RewriteBase "/nominatim/"
# If no endpoint is given, then use search.
RewriteRule ^(/|$) "search.php"
# If format-html is explicity requested, forward to the UI.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+) ui/$1.html [R,END]
# If no format parameter is there then forward anything
# but /reverse and /lookup to the UI.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+) ui/$1.html [R,END]
</Directory>
```
Restart Apache and the UI should be available.

56
docs/admin/Update.md Normal file
View File

@@ -0,0 +1,56 @@
# Updating the Database
There are many different ways to update your Nominatim database.
The following section describes how to keep it up-to-date using
an [online replication service for OpenStreetMap data](https://wiki.openstreetmap.org/wiki/Planet.osm/diffs)
For a list of other methods to add or update data see the output of
`nominatim add-data --help`.
!!! important
If you have configured a flatnode file for the import, then you
need to keep this flatnode file around for updates.
#### Installing the newest version of Pyosmium
It is recommended to install Pyosmium via pip. Make sure to use python3.
Run (as the same user who will later run the updates):
```sh
pip3 install --user osmium
```
#### Setting up the update process
Next the update needs to be initialised. By default Nominatim is configured
to update using the global minutely diffs.
If you want a different update source you will need to add some settings
to `.env`. For example, to use the daily country extracts
diffs for Ireland from Geofabrik add the following:
# base URL of the replication service
NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
# How often upstream publishes diffs
NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
# How long to sleep if no update found yet
NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
To set up the update process now run the following command:
nominatim replication --init
It outputs the date where updates will start. Recheck that this date is
what you expect.
The `replication --init` command needs to be rerun whenever the replication
service is changed.
#### Updating Nominatim
The following command will keep your database constantly up to date:
nominatim replication
If you have imported multiple country extracts and want to keep them
up-to-date, [Advanced installations section](Advanced-Installations.md) contains instructions
to set up and update multiple country extracts.

View File

@@ -1,19 +1,22 @@
# Place details
Lookup details about a single place by id. The default output is HTML for debugging search logic and results.
Show all details about a single place saved in the database.
**The details page (including JSON output) exists for debugging only and must not be downloaded automatically**, see [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
!!! warning
The details page exists for debugging only. You may not use it in scripts
or to automatically query details about a result.
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
## Parameters
The details API supports the following two request formats:
```
https://nominatim.openstreetmap.org/details?osmtype=[N|W|R]&osmid=<value>&class=<value>
``` xml
https://nominatim.openstreetmap.org/details?osmtype=[N|W|R]&osmid=<value>&class=<value>
```
`osmtype` and `osmid` are required parameter. The type is one of node (N), way (W)
`osmtype` and `osmid` are required parameters. The type is one of node (N), way (W)
or relation (R). The id must be a number. The `class` parameter is optional and
allows to distinguish between entries, when the corresponding OSM object has more
than one main tag. For example, when a place is tagged with `tourism=hotel` and
@@ -23,36 +26,34 @@ to get exactly the one you want. If there are multiple places in the database
but the `class` parameter is left out, then one of the places will be chosen
at random and displayed.
```
https://nominatim.openstreetmap.org/details?place_id=<value>
``` xml
https://nominatim.openstreetmap.org/details?place_id=<value>
```
Placeids are assigned sequentially during Nominatim data import. The id for a place is different between Nominatim installation (servers) and changes when data gets reimported. Therefore it can't be used as permanent id and shouldn't be used in bug reports.
Place IDs are assigned sequentially during Nominatim data import. The ID
for a place is different between Nominatim installation (servers) and
changes when data gets reimported. Therefore it cannot be used as
a permanent id and shouldn't be used in bug reports.
Additional optional parameters are explained below.
### Output format
* `format=[html|json]`
See [Place Output Formats](Output.md) for details on each format. (Default: html)
* `json_callback=<string>`
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
* `pretty=[0|1]`
For JSON output will add indentation to make it more human-readable. (Default: 0)
Add indentation to make it more human-readable. (Default: 0)
### Output details
* `addressdetails=[0|1]`
Include a breakdown of the address into elements. (Default for JSON: 0, for HTML: 1)
Include a breakdown of the address into elements. (Default: 0)
* `keywords=[0|1]`
@@ -60,11 +61,16 @@ Include a list of name keywords and address keywords (word ids). (Default: 0)
* `linkedplaces=[0|1]`
Include details of places higher in the address hierarchy. E.g. for a street this is usually the city, state, postal code, country. (Default: 1)
Include a details of places that are linked with this one. Places get linked
together when they are different forms of the same physical object. Nominatim
links two kinds of objects together: place nodes get linked with the
corresponding administrative boundaries. Waterway relations get linked together with their
members.
(Default: 1)
* `hierarchy=[0|1]`
Include details of places lower in the address hierarchy. E.g. for a city this usually a list of streets, suburbs, rivers. (Default for JSON: 0, for HTML: 1)
Include details of places lower in the address hierarchy. (Default: 0)
* `group_hierarchy=[0|1]`
@@ -72,7 +78,7 @@ For JSON output will group the places by type. (Default: 0)
* `polygon_geojson=[0|1]`
Include geometry of result. (Default for JSON: 0, for HTML: 1)
Include geometry of result. (Default: 0)
### Language of results
@@ -86,10 +92,6 @@ comma-separated list of language codes.
## Examples
##### HTML
[https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407](https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407)
##### JSON
[https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407&format=json](https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407&format=json)

View File

@@ -58,4 +58,4 @@ The [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API) is more
suited for these kinds of queries.
That said if you installed your own Nominatim instance you can use the
`/utils/export.php` PHP script as basis to return such lists.
`nominatim export` PHP script as basis to return such lists.

View File

@@ -56,6 +56,21 @@ specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
* `polygon_threshold=0.0`
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
### Other

View File

@@ -2,12 +2,10 @@
The [/reverse](Reverse.md), [/search](Search.md) and [/lookup](Lookup.md)
API calls produce very similar output which is explained in this section.
There is one section for each format which is selectable via the `format`
parameter.
There is one section for each format. The format correspond to what was
selected via the `format` parameter.
## Formats
### JSON
## JSON
The JSON format returns an array of places (for search and lookup) or
a single place (for reverse) of the following format:
@@ -41,48 +39,50 @@ a single place (for reverse) of the following format:
"wikipedia": "en:London",
"population": "8416535"
}
},
}
```
The possible fields are:
* `place_id` - reference to the Nominatim internal database ID (see notes below)
* `osm_type`, `osm_id` - reference to the OSM object
* `boundingbox` - area of corner coordinates
* `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
* `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
* `boundingbox` - area of corner coordinates ([see notes](#boundingbox))
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `display_name` - full comma-separated address
* `class`, `type` - key and value of the main OSM tag
* `importance` - computed importance rank
* `icon` - link to class icon (if available)
* `address` - dictionary of address details (only with `addressdetails=1`)
* `address` - dictionary of address details (only with `addressdetails=1`,
[see notes](#addressdetails))
* `extratags` - dictionary with additional useful tags like website or maxspeed
(only with `extratags=1`)
* `namedetails` - dictionary with full list of available names including ref etc.
* `geojson`, `svg`, `geotext`, `geokml` - full geometry
(only with the appropriate `polygon_*` parameter)
### JSONv2
## JSONv2
This is the same as the JSON format with two changes:
* `class` renamed to `category`
* additional field `place_rank` with the search rank of the object
### GeoJSON
## GeoJSON
This format follows the [RFC7946](https://geojson.org). Every feature includes
a bounding box (`bbox`).
The feature list has the following fields:
The properties object has the following fields:
* `place_id` - reference to the Nominatim internal database ID (see notes below)
* `osm_type`, `osm_id` - reference to the OSM object
* `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
* `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
* `category`, `type` - key and value of the main OSM tag
* `display_name` - full comma-separated address
* `place_rank` - class search rank
* `importance` - computed importance rank
* `icon` - link to class icon (if available)
* `address` - dictionary of address details (only with `addressdetails=1`)
* `address` - dictionary of address details (only with `addressdetails=1`,
[see notes](#addressdetails))
* `extratags` - dictionary with additional useful tags like `website` or `maxspeed`
(only with `extratags=1`)
* `namedetails` - dictionary with full list of available names including ref etc.
@@ -90,39 +90,36 @@ The feature list has the following fields:
Use `polygon_geojson` to output the full geometry of the object instead
of the centroid.
### GeocodeJSON
## GeocodeJSON
The GeocodeJSON format follows the
[GeocodeJSON spec 0.1.0](https://github.com/geocoders/geocodejson-spec).
The following feature attributes are implemented:
* `osm_type`, `osm_id` - reference to the OSM object (unofficial extension)
* `osm_type`, `osm_id` - reference to the OSM object (unofficial extension, [see notes](#osm-reference))
* `type` - value of the main tag of the object (e.g. residential, restaurant, ...)
* `label` - full comma-separated address
* `name` - localised name of the place
* `housenumber`, `street`, `locality`, `postcode`, `city`,
`district`, `county`, `state`, `country` -
* `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
`county`, `state`, `country` -
provided when it can be determined from the address
(see [this issue](https://github.com/openstreetmap/Nominatim/issues/1080) for
current limitations on the correctness of the address) and `addressdetails=1`
was given
* `admin` - list of localised names of administrative boundaries (only with `addressdetails=1`)
Use `polygon_geojson` to output the full geometry of the object instead
of the centroid.
### XML
## XML
The XML response returns one or more place objects in slightly different
formats depending on the API call.
#### Reverse
### Reverse
```
<reversegeocode timestamp="Sat, 11 Aug 18 11:53:21 +0000"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
querystring="lat=48.400381&lon=11.745876&zoom=5&format=xml">
<result place_id="179509537" osm_type="relation" osm_id="2145268" ref="BY"
<result place_id="179509537" osm_type="relation" osm_id="2145268" ref="BY" place_rank="15" address_rank="15"
lat="48.9467562" lon="11.4038717"
boundingbox="47.2701114,50.5647142,8.9763497,13.8396373">
Bavaria, Germany
@@ -148,11 +145,11 @@ attribution to OSM and the original querystring.
The place information can be found in the `result` element. The attributes of that element contain:
* `place_id` - reference to the Nominatim internal database ID (see notes below)
* `osm_type`, `osm_id` - reference to the OSM object
* `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
* `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
* `ref` - content of `ref` tag if it exists
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
The full address of the result can be found in the content of the
`result` element as a comma-separated list.
@@ -160,14 +157,14 @@ The full address of the result can be found in the content of the
Additional information requested with `addressdetails=1`, `extratags=1` and
`namedetails=1` can be found in extra elements.
#### Search and Lookup
### Search and Lookup
```
<searchresults timestamp="Sat, 11 Aug 18 11:55:35 +0000"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
querystring="london" polygon="false" exclude_place_ids="100149"
more_url="https://nominatim.openstreetmap.org/search.php?q=london&addressdetails=1&extratags=1&exclude_place_ids=100149&format=xml&accept-language=en-US%2Cen%3Bq%3D0.7%2Cde%3Bq%3D0.3">
<place place_id="100149" osm_type="node" osm_id="107775" place_rank="15"
<place place_id="100149" osm_type="node" osm_id="107775" place_rank="15" address_rank="15"
boundingbox="51.3473219,51.6673219,-0.2876474,0.0323526" lat="51.5073219" lon="-0.1276474"
display_name="London, Greater London, England, SW1A 2DU, United Kingdom"
class="place" type="city" importance="0.9654895765402"
@@ -203,12 +200,13 @@ generic information about the query:
The place information can be found in the `place` elements, of which there may
be more than one. The attributes of that element contain:
* `place_id` - reference to the Nominatim internal database ID (see notes below)
* `osm_type`, `osm_id` - reference to the OSM object
* `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
* `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
* `ref` - content of `ref` tag if it exists
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates
* `place_rank` - class search rank
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
* `place_rank` - class [search rank](../develop/Ranking#search-rank)
* `address_rank` - place [address rank](../develop/Ranking#address-rank)
* `display_name` - full comma-separated address
* `class`, `type` - key and value of the main OSM tag
* `importance` - computed importance rank
@@ -218,17 +216,19 @@ When `addressdetails=1` is requested, the localised address parts appear
as subelements with the type of the address part.
Additional information requested with `extratags=1` and `namedetails=1` can
be found in extra elements as sub-element of each place.
be found in extra elements as sub-element of `extratags` and `namedetails`
respectively.
## Notes on field values
### place_id is not a persistent id
The `place_id` is created when a Nominatim database gets installed. A
single place will have a different value on another server or even when
the same data gets re-imported. It's thus not useful to treat it as
permanent for later use.
The `place_id` is an internal identifier that is assigned data is imported
into a Nominatim database. The same OSM object will have a different value
on another server. It may even change its ID on the same server when it is
removed and reimported while updating the database with fresh OSM data.
It is thus not useful to treat it as permanent for later use.
The combination `osm_type`+`osm_id` is slighly better but remember in
OpenStreetMap mappers can delete, split, recreate places (and those
@@ -237,10 +237,59 @@ Places can also change their meaning without changing their `osm_id`,
e.g. when a restaurant is retagged as supermarket. For a more in-depth
discussion see [Permanent ID](https://wiki.openstreetmap.org/wiki/Permanent_ID).
Nominatim merges some places (e.g. center node of a city with the boundary
relation) so `osm_type`+`osm_id`+`class_name` would be more unique.
If you need an ID that is consistent over multiple installations of Nominatim,
then you should use the combination of `osm_type`+`osm_id`+`class`.
### OSM reference
Nominatim may sometimes return special objects that do not correspond directly
to an object in OpenStreetMap. These are:
* **Postcodes**. Nominatim returns an postcode point created from all mapped
postcodes of the same name. The class and type of these object is `place=postcdode`.
No `osm_type` and `osm_id` are included in the result.
* **Housenumber interpolations**. Nominatim returns a single interpolated
housenumber from the interpolation way. The class and type are `place=house`
and `osm_type` and `osm_id` correspond to the interpolation way in OSM.
* **TIGER housenumber.** Nominatim returns a single interpolated housenumber
from the TIGER data. The class and type are `place=house`
and `osm_type` and `osm_id` correspond to the street mentioned in the result.
Please note that the `osm_type` and `osm_id` returned may be changed in the
future. You should not expect to only find `node`, `way` and `relation` for
the type.
### boundingbox
Comma separated list of min latitude, max latitude, min longitude, max longitude.
The whole planet would be `-90,90,-180,180`.
Can be used to pan and center the map on the result, for example with leafletjs
mapping library
`map.fitBounds([[bbox[0],bbox[2]],[bbox[1],bbox[3]]], {padding: [20, 20], maxzoom: 16});`
Bounds crossing the antimeridian have a min latitude -180 and max latitude 180,
essentially covering the entire planet
(see [issue 184](https://github.com/openstreetmap/Nominatim/issues/184)).
### addressdetails
Address details in the xml and json formats return a list of names together
with a designation label. Per default the following labels may appear:
* continent
* country, country_code
* region, state, state_district, county
* municipality, city, town, village
* city_district, district, borough, suburb, subdivision
* hamlet, croft, isolated_dwelling
* neighbourhood, allotments, quarter
* city_block, residental, farm, farmyard, industrial, commercial, retail
* road
* house_number, house_name
* emergency, historic, military, natural, landuse, place, railway,
man_made, aerialway, boundary, amenity, aeroway, club, craft, leisure,
office, mountain_pass, shop, tourism, bridge, tunnel, waterway
They roughly correspond to the classification of the OpenStreetMap data
according to either the `place` tag or the main key of the object.

View File

@@ -7,7 +7,7 @@ Its API has the following endpoints for querying the data:
* __[/search](Search.md)__ - search OSM objects by name or type
* __[/reverse](Reverse.md)__ - search OSM object by their location
* __[/lookup](Lookup.md)__ - look up address details for OSM objects by their ID
* __/status__ - query the status of the server
* __[/status](Status.md)__ - query the status of the server
* __/deletable__ - list objects that have been deleted in OSM but are held
back in Nominatim in case the deletion was accidental
* __/polygons__ - list of broken polygons detected by Nominatim

View File

@@ -1,36 +1,48 @@
# Reverse Geocoding
Reverse geocoding generates an address from a latitude and longitude or from
an OSM object.
Reverse geocoding generates an address from a latitude and longitude.
## How it works
The reverse geocoding API does not exactly compute the address for the
coordinate it receives. It works by finding the closest suitable OSM object
and returning its address information. This may occasionally lead to
unexpected results.
First of all, Nominatim only includes OSM objects in
its index that are suitable for searching. Small, unnamed paths for example
are missing from the database and can therefore not be used for reverse
geocoding either.
The other issue to be aware of is that the closest OSM object may not always
have a similar enough address to the coordinate you were requesting. For
example, in dense city areas it may belong to a completely different street.
## Parameters
The main format of the reverse API is
```
https://nominatim.openstreetmap.org/reverse?<query>
https://nominatim.openstreetmap.org/reverse?lat=<value>&lon=<value>&<params>
```
There are two ways how the requested location can be specified:
where `lat` and `lon` are latitude and longitutde of a coordinate in WGS84
projection. The API returns exactly one result or an error when the coordinate
is in an area with no OSM data coverage.
* `lat=<value>` `lon=<value>`
Additional paramters are accepted as listed below.
A geographic location to generate an address for. The coordiantes must be
in WGS84 format.
* `osm_type=[N|W|R]` `osm_id=<value>`
A specific OSM node(N), way(W) or relation(R) to return an address for.
In both cases exactly one object is returned. The two input parameters cannot
be used at the same time. Both accept the additional optional parameters listed
below.
!!! warning "Deprecation warning"
The reverse API used to allow address lookup for a single OSM object by
its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
instead.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format. (Default: html)
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
* `json_callback=<string>`
@@ -69,8 +81,9 @@ comma-separated list of language codes.
* `zoom=[0-18]`
Level of detail required for the address. Default: 18. This is a number that corresponds
roughly to the zoom level used in map frameworks like Leaflet.js, Openlayers etc.
Level of detail required for the address. Default: 18. This is a number that
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
like Leaflet.js, Openlayers etc.
In terms of address details the zoom levels are as follows:
zoom | address detail
@@ -97,7 +110,7 @@ options can be used at a time. (Default: 0)
* `polygon_threshold=0.0`
Simplify the output geometry before returning. The parameter is the
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
@@ -149,7 +162,7 @@ This overrides the specified machine readable format. (Default: 0)
"licence":"Data © OpenStreetMap contributors, ODbL 1.0. https:\/\/www.openstreetmap.org\/copyright",
"osm_type":"way",
"osm_id":"280940520",
"lat":"-34.4391708",
"lat":"-34.4391708",
"lon":"-58.7064573",
"place_rank":"26",
"category":"highway",

View File

@@ -1,30 +1,27 @@
# Search queries
The search API allows you to look up a location from a textual description.
Nominatim supports structured as well as free-form search queries.
The search API allows you to look up a location from a textual description
or address. Nominatim supports structured and free-form search queries.
The search query may also contain
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
Note that this only limits the items to be found, it's not suited to return complete
lists of OSM objects of a specific type. For those use [Overpass API](https://overpass-api.de/).
This can be used to narrow down the kind of objects to be returned.
!!! warning
Special phrases are not suitable to query all objects of a certain type in an
area. Nominatim will always just return a collection of the best matches. To
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
## Parameters
The search API has the following two formats:
```
https://nominatim.openstreetmap.org/search/<query>?<params>
```
This format only accepts a free-form query string where the
parts of the query are separated by slashes.
The search API has the following format:
```
https://nominatim.openstreetmap.org/search?<params>
```
In this form, the query may be given through two different sets of parameters:
The search term may be specified with two different sets of parameters:
* `q=<query>`
@@ -46,13 +43,13 @@ In this form, the query may be given through two different sets of parameters:
Structured requests are faster but are less robust against alternative
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
All three query forms accept the additional parameters listed below.
Both query forms accept the additional parameters listed below.
### Output format
* `format=[html|xml|json|jsonv2|geojson|geocodejson]`
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format. (Default: html)
See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
* `json_callback=<string>`
@@ -92,16 +89,20 @@ comma-separated list of language codes.
* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
Limit search results to one or more countries. `<countrycode>` must be the
ISO 3166-1alpha2 code, e.g. `gb` for the United Kingdom, `de` for Germany.
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
e.g. `gb` for the United Kingdom, `de` for Germany.
Each place in Nominatim is assigned to one country code based
on OSM country boundaries. In rare cases a place may not be in any country
at all, for example, in international waters.
* `exclude_place_ids=<place_id,[place_id],[place_id]`
If you do not want certain OSM objects to appear in the search
result, give a comma separated list of the `place_id`s you want to skip.
This can be used to broaden search results. For example, if a previous
query only returned a few results, then including those here would cause
the search to return other, less accurate, matches (if possible).
This can be used to retrieve additional search results. For example, if a
previous query only returned a few results, then including those here would
cause the search to return other, less accurate, matches (if possible).
* `limit=<integer>`
@@ -112,16 +113,17 @@ Limit the number of returned results. (Default: 10, Maximum: 50)
* `viewbox=<x1>,<y1>,<x2>,<y2>`
The preferred area to find search results. Any two corner points of the box
are accepted in any order as long as they span a real box. `x` is longitude,
are accepted as long as they span a real box. `x` is longitude,
`y` is latitude.
* `bounded=[0|1]`
When a viewbox is given, restrict the result to items contained with that
When a viewbox is given, restrict the result to items contained within that
viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
only search is allowed. In this case, give the special keyword for the
amenity in square brackets, e.g. `[pub]`. (Default: 0)
only search is allowed. Give the special keyword for the amenity in square
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
There is no guarantee that the result is complete. (Default: 0)
### Polygon output
@@ -136,7 +138,7 @@ options can be used at a time. (Default: 0)
* `polygon_threshold=0.0`
Simplify the output geometry before returning. The parameter is the
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
@@ -150,13 +152,11 @@ address to identify your requests. See Nominatim's [Usage Policy](https://operat
* `dedupe=[0|1]`
Sometimes you have several objects in OSM identifying the same place or
object in reality. The simplest case is a street being split in many
object in reality. The simplest case is a street being split into many
different OSM ways due to different characteristics. Nominatim will
attempt to detect such duplicates and only return one match unless
this parameter is set to 0. (Default: 1)
* `debug=[0|1]`
Output assorted developer debug information. Data on internals of Nominatim's
@@ -168,21 +168,27 @@ This overrides the specified machine readable format. (Default: 0)
## Examples
##### XML with polygon points
##### XML with kml polygon
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search/gb/birmingham/pilkington%20avenue/135?format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search/gb/birmingham/pilkington%20avenue/135?format=xml&polygon=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search/135%20pilkington%20avenue,%20birmingham?format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search/135%20pilkington%20avenue,%20birmingham?format=xml&polygon=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
```xml
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
polygonpoints="[['-1.81592098644987','52.5487429714954'],['-1.81592290792183','52.5487234624632'],...]"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
class="place" type="house">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<village>Wylde Green</village>

66
docs/api/Status.md Normal file
View File

@@ -0,0 +1,66 @@
# Status
Useful for checking if the service and database is running. The JSON output also shows
when the database was last updated.
## Parameters
* `format=[text|json]` (defaults to 'text')
## Output
#### Text format
```
https://nominatim.openstreetmap.org/status.php
```
will return HTTP status code 200 and print `OK`.
On error it will return HTTP status code 500 and print a message, e.g.
`ERROR: Database connection failed`.
#### JSON format
```
https://nominatim.openstreetmap.org/status.php?format=json
```
will return HTTP code 200 and a structure
```json
{
"status": 0,
"message": "OK",
"data_updated": "2020-05-04T14:47:00+00:00",
"software_version": "3.6.0-0",
"database_version": "3.6.0-0"
}
```
The `software_version` field contains the version of Nominatim used to serve
the API. The `database_version` field contains the version of the data format
in the database.
On error will also return HTTP status code 200 and a structure with error
code and message, e.g.
```json
{
"status": 700,
"message": "Database connection failed"
}
```
Possible status codes are
| | message | notes |
|-----|----------------------|---------------------------------------------------|
| 700 | "No database" | connection failed |
| 701 | "Module failed" | database could not load nominatim.so |
| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
| 703 | "Query failed" | test query against a database table failed |
| 704 | "No value" | test query worked but returned no results |

View File

@@ -1,4 +0,0 @@
# Additional Data Sources
This guide explains how data sources other than OpenStreetMap mentioned in
the install instructions got obtained and converted.

View File

@@ -0,0 +1,128 @@
# Setting up Nominatim for Development
This chapter gives an overview how to set up Nominatim for developement
and how to run tests.
!!! Important
This guide assumes that you develop under the latest version of Ubuntu. You
can of course also use your favourite distribution. You just might have to
adapt the commands below slightly, in particular the commands for installing
additional software.
## Installing Nominatim
The first step is to install Nominatim itself. Please follow the installation
instructions in the [Admin section](../admin/Installation.md). You don't need
to set up a webserver for development, the webserver that is included with PHP
is sufficient.
If you want to run Nominatim in a VM via Vagrant, use the default `ubuntu` setup.
Vagrant's libvirt provider runs out-of-the-box under Ubuntu. You also need to
install an NFS daemon to enable directory sharing between host and guest. The
following packages should get you started:
sudo apt install vagrant vagrant-libvirt libvirt-daemon nfs-kernel-server
## Prerequisites for testing and documentation
The Nominatim test suite consists of behavioural tests (using behave) and
unit tests (using PHPUnit for PHP code and pytest for Python code).
It has the following additional requirements:
* [behave test framework](https://behave.readthedocs.io) >= 1.2.5
* [phpunit](https://phpunit.de) >= 7.3
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
* [pytest](https://pytest.org)
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
### Installing prerequisites on Ubuntu/Debian
Some of the Python packages require the newest version which is not yet
available with the current distributions. Therefore it is recommended to
install pip to get the newest versions.
To install all necessary packages run:
```sh
sudo apt install php-cgi phpunit php-codesniffer \
python3-pip python3-setuptools python3-dev pylint
pip3 install --user behave mkdocs pytest
```
The `mkdocs` executable will be located in `.local/bin`. You may have to add
this directory to your path, for example by running:
```
echo 'export PATH=~/.local/bin:$PATH' > ~/.profile
```
If your distribution does not have PHPUnit 7.3+, you can install it (as well
as CodeSniffer) via composer:
```
sudo apt-get install composer
composer global require "squizlabs/php_codesniffer=*"
composer global require "phpunit/phpunit=8.*"
```
The binaries are found in `.config/composer/vendor/bin`. You need to add this
to your PATH as well:
```
echo 'export PATH=~/.config/composer/vendor/bin:$PATH' > ~/.profile
```
## Executing Tests
All tests are located in the `/test` directory.
To run all tests just go to the build directory and run make:
```sh
cd build
make test
```
For more information about the structure of the tests and how to change and
extend the test suite, see the [Testing chapter](Testing.md).
## Documentation Pages
The [Nominatim documentation](https://nominatim.org/release-docs/develop/) is
built using the [MkDocs](https://www.mkdocs.org/) static site generation
framework. The master branch is automatically deployed every night on
[https://nominatim.org/release-docs/develop/](https://nominatim.org/release-docs/develop/)
To build the documentation, go to the build directory and run
```
make doc
INFO - Cleaning site directory
INFO - Building documentation to directory: /home/vagrant/build/site-html
```
This runs `mkdocs build` plus extra transformation of some files and adds
symlinks (see `CMakeLists.txt` for the exact steps).
Now you can start webserver for local testing
```
build> mkdocs serve
[server:296] Serving on http://127.0.0.1:8000
[handlers:62] Start watching changes
```
If you develop inside a Vagrant virtual machine, use a port that is forwarded
to your host:
```
build> mkdocs serve --dev-addr 0.0.0.0:8088
[server:296] Serving on http://0.0.0.0:8088
[handlers:62] Start watching changes
```

View File

@@ -1,36 +0,0 @@
# Documentation Pages
The [Nominatim documentation](https://nominatim.org/release-docs/develop/) is built using the [MkDocs](https://www.mkdocs.org/) static site generation framework. The master branch is automatically deployed every night on under [https://nominatim.org/release-docs/develop/]()
To preview local changes:
1. Install MkDocs
```
pip3 install --user mkdocs
```
2. In build directory run
```
make doc
INFO - Cleaning site directory
INFO - Building documentation to directory: /home/vagrant/build/site-html
```
This runs `mkdocs build` plus extra transformion of some files and adds symlinks (see `CMakeLists.txt` for the exact steps).
3. Start webserver for local testing
```
mkdocs serve
[server:296] Serving on http://127.0.0.1:8000
[handlers:62] Start watching changes
```
If you develop inside a Vagrant virtual machine:
* add port forwarding to your Vagrantfile, e.g. `config.vm.network "forwarded_port", guest: 8000, host: 8000`
* use `mkdocs serve --dev-addr 0.0.0.0:8000` because the default localhost
IP does not get forwarded.

View File

@@ -1,8 +1,8 @@
# OSM Data Import
OSM data is initially imported using osm2pgsql. Nominatim uses its own data
output style 'gazetteer', which differs from the output style created for
map rendering.
OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
Nominatim uses its own data output style 'gazetteer', which differs from the
output style created for map rendering.
## Database Layout
@@ -29,7 +29,7 @@ once with `class` of `highway` and once with a `class` of `bridge`. Thus the
## Configuring the Import
How tags are interpreted and assigned to the different `place` columns can be
configured via the import style configuration file (`CONST_Import_style`). This
configured via the import style configuration file (`NOMINATIM_IMPORT_STYLE`). This
is a JSON file which contains a list of rules which are matched against every
tag of every object and then assign the tag its specific role.

45
docs/develop/Postcodes.md Normal file
View File

@@ -0,0 +1,45 @@
# Postcodes in Nominatim
The blog post
[Nominatim and Postcodes](https://www.openstreetmap.org/user/lonvia/diary/43143)
describes the handling implemented since Nominatim 3.1.
Postcode centroids (aka 'calculated postcodes') are generated by looking at all
postcodes of a country, grouping them and calculating the geometric centroid.
There is currently no logic to deal with extreme outliers (typos or other
mistakes in OSM data). There is also no check if a postcodes adheres to a
country's format, e.g. if Swiss postcodes are 4 digits.
## Regular updating calculated postcodes
The script to rerun the calculation is
`nominatim refresh --postcodes`
and runs once per night on nominatim.openstreetmap.org.
## Finding places that share a specific postcode
In the Nominatim database run
```sql
SELECT address->'postcode' as pc,
osm_type, osm_id, class, type,
st_x(centroid) as lon, st_y(centroid) as lat
FROM placex
WHERE country_code='fr'
AND upper(trim (both ' ' from address->'postcode')) = '33210';
```
Alternatively on [Overpass](https://overpass-turbo.eu/) run the following query
```
[out:json][timeout:250];
area["name"="France"]->.boundaryarea;
(
nwr(area.boundaryarea)["addr:postcode"="33210"];
);
out body;
>;
out skel qt;
```

View File

@@ -7,24 +7,74 @@ different purposes, which are explained in this chapter.
## Search rank
The search rank describes the extent and importance of a place. It is used
when ranking search result. Simply put, if there are two results for a
when ranking search results. Simply put, if there are two results for a
search query which are otherwise equal, then the result with the _lower_
search rank will be appear higher in the result list.
Search ranks are not so important these days because many well-known
places use the Wikipedia importance ranking instead.
The following table gives an overview of the kind of features that Nominatim
expects for each rank:
rank | typical place types | extent
-------|---------------------------------|-------
1-3 | oceans, continents | -
4 | countries | -
5-9 | states, regions, provinces | -
10-12 | counties | -
13-16 | cities, municipalities, islands | 15 km
17-18 | towns, boroughs | 4 km
19 | villages, suburbs | 2 km
20 | hamlets, farms, neighbourhoods | 1 km
21-25 | isolated dwellings, city blocks | 500 m
The extent column describes how far a feature is assumed to reach when it
is mapped only as a point. Larger features like countries and states are usually
available with their exact area in the OpenStreetMap data. That is why no extent
is given.
## Address rank
The address rank describes where a place shows up in an address hierarchy.
Usually only administrative boundaries and place nodes and areas are
eligible to be part of an address. All other objects have an address rank
of 0.
eligible to be part of an address. Places that should not appear in the
address must have an address rank of 0.
Note that the search rank of a place plays a role in the address computation
as well. When collecting the places that should make up the address parts
then only places are taken into account that have a lower address rank than
the search rank of the base object.
The following table gives an overview how ranks are mapped to address parts:
rank | address part
-------------|-------------
1-3 | _unused_
4 | country
5-9 | state
10-12 | county
13-16 | city
17-21 | suburb
22-24 | neighbourhood
25 | squares, farms, localities
26-27 | street
28-30 | POI/house number
The country rank 4 usually doesn't show up in the address parts of an object.
The country is determined indirectly from the country code.
Ranks 5-24 can be assigned more or less freely. They make up the major part
of the address.
Rank 25 is also an addressing rank but it is special because while it can be
the parent to a POI with an addr:place of the same name, it cannot be a parent
to streets. Use it for place features that are technically on the same level
as a street (e.g. squares, city blocks) or for places that should not normally
appear in an address unless explicitly tagged so (e.g place=locality which
should be uninhabited and as such not addressable).
The street ranks 26 and 27 are handled slightly differently. Only one object
from these ranks shows up in an address.
For POI level objects like shops, buildings or house numbers always use rank 30.
Ranks 28 is reserved for house number interpolations. 29 is for internal use
only.
## Rank configuration
@@ -37,9 +87,9 @@ into the database. There are a few hard-coded rules for the assignment:
* highway nodes
* landuse that is not an area
Other than that, the ranks can be freely assigned via the JSON file
defined with `CONST_Address_Level_Config` according to their type and
the country they are in.
Other than that, the ranks can be freely assigned via the JSON file according
to their type and the country they are in. The name of the config file to be
used can be changed with the setting `NOMINATIM_ADDRESS_LEVEL_CONFIG`.
The address level configuration must consist of an array of configuration
entries, each containing a tag definition and an optional country array:
@@ -84,7 +134,7 @@ Then the rank is used when no more specific value is found for the given
key.
Countries and key/value combination may appear in multiple definitions. Just
make sure that each combination of counrty/key/value appears only once per
make sure that each combination of country/key/value appears only once per
file. Otherwise the import will fail with a UNIQUE INDEX constraint violation
on import.

View File

@@ -1,22 +1,10 @@
This directory contains functional and unit tests for the Nominatim API.
# Nominatim Test Suite
Prerequisites
=============
This chapter describes the tests in the `/test` directory, how they are
structured and how to extend them. For a quick introduction on how to run
the tests, see the [Development setup chapter](Development-Environment.md).
* Python 3 (https://www.python.org/)
* behave test framework >= 1.2.5 (https://github.com/behave/behave)
* nose (https://nose.readthedocs.org)
* pytidylib (http://countergram.com/open-source/pytidylib)
* psycopg2 (http://initd.org/psycopg/)
To get the prerequisites on a a fresh Ubuntu LTS 16.04 run:
[sudo] apt-get install python3-dev python3-pip python3-psycopg2 python3-tidylib phpunit php-cgi
pip3 install --user behave nose
Overall structure
=================
## Overall structure
There are two kind of tests in this test suite. There are functional tests
which test the API interface using a BDD test framework and there are unit
@@ -33,33 +21,55 @@ This test directory is sturctured as follows:
| +- api Tests for API endpoints (search, reverse, etc.)
|
+- php PHP unit tests
+- python Python unit tests
+- scenes Geometry test data
+- testdb Base data for generating API test database
```
PHP Unit Tests
==============
## PHP Unit Tests (`test/php`)
Unit tests can be found in the php/ directory and tests selected php functions.
Very low coverage.
Unit tests for PHP code can be found in the `php/` directory. They test selected
PHP functions. Very low coverage.
To execute the test suite run
cd test/php
phpunit ../
UNIT_TEST_DSN='pgsql:dbname=nominatim_unit_tests' phpunit ../
It will read phpunit.xml which points to the library, test path, bootstrap
strip and set other parameters.
strip and sets other parameters.
It will use (and destroy) a local database 'nominatim_unit_tests'. You can set
a different connection string with e.g. UNIT_TEST_DSN='pgsql:dbname=foo_unit_tests'.
BDD Functional Tests
====================
## Python Unit Tests (`test/python`)
Unit tests for Python code can be found in the `python/` directory. The goal is
to have complete coverage of the Python library in `nominatim`.
To execute the tests run
py.test-3 test/python
or
pytest test/python
The name of the pytest binary depends on your installation.
## BDD Functional Tests (`test/bdd`)
Functional tests are written as BDD instructions. For more information on
the philosophy of BDD testing, see http://pythonhosted.org/behave/philosophy.html
the philosophy of BDD testing, see the
[Behave manual](http://pythonhosted.org/behave/philosophy.html).
Usage
-----
The following explanation assume that the reader is familiar with the BDD
notations of features, scenarios and steps.
All possible steps can be found in the `steps` directory and should ideally
be documented.
### General Usage
To run the functional tests, do
@@ -73,58 +83,42 @@ The tests can be configured with a set of environment variables (`behave -D key=
the test databases (db tests)
* `TEST_DB` - name of test database (db tests)
* `API_TEST_DB` - name of the database containing the API test data (api tests)
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
* `DB_HOST` - (optional) hostname of database host
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
* `DB_PASS` - (optional) password for database login
* `SERVER_MODULE_PATH` - (optional) path on the Postgres server to Nominatim
module shared library file
* `TEST_SETTINGS_TEMPLATE` - file to write temporary Nominatim settings to
* `REMOVE_TEMPLATE` - if true, the template database will not be reused during
the next run. Reusing the base templates speeds up tests
considerably but might lead to outdated errors for some
changes in the database layout.
* `REMOVE_TEMPLATE` - if true, the template and API database will not be reused
during the next run. Reusing the base templates speeds
up tests considerably but might lead to outdated errors
for some changes in the database layout.
* `KEEP_TEST_DB` - if true, the test database will not be dropped after a test
is finished. Should only be used if one single scenario is
run, otherwise the result is undefined.
Logging can be defined through command line parameters of behave itself. Check
out `behave --help` for details. Also keep an eye out for the 'work-in-progress'
out `behave --help` for details. Also have a look at the 'work-in-progress'
feature of behave which comes in handy when writing new tests.
Writing Tests
-------------
The following explanation assume that the reader is familiar with the BDD
notations of features, scenarios and steps.
All possible steps can be found in the `steps` directory and should ideally
be documented.
### API Tests (`test/bdd/api`)
These tests are meant to test the different API endpoints and their parameters.
They require a preimported test database, which consists of the import of a
planet extract. A precompiled PBF with the necessary data can be downloaded from
https://www.nominatim.org/data/test/nominatim-api-testdata.pbf
They require to import several datasets into a test database. This is normally
done automatically during setup of the test. The API test database is then
kept around and reused in subsequent runs of behave. Use `behave -DREMOVE_TEMPLATE`
to force a reimport of the database.
The polygons defining the extract can be found in the test/testdb
directory. There is also a reduced set of wikipedia data for this extract,
which you need to import as well. For Tiger tests the data of South Dakota
is required. Get the Tiger files `46*`.
The official test dataset is saved in the file `test/testdb/apidb-test-data.pbf`
and compromises the following data:
The official test dataset is derived from the 160725 planet. Newer
planets are likely to work as well but you may see isolated test
failures where the data has changed. To recreate the input data
for the test database run:
* Geofabrik extract of Liechtenstein
* extract of Autauga country, Alabama, US (for tests against Tiger data)
* additional data from `test/testdb/additional_api_test.data.osm`
wget https://ftp5.gwdg.de/pub/misc/openstreetmap/planet.openstreetmap.org/pbf/planet-180924.osm.pbf
osmconvert planet-180924.osm.pbf -B=test/testdb/testdb.polys -o=testdb.pbf
Before importing make sure to add the following to your local settings:
@define('CONST_Database_DSN', 'pgsql://@/test_api_nominatim');
@define('CONST_Wikipedia_Data_Path', CONST_BasePath.'/test/testdb');
API tests should only be testing the functionality of the website PHP code.
Most tests should be formulated as BDD DB creation tests (see below) instead.
#### Code Coverage
@@ -134,7 +128,7 @@ On Debian/Ubuntu run:
apt-get install php-codecoverage php-xdebug
The run the API tests as follows:
Then run the API tests as follows:
behave api -DPHPCOV=<coverage output dir>
@@ -143,11 +137,11 @@ the [phpcov](https://github.com/sebastianbergmann/phpcov) tool:
phpcov merge --html=<report output dir> <coverage output dir>
### Indexing Tests (`test/bdd/db`)
### DB Creation Tests (`test/bdd/db`)
These tests check the import and update of the Nominatim database. They do not
test the correctness of osm2pgsql. Each test will write some data into the `place`
table (and optionally `the planet_osm_*` tables if required) and then run
table (and optionally the `planet_osm_*` tables if required) and then run
Nominatim's processing functions on that.
These tests need to create their own test databases. By default they will be
@@ -158,4 +152,8 @@ needs superuser rights for postgres.
### Import Tests (`test/bdd/osm2pgsql`)
These tests check that data is imported correctly into the place table. They
use the same template database as the Indexing tests, so the same remarks apply.
use the same template database as the DB Creation tests, so the same remarks apply.
Note that most testing of the gazetteer output of osm2pgsql is done in the tests
of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
the osm2pgsql and Nominatim code.

View File

@@ -0,0 +1,34 @@
# Additional Data Sources
This guide explains how data sources other than OpenStreetMap mentioned in
the install instructions got obtained and converted.
## Country grid
Nominatim uses pre-generated country borders data. In case one imports only
a subset of a country. And to assign each place a partition. Nominatim
database tables are split into partitions for performance.
More details in [osm-search/country-grid-data](https://github.com/osm-search/country-grid-data).
## US Census TIGER
For the United States you can choose to import additonal street-level data.
The data isn't mixed into OSM data but queried as fallback when no OSM
result can be found.
More details in [osm-search/TIGER-data](https://github.com/osm-search/TIGER-data).
## GB postcodes
For Great Britain you can choose to import Royalmail postcode centroids.
More details in [osm-search/gb-postcode-data](https://github.com/osm-search/gb-postcode-data).
## Wikipedia & Wikidata rankings
Nominatim can import "importance" data of place names. This greatly
improves ranking of results.
More details in [osm-search/wikipedia-wikidata](https://github.com/osm-search/wikipedia-wikidata).

View File

@@ -9,14 +9,14 @@ the address computation and the search frontend.
The __data import__ stage reads the raw OSM data and extracts all information
that is useful for geocoding. This part is done by osm2pgsql, the same tool
that can also be used to import a rendering database. It uses the special
gazetteer output plugin in `osm2pgsql/output-gazetter.[ch]pp`. The result of
gazetteer output plugin in `osm2pgsql/src/output-gazetter.[ch]pp`. The result of
the import can be found in the database table `place`.
The __address computation__ or __indexing__ stage takes the data from `place`
and adds additional information needed for geocoding. It ranks the places by
importance, links objects that belong together and computes addresses and
the search index. Most of this work is done in PL/pgSQL via database triggers
and can be found in the file `sql/functions.sql`.
and can be found in the files in the `sql/functions/` directory.
The __search frontend__ implements the actual API. It takes search
and reverse geocoding queries from the user, looks up the data and

View File

@@ -1,3 +1,15 @@
.toctree-l3 {
display: none!important
}
table {
margin-bottom: 12pt
}
th, td {
padding: 1pt 12pt;
}
th {
background-color: #eee;
}

View File

@@ -1,7 +1,7 @@
site_name: Nominatim Documentation
theme: readthedocs
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
site_url: http://nominatim.org
site_url: https://nominatim.org
repo_url: https://github.com/openstreetmap/Nominatim
pages:
- 'Introduction' : 'index.md'
@@ -11,31 +11,34 @@ pages:
- 'Reverse': 'api/Reverse.md'
- 'Address Lookup': 'api/Lookup.md'
- 'Details' : 'api/Details.md'
- 'Status' : 'api/Status.md'
- 'Place Output Formats': 'api/Output.md'
- 'FAQ': 'api/Faq.md'
- 'Administration Guide':
- 'Basic Installation': 'admin/Installation.md'
- 'Importing and Updating' : 'admin/Import-and-Update.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy' : 'admin/Deployment.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Migration from older Versions' : 'admin/Migration.md'
- 'Troubleshooting' : 'admin/Faq.md'
- 'Developers Guide':
- 'Overview' : 'develop/overview.md'
- 'Setup for Development' : 'develop/Development-Environment.md'
- 'Architecture Overview' : 'develop/overview.md'
- 'OSM Data Import' : 'develop/Import.md'
- 'Place Ranking' : 'develop/Ranking.md'
- 'Documentation' : 'develop/Documentation.md'
- 'External Data Sources':
- 'Overview' : 'data-sources/overview.md'
- 'US Census (Tiger)': 'data-sources/US-Tiger.md'
- 'GB Postcodes': 'data-sources/GB-Postcodes.md'
- 'Country Grid': 'data-sources/Country-Grid.md'
- 'Wikipedia & Wikidata': 'data-sources/Wikipedia-Wikidata.md'
- 'Postcodes' : 'develop/Postcodes.md'
- 'Testing' : 'develop/Testing.md'
- 'External Data Sources': 'develop/data-sources.md'
- 'Appendix':
- 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
- 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'
- 'Installation on CentOS 8' : 'appendix/Install-on-Centos-8.md'
- 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
- 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
markdown_extensions:
- codehilite:
use_pygments: False
- codehilite
- admonition
- toc:
permalink:
extra_css: [extra.css]
extra_css: [extra.css, styles.css]

69
docs/styles.css Normal file
View File

@@ -0,0 +1,69 @@
.codehilite .hll { background-color: #ffffcc }
.codehilite { background: #f0f0f0; }
.codehilite .c { color: #60a0b0; font-style: italic } /* Comment */
.codehilite .err { /* border: 1px solid #FF0000 */ } /* Error */
.codehilite .k { color: #007020; font-weight: bold } /* Keyword */
.codehilite .o { color: #666666 } /* Operator */
.codehilite .ch { color: #60a0b0; font-style: italic } /* Comment.Hashbang */
.codehilite .cm { color: #60a0b0; font-style: italic } /* Comment.Multiline */
.codehilite .cp { color: #007020 } /* Comment.Preproc */
.codehilite .cpf { color: #60a0b0; font-style: italic } /* Comment.PreprocFile */
.codehilite .c1 { color: #60a0b0; font-style: italic } /* Comment.Single */
.codehilite .cs { color: #60a0b0; background-color: #fff0f0 } /* Comment.Special */
.codehilite .gd { color: #A00000 } /* Generic.Deleted */
.codehilite .ge { font-style: italic } /* Generic.Emph */
.codehilite .gr { color: #FF0000 } /* Generic.Error */
.codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.codehilite .gi { color: #00A000 } /* Generic.Inserted */
.codehilite .go { color: #888888 } /* Generic.Output */
.codehilite .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
.codehilite .gs { font-weight: bold } /* Generic.Strong */
.codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.codehilite .gt { color: #0044DD } /* Generic.Traceback */
.codehilite .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
.codehilite .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
.codehilite .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
.codehilite .kp { color: #007020 } /* Keyword.Pseudo */
.codehilite .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
.codehilite .kt { color: #902000 } /* Keyword.Type */
.codehilite .m { color: #40a070 } /* Literal.Number */
.codehilite .s { color: #4070a0 } /* Literal.String */
.codehilite .na { color: #4070a0 } /* Name.Attribute */
.codehilite .nb { color: #007020 } /* Name.Builtin */
.codehilite .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
.codehilite .no { color: #60add5 } /* Name.Constant */
.codehilite .nd { color: #555555; font-weight: bold } /* Name.Decorator */
.codehilite .ni { color: #d55537; font-weight: bold } /* Name.Entity */
.codehilite .ne { color: #007020 } /* Name.Exception */
.codehilite .nf { color: #06287e } /* Name.Function */
.codehilite .nl { color: #002070; font-weight: bold } /* Name.Label */
.codehilite .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
.codehilite .nt { color: #062873; font-weight: bold } /* Name.Tag */
.codehilite .nv { color: #bb60d5 } /* Name.Variable */
.codehilite .ow { color: #007020; font-weight: bold } /* Operator.Word */
.codehilite .w { color: #bbbbbb } /* Text.Whitespace */
.codehilite .mb { color: #40a070 } /* Literal.Number.Bin */
.codehilite .mf { color: #40a070 } /* Literal.Number.Float */
.codehilite .mh { color: #40a070 } /* Literal.Number.Hex */
.codehilite .mi { color: #40a070 } /* Literal.Number.Integer */
.codehilite .mo { color: #40a070 } /* Literal.Number.Oct */
.codehilite .sa { color: #4070a0 } /* Literal.String.Affix */
.codehilite .sb { color: #4070a0 } /* Literal.String.Backtick */
.codehilite .sc { color: #4070a0 } /* Literal.String.Char */
.codehilite .dl { color: #4070a0 } /* Literal.String.Delimiter */
.codehilite .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
.codehilite .s2 { color: #4070a0 } /* Literal.String.Double */
.codehilite .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
.codehilite .sh { color: #4070a0 } /* Literal.String.Heredoc */
.codehilite .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
.codehilite .sx { color: #c65d09 } /* Literal.String.Other */
.codehilite .sr { color: #235388 } /* Literal.String.Regex */
.codehilite .s1 { color: #4070a0 } /* Literal.String.Single */
.codehilite .ss { color: #517918 } /* Literal.String.Symbol */
.codehilite .bp { color: #007020 } /* Name.Builtin.Pseudo */
.codehilite .fm { color: #06287e } /* Name.Function.Magic */
.codehilite .vc { color: #bb60d5 } /* Name.Variable.Class */
.codehilite .vg { color: #bb60d5 } /* Name.Variable.Global */
.codehilite .vi { color: #bb60d5 } /* Name.Variable.Instance */
.codehilite .vm { color: #bb60d5 } /* Name.Variable.Magic */
.codehilite .il { color: #40a070 } /* Literal.Number.Integer.Long */

View File

@@ -2,17 +2,20 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/ClassTypes.php');
require_once(CONST_LibDir.'/ClassTypes.php');
/**
* Detailed list of address parts for a single result
*/
class AddressDetails
{
private $iPlaceID;
private $aAddressLines;
public function __construct(&$oDB, $iPlaceID, $sHousenumber, $mLangPref)
{
$this->iPlaceID = $iPlaceID;
if (is_array($mLangPref)) {
$mLangPref = $oDB->getArraySQL($oDB->getDBQuotedList($mLangPref));
}
@@ -58,48 +61,94 @@ class AddressDetails
return join(', ', $aParts);
}
public function getAddressNames()
public function getAddressNames($sCountry = null)
{
$aAddress = array();
$aFallback = array();
foreach ($this->aAddressLines as $aLine) {
if (!self::isAddress($aLine)) {
continue;
}
$bFallback = false;
$aTypeLabel = ClassTypes\getInfo($aLine);
$sTypeLabel = ClassTypes\getLabelTag($aLine);
if ($aTypeLabel === false) {
$aTypeLabel = ClassTypes\getFallbackInfo($aLine);
$bFallback = true;
}
$sName = false;
if (isset($aLine['localname']) && $aLine['localname']) {
$sName = null;
if (isset($aLine['localname']) && $aLine['localname']!=='') {
$sName = $aLine['localname'];
} elseif (isset($aLine['housenumber']) && $aLine['housenumber']) {
} elseif (isset($aLine['housenumber']) && $aLine['housenumber']!=='') {
$sName = $aLine['housenumber'];
}
if ($sName) {
$sTypeLabel = strtolower(isset($aTypeLabel['simplelabel']) ? $aTypeLabel['simplelabel'] : $aTypeLabel['label']);
$sTypeLabel = str_replace(' ', '_', $sTypeLabel);
if (isset($sName)) {
$sTypeLabel = strtolower(str_replace(' ', '_', $sTypeLabel));
if (!isset($aAddress[$sTypeLabel])
|| isset($aFallback[$sTypeLabel])
|| $aLine['class'] == 'place'
) {
$aAddress[$sTypeLabel] = $sName;
if ($bFallback) {
$aFallback[$sTypeLabel] = $bFallback;
}
}
}
}
return $aAddress;
}
/**
* Annotates the given json with geocodejson address information fields.
*
* @param array $aJson Json hash to add the fields to.
*
* Geocodejson has the following fields:
* street, locality, postcode, city, district,
* county, state, country
*
* Postcode and housenumber are added by type, district is not used.
* All other fields are set according to address rank.
*/
public function addGeocodeJsonAddressParts(&$aJson)
{
foreach (array_reverse($this->aAddressLines) as $aLine) {
if (!$aLine['isaddress']) {
continue;
}
if (!isset($aLine['localname']) || $aLine['localname'] == '') {
continue;
}
if ($aLine['type'] == 'postcode' || $aLine['type'] == 'postal_code') {
$aJson['postcode'] = $aLine['localname'];
continue;
}
if ($aLine['type'] == 'house_number') {
$aJson['housenumber'] = $aLine['localname'];
continue;
}
if ($this->iPlaceID == $aLine['place_id']) {
continue;
}
$iRank = (int)$aLine['rank_address'];
if ($iRank > 25 && $iRank < 28) {
$aJson['street'] = $aLine['localname'];
} elseif ($iRank >= 22 && $iRank <= 25) {
$aJson['locality'] = $aLine['localname'];
} elseif ($iRank >= 17 && $iRank <= 21) {
$aJson['district'] = $aLine['localname'];
} elseif ($iRank >= 13 && $iRank <= 16) {
$aJson['city'] = $aLine['localname'];
} elseif ($iRank >= 10 && $iRank <= 12) {
$aJson['county'] = $aLine['localname'];
} elseif ($iRank >= 5 && $iRank <= 9) {
$aJson['state'] = $aLine['localname'];
} elseif ($iRank == 4) {
$aJson['country'] = $aLine['localname'];
}
}
}
public function getAdminLevels()
{
$aAddress = array();

568
lib-php/ClassTypes.php Normal file
View File

@@ -0,0 +1,568 @@
<?php
namespace Nominatim\ClassTypes;
/**
* Create a label tag for the given place that can be used as an XML name.
*
* @param array[] $aPlace Information about the place to label.
*
* A label tag groups various object types together under a common
* label. The returned value is lower case and has no spaces
*/
function getLabelTag($aPlace, $sCountry = null)
{
$iRank = (int) ($aPlace['rank_address'] ?? 30);
$sLabel;
if (isset($aPlace['place_type'])) {
$sLabel = $aPlace['place_type'];
} elseif ($aPlace['class'] == 'boundary' && $aPlace['type'] == 'administrative') {
$sLabel = getBoundaryLabel($iRank/2, $sCountry);
} elseif ($aPlace['type'] == 'postal_code') {
$sLabel = 'postcode';
} elseif ($iRank < 26) {
$sLabel = $aPlace['type'];
} elseif ($iRank < 28) {
$sLabel = 'road';
} elseif ($aPlace['class'] == 'place'
&& ($aPlace['type'] == 'house_number' ||
$aPlace['type'] == 'house_name' ||
$aPlace['type'] == 'country_code')
) {
$sLabel = $aPlace['type'];
} else {
$sLabel = $aPlace['class'];
}
return strtolower(str_replace(' ', '_', $sLabel));
}
/**
* Create a label for the given place.
*
* @param array[] $aPlace Information about the place to label.
*/
function getLabel($aPlace, $sCountry = null)
{
if (isset($aPlace['place_type'])) {
return ucwords(str_replace('_', ' ', $aPlace['place_type']));
}
if ($aPlace['class'] == 'boundary' && $aPlace['type'] == 'administrative') {
return getBoundaryLabel(($aPlace['rank_address'] ?? 30)/2, $sCountry ?? null);
}
// Return a label only for 'important' class/type combinations
if (getImportance($aPlace) !== null) {
return ucwords(str_replace('_', ' ', $aPlace['type']));
}
return null;
}
/**
* Return a simple label for an administrative boundary for the given country.
*
* @param int $iAdminLevel Content of admin_level tag.
* @param string $sCountry Country code of the country where the object is
* in. May be null, in which case a world-wide
* fallback is used.
* @param string $sFallback String to return if no explicit string is listed.
*
* @return string
*/
function getBoundaryLabel($iAdminLevel, $sCountry, $sFallback = 'Administrative')
{
static $aBoundaryList = array (
'default' => array (
1 => 'Continent',
2 => 'Country',
3 => 'Region',
4 => 'State',
5 => 'State District',
6 => 'County',
7 => 'Municipality',
8 => 'City',
9 => 'City District',
10 => 'Suburb',
11 => 'Neighbourhood',
12 => 'City Block'
),
'no' => array (
3 => 'State',
4 => 'County'
),
'se' => array (
3 => 'State',
4 => 'County'
)
);
if (isset($aBoundaryList[$sCountry])
&& isset($aBoundaryList[$sCountry][$iAdminLevel])
) {
return $aBoundaryList[$sCountry][$iAdminLevel];
}
return $aBoundaryList['default'][$iAdminLevel] ?? $sFallback;
}
/**
* Return an estimated radius of how far the object node extends.
*
* @param array[] $aPlace Information about the place. This must be a node
* feature.
*
* @return float The radius around the feature in degrees.
*/
function getDefRadius($aPlace)
{
$aSpecialRadius = array(
'place:continent' => 25,
'place:country' => 7,
'place:state' => 2.6,
'place:province' => 2.6,
'place:region' => 1.0,
'place:county' => 0.7,
'place:city' => 0.16,
'place:municipality' => 0.16,
'place:island' => 0.32,
'place:postcode' => 0.16,
'place:town' => 0.04,
'place:village' => 0.02,
'place:hamlet' => 0.02,
'place:district' => 0.02,
'place:borough' => 0.02,
'place:suburb' => 0.02,
'place:locality' => 0.01,
'place:neighbourhood'=> 0.01,
'place:quarter' => 0.01,
'place:city_block' => 0.01,
'landuse:farm' => 0.01,
'place:farm' => 0.01,
'place:airport' => 0.015,
'aeroway:aerodrome' => 0.015,
'railway:station' => 0.005
);
$sClassPlace = $aPlace['class'].':'.$aPlace['type'];
return $aSpecialRadius[$sClassPlace] ?? 0.00005;
}
/**
* Get the icon to use with the given object.
*/
function getIcon($aPlace)
{
$aIcons = array(
'boundary:administrative' => 'poi_boundary_administrative',
'place:city' => 'poi_place_city',
'place:town' => 'poi_place_town',
'place:village' => 'poi_place_village',
'place:hamlet' => 'poi_place_village',
'place:suburb' => 'poi_place_village',
'place:locality' => 'poi_place_village',
'place:airport' => 'transport_airport2',
'aeroway:aerodrome' => 'transport_airport2',
'railway:station' => 'transport_train_station2',
'amenity:place_of_worship' => 'place_of_worship_unknown3',
'amenity:pub' => 'food_pub',
'amenity:bar' => 'food_bar',
'amenity:university' => 'education_university',
'tourism:museum' => 'tourist_museum',
'amenity:arts_centre' => 'tourist_art_gallery2',
'tourism:zoo' => 'tourist_zoo',
'tourism:theme_park' => 'poi_point_of_interest',
'tourism:attraction' => 'poi_point_of_interest',
'leisure:golf_course' => 'sport_golf',
'historic:castle' => 'tourist_castle',
'amenity:hospital' => 'health_hospital',
'amenity:school' => 'education_school',
'amenity:theatre' => 'tourist_theatre',
'amenity:library' => 'amenity_library',
'amenity:fire_station' => 'amenity_firestation3',
'amenity:police' => 'amenity_police2',
'amenity:bank' => 'money_bank2',
'amenity:post_office' => 'amenity_post_office',
'tourism:hotel' => 'accommodation_hotel2',
'amenity:cinema' => 'tourist_cinema',
'tourism:artwork' => 'tourist_art_gallery2',
'historic:archaeological_site' => 'tourist_archaeological2',
'amenity:doctors' => 'health_doctors',
'leisure:sports_centre' => 'sport_leisure_centre',
'leisure:swimming_pool' => 'sport_swimming_outdoor',
'shop:supermarket' => 'shopping_supermarket',
'shop:convenience' => 'shopping_convenience',
'amenity:restaurant' => 'food_restaurant',
'amenity:fast_food' => 'food_fastfood',
'amenity:cafe' => 'food_cafe',
'tourism:guest_house' => 'accommodation_bed_and_breakfast',
'amenity:pharmacy' => 'health_pharmacy_dispensing',
'amenity:fuel' => 'transport_fuel',
'natural:peak' => 'poi_peak',
'natural:wood' => 'landuse_coniferous_and_deciduous',
'shop:bicycle' => 'shopping_bicycle',
'shop:clothes' => 'shopping_clothes',
'shop:hairdresser' => 'shopping_hairdresser',
'shop:doityourself' => 'shopping_diy',
'shop:estate_agent' => 'shopping_estateagent2',
'shop:car' => 'shopping_car',
'shop:garden_centre' => 'shopping_garden_centre',
'shop:car_repair' => 'shopping_car_repair',
'shop:bakery' => 'shopping_bakery',
'shop:butcher' => 'shopping_butcher',
'shop:apparel' => 'shopping_clothes',
'shop:laundry' => 'shopping_laundrette',
'shop:beverages' => 'shopping_alcohol',
'shop:alcohol' => 'shopping_alcohol',
'shop:optician' => 'health_opticians',
'shop:chemist' => 'health_pharmacy',
'shop:gallery' => 'tourist_art_gallery2',
'shop:jewelry' => 'shopping_jewelry',
'tourism:information' => 'amenity_information',
'historic:ruins' => 'tourist_ruin',
'amenity:college' => 'education_school',
'historic:monument' => 'tourist_monument',
'historic:memorial' => 'tourist_monument',
'historic:mine' => 'poi_mine',
'tourism:caravan_site' => 'accommodation_caravan_park',
'amenity:bus_station' => 'transport_bus_station',
'amenity:atm' => 'money_atm2',
'tourism:viewpoint' => 'tourist_view_point',
'tourism:guesthouse' => 'accommodation_bed_and_breakfast',
'railway:tram' => 'transport_tram_stop',
'amenity:courthouse' => 'amenity_court',
'amenity:recycling' => 'amenity_recycling',
'amenity:dentist' => 'health_dentist',
'natural:beach' => 'tourist_beach',
'railway:tram_stop' => 'transport_tram_stop',
'amenity:prison' => 'amenity_prison',
'highway:bus_stop' => 'transport_bus_stop2'
);
$sClassPlace = $aPlace['class'].':'.$aPlace['type'];
return $aIcons[$sClassPlace] ?? null;
}
/**
* Get an icon for the given object with its full URL.
*/
function getIconFile($aPlace)
{
if (CONST_MapIcon_URL === false) {
return null;
}
$sIcon = getIcon($aPlace);
if (!isset($sIcon)) {
return null;
}
return CONST_MapIcon_URL.'/'.$sIcon.'.p.20.png';
}
/**
* Return a class importance value for the given place.
*
* @param array[] $aPlace Information about the place.
*
* @return int An importance value. The lower the value, the more
* important the class.
*/
function getImportance($aPlace)
{
static $aWithImportance = null;
if ($aWithImportance === null) {
$aWithImportance = array_flip(array(
'boundary:administrative',
'place:country',
'place:state',
'place:province',
'place:county',
'place:city',
'place:region',
'place:island',
'place:town',
'place:village',
'place:hamlet',
'place:suburb',
'place:locality',
'landuse:farm',
'place:farm',
'highway:motorway_junction',
'highway:motorway',
'highway:trunk',
'highway:primary',
'highway:secondary',
'highway:tertiary',
'highway:residential',
'highway:unclassified',
'highway:living_street',
'highway:service',
'highway:track',
'highway:road',
'highway:byway',
'highway:bridleway',
'highway:cycleway',
'highway:pedestrian',
'highway:footway',
'highway:steps',
'highway:motorway_link',
'highway:trunk_link',
'highway:primary_link',
'landuse:industrial',
'landuse:residential',
'landuse:retail',
'landuse:commercial',
'place:airport',
'aeroway:aerodrome',
'railway:station',
'amenity:place_of_worship',
'amenity:pub',
'amenity:bar',
'amenity:university',
'tourism:museum',
'amenity:arts_centre',
'tourism:zoo',
'tourism:theme_park',
'tourism:attraction',
'leisure:golf_course',
'historic:castle',
'amenity:hospital',
'amenity:school',
'amenity:theatre',
'amenity:public_building',
'amenity:library',
'amenity:townhall',
'amenity:community_centre',
'amenity:fire_station',
'amenity:police',
'amenity:bank',
'amenity:post_office',
'leisure:park',
'amenity:park',
'landuse:park',
'landuse:recreation_ground',
'tourism:hotel',
'tourism:motel',
'amenity:cinema',
'tourism:artwork',
'historic:archaeological_site',
'amenity:doctors',
'leisure:sports_centre',
'leisure:swimming_pool',
'shop:supermarket',
'shop:convenience',
'amenity:restaurant',
'amenity:fast_food',
'amenity:cafe',
'tourism:guest_house',
'amenity:pharmacy',
'amenity:fuel',
'natural:peak',
'waterway:waterfall',
'natural:wood',
'natural:water',
'landuse:forest',
'landuse:cemetery',
'landuse:allotments',
'landuse:farmyard',
'railway:rail',
'waterway:canal',
'waterway:river',
'waterway:stream',
'shop:bicycle',
'shop:clothes',
'shop:hairdresser',
'shop:doityourself',
'shop:estate_agent',
'shop:car',
'shop:garden_centre',
'shop:car_repair',
'shop:newsagent',
'shop:bakery',
'shop:furniture',
'shop:butcher',
'shop:apparel',
'shop:electronics',
'shop:department_store',
'shop:books',
'shop:yes',
'shop:outdoor',
'shop:mall',
'shop:florist',
'shop:charity',
'shop:hardware',
'shop:laundry',
'shop:shoes',
'shop:beverages',
'shop:dry_cleaning',
'shop:carpet',
'shop:computer',
'shop:alcohol',
'shop:optician',
'shop:chemist',
'shop:gallery',
'shop:mobile_phone',
'shop:sports',
'shop:jewelry',
'shop:pet',
'shop:beauty',
'shop:stationery',
'shop:shopping_centre',
'shop:general',
'shop:electrical',
'shop:toys',
'shop:jeweller',
'shop:betting',
'shop:household',
'shop:travel_agency',
'shop:hifi',
'amenity:shop',
'tourism:information',
'place:house',
'place:house_name',
'place:house_number',
'place:country_code',
'leisure:pitch',
'highway:unsurfaced',
'historic:ruins',
'amenity:college',
'historic:monument',
'railway:subway',
'historic:memorial',
'leisure:nature_reserve',
'leisure:common',
'waterway:lock_gate',
'natural:fell',
'amenity:nightclub',
'highway:path',
'leisure:garden',
'landuse:reservoir',
'leisure:playground',
'leisure:stadium',
'historic:mine',
'natural:cliff',
'tourism:caravan_site',
'amenity:bus_station',
'amenity:kindergarten',
'highway:construction',
'amenity:atm',
'amenity:emergency_phone',
'waterway:lock',
'waterway:riverbank',
'natural:coastline',
'tourism:viewpoint',
'tourism:hostel',
'tourism:bed_and_breakfast',
'railway:halt',
'railway:platform',
'railway:tram',
'amenity:courthouse',
'amenity:recycling',
'amenity:dentist',
'natural:beach',
'place:moor',
'amenity:grave_yard',
'waterway:drain',
'landuse:grass',
'landuse:village_green',
'natural:bay',
'railway:tram_stop',
'leisure:marina',
'highway:stile',
'natural:moor',
'railway:light_rail',
'railway:narrow_gauge',
'natural:land',
'amenity:village_hall',
'waterway:dock',
'amenity:veterinary',
'landuse:brownfield',
'leisure:track',
'railway:historic_station',
'landuse:construction',
'amenity:prison',
'landuse:quarry',
'amenity:telephone',
'highway:traffic_signals',
'natural:heath',
'historic:house',
'amenity:social_club',
'landuse:military',
'amenity:health_centre',
'historic:building',
'amenity:clinic',
'highway:services',
'amenity:ferry_terminal',
'natural:marsh',
'natural:hill',
'highway:raceway',
'amenity:taxi',
'amenity:take_away',
'amenity:car_rental',
'place:islet',
'amenity:nursery',
'amenity:nursing_home',
'amenity:toilets',
'amenity:hall',
'waterway:boatyard',
'highway:mini_roundabout',
'historic:manor',
'tourism:chalet',
'amenity:bicycle_parking',
'amenity:hotel',
'waterway:weir',
'natural:wetland',
'natural:cave_entrance',
'amenity:crematorium',
'tourism:picnic_site',
'landuse:wood',
'landuse:basin',
'natural:tree',
'leisure:slipway',
'landuse:meadow',
'landuse:piste',
'amenity:care_home',
'amenity:club',
'amenity:medical_centre',
'historic:roman_road',
'historic:fort',
'railway:subway_entrance',
'historic:yes',
'highway:gate',
'leisure:fishing',
'historic:museum',
'amenity:car_wash',
'railway:level_crossing',
'leisure:bird_hide',
'natural:headland',
'tourism:apartments',
'amenity:shopping',
'natural:scrub',
'natural:fen',
'building:yes',
'mountain_pass:yes',
'amenity:parking',
'highway:bus_stop',
'place:postcode',
'amenity:post_box',
'place:houses',
'railway:preserved',
'waterway:derelict_canal',
'amenity:dead_pub',
'railway:disused_station',
'railway:abandoned',
'railway:disused'
));
}
$sClassPlace = $aPlace['class'].':'.$aPlace['type'];
return $aWithImportance[$sClassPlace] ?? null;
}

View File

@@ -2,7 +2,7 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/DatabaseError.php');
require_once(CONST_LibDir.'/DatabaseError.php');
/**
* Uses PDO to access the database specified in the CONST_Database_DSN
@@ -12,9 +12,9 @@ class DB
{
protected $connection;
public function __construct($sDSN = CONST_Database_DSN)
public function __construct($sDSN = null)
{
$this->sDSN = $sDSN;
$this->sDSN = $sDSN ?? getSetting('DATABASE_DSN');
}
public function connect($bNew = false, $bPersistent = true)
@@ -135,7 +135,7 @@ class DB
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
while ($val = $stmt->fetchColumn(0)) { // returns first column or false
while (($val = $stmt->fetchColumn(0)) !== false) { // returns first column or false
$aVals[] = $val;
}
} catch (\PDOException $e) {
@@ -241,11 +241,23 @@ class DB
}
/**
* Since the DSN includes the database name, checks if the connection works.
* Deletes a table. Returns true if deleted or didn't exist.
*
* @param string $sTableName
*
* @return boolean
*/
public function databaseExists()
public function deleteTable($sTableName)
{
return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0;
}
/**
* Tries to connect to the database but on failure doesn't throw an exception.
*
* @return boolean
*/
public function checkConnection()
{
$bExists = true;
try {
@@ -280,11 +292,18 @@ class DB
return (float) ($aMatches[1].'.'.$aMatches[2]);
}
/**
* Returns an associate array of postgresql database connection settings. Keys can
* be 'database', 'hostspec', 'port', 'username', 'password'.
* Returns empty array on failure, thus check if at least 'database' is set.
*
* @return array[]
*/
public static function parseDSN($sDSN)
{
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
$aInfo = array();
if (preg_match('/^pgsql:(.+)/', $sDSN, $aMatches)) {
if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
foreach (explode(';', $aMatches[1]) as $sKeyVal) {
list($sKey, $sVal) = explode('=', $sKeyVal, 2);
if ($sKey == 'host') $sKey = 'hostspec';
@@ -295,4 +314,28 @@ class DB
}
return $aInfo;
}
/**
* Takes an array of settings and return the DNS string. Key names can be
* 'database', 'hostspec', 'port', 'username', 'password' but aliases
* 'dbname', 'host' and 'user' are also supported.
*
* @return string
*
*/
public static function generateDSN($aInfo)
{
$sDSN = sprintf(
'pgsql:host=%s;port=%s;dbname=%s;user=%s;password=%s;',
$aInfo['host'] ?? $aInfo['hostspec'] ?? '',
$aInfo['port'] ?? '',
$aInfo['dbname'] ?? $aInfo['database'] ?? '',
$aInfo['user'] ?? '',
$aInfo['password'] ?? ''
);
$sDSN = preg_replace('/\b\w+=;/', '', $sDSN);
$sDSN = preg_replace('/;\Z/', '', $sDSN);
return $sDSN;
}
}

View File

@@ -2,12 +2,12 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
require_once(CONST_BasePath.'/lib/Phrase.php');
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
require_once(CONST_BasePath.'/lib/SearchDescription.php');
require_once(CONST_BasePath.'/lib/SearchContext.php');
require_once(CONST_BasePath.'/lib/TokenList.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/Phrase.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
require_once(CONST_LibDir.'/SearchDescription.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/TokenList.php');
class Geocode
{
@@ -18,7 +18,7 @@ class Geocode
protected $aLangPrefOrder = array();
protected $aExcludePlaceIDs = array();
protected $bReverseInPlan = false;
protected $bReverseInPlan = true;
protected $iLimit = 20;
protected $iFinalLimit = 10;
@@ -245,7 +245,6 @@ class Geocode
}
$this->oPlaceLookup->loadParamArray($oParams, $sForceGeometryType);
$this->oPlaceLookup->setIncludePolygonAsPoints($oParams->getBool('polygon'));
$this->oPlaceLookup->setIncludeAddressDetails($oParams->getBool('addressdetails', false));
}
@@ -643,12 +642,6 @@ class Geocode
$oValidTokens = new TokenList();
if (!empty($aTokens)) {
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
$sSQL .= ' FROM word ';
$sSQL .= ' WHERE word_token in ('.join(',', $this->oDB->getDBQuotedList($aTokens)).')';
Debug::printSQL($sSQL);
$oValidTokens->addTokensFromDB(
$this->oDB,
$aTokens,
@@ -657,6 +650,8 @@ class Geocode
$this->oNormalizer
);
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
@@ -783,14 +778,19 @@ class Geocode
if (!empty($aResults)) {
$aSplitResults = Result::splitResults($aResults);
Debug::printVar('Split results', $aSplitResults);
if ($iGroupLoop <= 4 && empty($aSplitResults['tail'])
&& reset($aSplitResults['head'])->iResultRank > 0) {
if ($iGroupLoop <= 4
&& reset($aSplitResults['head'])->iResultRank > 0
&& $iGroupedRank !== array_key_last($aGroupedSearches)) {
// Haven't found an exact match for the query yet.
// Therefore add result from the next group level.
$aNextResults = $aSplitResults['head'];
foreach ($aNextResults as $oRes) {
$oRes->iResultRank--;
}
foreach ($aSplitResults['tail'] as $oRes) {
$oRes->iResultRank--;
$aNextResults[$oRes->iId] = $oRes;
}
$aResults = array();
} else {
$aResults = $aSplitResults['head'];
@@ -808,9 +808,7 @@ class Geocode
$sSQL .= 'WHERE place_id in ('.$sPlaceIds.') ';
$sSQL .= ' AND (';
$sSQL .= " placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) {
$sSQL .= " OR (extratags->'place') = 'city'";
}
$sSQL .= " OR placex.rank_search between $this->iMinAddressRank and $this->iMaxAddressRank ";
if ($this->aAddressRankList) {
$sSQL .= ' OR placex.rank_address in ('.join(',', $this->aAddressRankList).')';
}
@@ -890,7 +888,6 @@ class Geocode
$aSearchResults = $this->oPlaceLookup->lookup($aResults);
$aClassType = ClassTypes\getListWithImportance();
$aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
foreach ($aRecheckWords as $i => $sWord) {
if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
@@ -899,33 +896,23 @@ class Geocode
Debug::printVar('Recheck words', $aRecheckWords);
foreach ($aSearchResults as $iIdx => $aResult) {
// Default
$fDiameter = ClassTypes\getProperty($aResult, 'defdiameter', 0.0001);
$fRadius = ClassTypes\getDefRadius($aResult);
$aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fDiameter/2);
$aOutlineResult = $this->oPlaceLookup->getOutlines($aResult['place_id'], $aResult['lon'], $aResult['lat'], $fRadius);
if ($aOutlineResult) {
$aResult = array_merge($aResult, $aOutlineResult);
}
if ($aResult['extra_place'] == 'city') {
$aResult['class'] = 'place';
$aResult['type'] = 'city';
$aResult['rank_search'] = 16;
}
// Is there an icon set for this type of result?
$aClassInfo = ClassTypes\getInfo($aResult);
if ($aClassInfo) {
if (isset($aClassInfo['icon'])) {
$aResult['icon'] = CONST_Website_BaseURL.'images/mapicons/'.$aClassInfo['icon'].'.p.20.png';
}
if (isset($aClassInfo['label'])) {
$aResult['label'] = $aClassInfo['label'];
}
$sIcon = ClassTypes\getIconFile($aResult);
if (isset($sIcon)) {
$aResult['icon'] = $sIcon;
}
$sLabel = ClassTypes\getLabel($aResult);
if (isset($sLabel)) {
$aResult['label'] = $sLabel;
}
$aResult['name'] = $aResult['langaddress'];
if ($oCtx->hasNearPoint()) {
@@ -937,6 +924,26 @@ class Geocode
$aResult['lon'],
$aResult['lat']
);
// secondary ordering (for results with same importance (the smaller the better):
// - approximate importance of address parts
if (isset($aResult['addressimportance']) && $aResult['addressimportance']) {
$aResult['foundorder'] = -$aResult['addressimportance']/10;
} else {
$aResult['foundorder'] = -$aResult['importance'];
}
// - number of exact matches from the query
$aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
// - importance of the class/type
$iClassImportance = ClassTypes\getImportance($aResult);
if (isset($iClassImportance)) {
$aResult['foundorder'] += 0.0001 * $iClassImportance;
} else {
$aResult['foundorder'] += 0.01;
}
// - rank
$aResult['foundorder'] -= 0.00001 * (30 - $aResult['rank_search']);
// Adjust importance for the number of exact string matches in the result
$iCountWords = 0;
$sAddress = $aResult['langaddress'];
@@ -947,21 +954,8 @@ class Geocode
}
}
$aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1); // 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
// secondary ordering (for results with same importance (the smaller the better):
// - approximate importance of address parts
$aResult['foundorder'] = -$aResult['addressimportance']/10;
// - number of exact matches from the query
$aResult['foundorder'] -= $aResults[$aResult['place_id']]->iExactMatches;
// - importance of the class/type
if (isset($aClassType[$aResult['class'].':'.$aResult['type']]['importance'])
&& $aClassType[$aResult['class'].':'.$aResult['type']]['importance']
) {
$aResult['foundorder'] += 0.0001 * $aClassType[$aResult['class'].':'.$aResult['type']]['importance'];
} else {
$aResult['foundorder'] += 0.01;
}
// 0.1 is a completely arbitrary number but something in the range 0.1 to 0.5 would seem right
$aResult['importance'] = $aResult['importance'] + ($iCountWords*0.1);
}
$aSearchResults[$iIdx] = $aResult;
}

View File

@@ -104,18 +104,29 @@ class ParameterParser
}
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['short_name:'.$sLanguage] = 'short_name:'.$sLanguage;
$aLangPrefOrder['name:'.$sLanguage] = 'name:'.$sLanguage;
}
$aLangPrefOrder['short_name'] = 'short_name';
$aLangPrefOrder['name'] = 'name';
$aLangPrefOrder['brand'] = 'brand';
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['official_name:'.$sLanguage] = 'official_name:'.$sLanguage;
$aLangPrefOrder['short_name:'.$sLanguage] = 'short_name:'.$sLanguage;
}
$aLangPrefOrder['official_name'] = 'official_name';
$aLangPrefOrder['short_name'] = 'short_name';
$aLangPrefOrder['ref'] = 'ref';
$aLangPrefOrder['type'] = 'type';
return $aLangPrefOrder;
}
public function hasSetAny($aParamNames)
{
foreach ($aParamNames as $sName) {
if ($this->getBool($sName)) {
return true;
}
}
return false;
}
}

View File

@@ -2,8 +2,8 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/AddressDetails.php');
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/AddressDetails.php');
require_once(CONST_LibDir.'/Result.php');
class PlaceLookup
{
@@ -15,7 +15,6 @@ class PlaceLookup
protected $bExtraTags = false;
protected $bNameDetails = false;
protected $bIncludePolygonAsPoints = false;
protected $bIncludePolygonAsText = false;
protected $bIncludePolygonAsGeoJSON = false;
protected $bIncludePolygonAsKML = false;
@@ -38,11 +37,6 @@ class PlaceLookup
return $this->bDeDupe;
}
public function setIncludePolygonAsPoints($b = true)
{
$this->bIncludePolygonAsPoints = $b;
}
public function setIncludeAddressDetails($b)
{
$this->bAddressDetails = $b;
@@ -61,7 +55,6 @@ class PlaceLookup
if ($sGeomType === null || $sGeomType == 'geojson') {
$this->bIncludePolygonAsGeoJSON = $oParams->getBool('polygon_geojson');
$this->bIncludePolygonAsPoints = false;
}
if ($oParams->getString('format', '') !== 'geojson') {
@@ -100,7 +93,6 @@ class PlaceLookup
if ($this->bExtraTags) $aParams['extratags'] = '1';
if ($this->bNameDetails) $aParams['namedetails'] = '1';
if ($this->bIncludePolygonAsPoints) $aParams['polygon'] = '1';
if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
@@ -215,7 +207,7 @@ class PlaceLookup
'ST_Collect(centroid)',
'min(CASE WHEN placex.rank_search < 28 THEN placex.place_id ELSE placex.parent_place_id END)'
);
$sSQL .= " (extratags->'place') AS extra_place ";
$sSQL .= " COALESCE(extratags->'place', extratags->'linked_place') AS extra_place ";
$sSQL .= ' FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs) ";
$sSQL .= ' AND (';
@@ -248,7 +240,7 @@ class PlaceLookup
$sSQL .= ' ref, ';
if ($this->bExtraTags) $sSQL .= 'extratags, ';
if ($this->bNameDetails) $sSQL .= 'name, ';
$sSQL .= " extratags->'place' ";
$sSQL .= ' extra_place ';
$aSubSelects[] = $sSQL;
}
@@ -456,16 +448,24 @@ class PlaceLookup
}
}
$aPlace['addresstype'] = ClassTypes\getProperty(
$aPlace['addresstype'] = ClassTypes\getLabelTag(
$aPlace,
'simplelabel',
$aPlace['class']
$aPlace['country_code']
);
$aResults[$aPlace['place_id']] = $aPlace;
}
Debug::printVar('Places', $aPlaces);
$aResults = array_filter(
$aResults,
function ($v) {
return !($v instanceof Result);
}
);
return $aPlaces;
Debug::printVar('Places', $aResults);
return $aResults;
}
/* returns an array which will contain the keys
@@ -486,82 +486,73 @@ class PlaceLookup
$aOutlineResult = array();
if (!$iPlaceID) return $aOutlineResult;
if (CONST_Search_AreaPolygons) {
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ' ST_Y(closest_point) as centrelat,';
$sSQL .= ' ST_X(closest_point) as centrelon,';
} else {
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
}
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
if ($this->bIncludePolygonAsText || $this->bIncludePolygonAsPoints) $sSQL .= ',ST_AsText(geometry) as astext';
if ($fLonReverse != null && $fLatReverse != null) {
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sFrom .=' ELSE centroid END AS closest_point';
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
} else {
$sFrom = ' from placex where place_id = '.$iPlaceID;
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
} else {
$sSQL .= $sFrom;
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ' ST_Y(closest_point) as centrelat,';
$sSQL .= ' ST_X(closest_point) as centrelon,';
} else {
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
}
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext';
if ($fLonReverse != null && $fLatReverse != null) {
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sFrom .=' ELSE centroid END AS closest_point';
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
} else {
$sFrom = ' from placex where place_id = '.$iPlaceID;
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
} else {
$sSQL .= $sFrom;
}
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
if ($aPointPolygon && $aPointPolygon['place_id']) {
if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null) {
$aOutlineResult['lat'] = $aPointPolygon['centrelat'];
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];
}
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
if ($aPointPolygon && $aPointPolygon['place_id']) {
if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null) {
$aOutlineResult['lat'] = $aPointPolygon['centrelat'];
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];
}
if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
if ($this->bIncludePolygonAsPoints) $aOutlineResult['aPolyPoints'] = geometryText2Points($aPointPolygon['astext'], $fRadius);
if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
$aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
$aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius;
}
if (abs($aPointPolygon['minlon'] - $aPointPolygon['maxlon']) < 0.0000001) {
$aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius;
$aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius;
}
$aOutlineResult['aBoundingBox'] = array(
(string)$aPointPolygon['minlat'],
(string)$aPointPolygon['maxlat'],
(string)$aPointPolygon['minlon'],
(string)$aPointPolygon['maxlon']
);
if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
$aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
$aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius;
}
if (abs($aPointPolygon['minlon'] - $aPointPolygon['maxlon']) < 0.0000001) {
$aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius;
$aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius;
}
$aOutlineResult['aBoundingBox'] = array(
(string)$aPointPolygon['minlat'],
(string)$aPointPolygon['maxlat'],
(string)$aPointPolygon['minlon'],
(string)$aPointPolygon['maxlon']
);
}
// as a fallback we generate a bounding box without knowing the size of the geometry
if ((!isset($aOutlineResult['aBoundingBox'])) && isset($fLon)) {
//
if ($this->bIncludePolygonAsPoints) {
$sGeometryText = 'POINT('.$fLon.','.$fLat.')';
$aOutlineResult['aPolyPoints'] = geometryText2Points($sGeometryText, $fRadius);
}
$aBounds = array();
$aBounds['minlat'] = $fLat - $fRadius;
$aBounds['maxlat'] = $fLat + $fRadius;
$aBounds['minlon'] = $fLon - $fRadius;
$aBounds['maxlon'] = $fLon + $fRadius;
$aBounds = array(
'minlat' => $fLat - $fRadius,
'maxlat' => $fLat + $fRadius,
'minlon' => $fLon - $fRadius,
'maxlon' => $fLon + $fRadius
);
$aOutlineResult['aBoundingBox'] = array(
(string)$aBounds['minlat'],

View File

@@ -26,6 +26,8 @@ class Result
public $iExactMatches = 0;
/// Subranking within the results (the higher the worse).
public $iResultRank = 0;
/// Address rank of the result.
public $iAddressRank;
public function debugInfo()
{
@@ -84,7 +86,7 @@ class Result
foreach ($aResults as $oRes) {
if ($oRes->iResultRank < $iMinRank) {
$aTail = array_merge($aTail, $aHead);
$aTail += $aHead;
$aHead = array($oRes->iId => $oRes);
$iMinRank = $oRes->iResultRank;
} elseif ($oRes->iResultRank == $iMinRank) {

View File

@@ -2,7 +2,7 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/Result.php');
class ReverseGeocode
{
@@ -54,6 +54,7 @@ class ReverseGeocode
*/
protected function lookupInterpolation($sPointSQL, $fSearchDiam)
{
Debug::newFunction('lookupInterpolation');
$sSQL = 'SELECT place_id, parent_place_id, 30 as rank_search,';
$sSQL .= ' ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
$sSQL .= ' startnumber, endnumber, interpolationtype,';
@@ -62,6 +63,7 @@ class ReverseGeocode
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')';
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' ORDER BY distance ASC limit 1';
Debug::printSQL($sSQL);
return $this->oDB->getRow(
$sSQL,
@@ -88,16 +90,20 @@ class ReverseGeocode
protected function lookupInCountry($sPointSQL, $iMaxRank)
{
Debug::newFunction('lookupInCountry');
// searches for polygon in table country_osm_grid which contains the searchpoint
// and searches for the nearest place node to the searchpoint in this polygon
$sSQL = 'SELECT country_code FROM country_osm_grid';
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.') LIMIT 1';
Debug::printSQL($sSQL);
$sCountryCode = $this->oDB->getOne(
$sSQL,
null,
'Could not determine country polygon containing the point.'
);
Debug::printVar('Country code', $sCountryCode);
if ($sCountryCode) {
if ($iMaxRank > 4) {
// look for place nodes with the given country code
@@ -115,9 +121,11 @@ class ReverseGeocode
$sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);
if (CONST_Debug) var_dump($sSQL);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
Debug::printVar('Country node', $aPlace);
if ($aPlace) {
return new Result($aPlace['place_id']);
}
@@ -131,9 +139,10 @@ class ReverseGeocode
$sSQL .= ' AND class in (\'boundary\', \'place\')';
$sSQL .= ' AND linked_place_id is null';
$sSQL .= ' ORDER BY distance ASC';
Debug::printSQL($sSQL);
if (CONST_Debug) var_dump($sSQL);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
Debug::printVar('Country place', $aPlace);
if ($aPlace) {
return new Result($aPlace['place_id']);
}
@@ -156,6 +165,7 @@ class ReverseGeocode
*/
protected function lookupPolygon($sPointSQL, $iMaxRank)
{
Debug::newFunction('lookupPolygon');
// polygon search begins at suburb-level
if ($iMaxRank > 25) $iMaxRank = 25;
// no polygon search over country-level
@@ -173,8 +183,10 @@ class ReverseGeocode
$sSQL .= ' ORDER BY rank_address DESC LIMIT 50 ) as a';
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 1';
Debug::printSQL($sSQL);
$aPoly = $this->oDB->getRow($sSQL, null, 'Could not determine polygon containing the point.');
Debug::printVar('Polygon result', $aPoly);
if ($aPoly) {
// if a polygon is found, search for placenodes begins ...
@@ -193,6 +205,7 @@ class ReverseGeocode
// for place nodes at rank_address 16
$sSQL .= ' AND rank_search > '.$iRankSearch;
$sSQL .= ' AND rank_search <= '.$iMaxRank;
$sSQL .= ' AND rank_address > 0';
$sSQL .= ' AND class = \'place\'';
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
@@ -205,11 +218,12 @@ class ReverseGeocode
$sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY distance ASC, rank_search DESC';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);
if (CONST_Debug) var_dump($sSQL);
$aPlacNode = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
if ($aPlacNode) {
return $aPlacNode;
$aPlaceNode = $this->oDB->getRow($sSQL, null, 'Could not determine place node.');
Debug::printVar('Nearest place node', $aPlaceNode);
if ($aPlaceNode) {
return $aPlaceNode;
}
}
}
@@ -227,6 +241,7 @@ class ReverseGeocode
public function lookupPoint($sPointSQL, $bDoInterpolation = true)
{
Debug::newFunction('lookupPoint');
// starts if the search is on POI or street level,
// searches for the nearest POI or street,
// if a street is found and a POI is searched for,
@@ -256,36 +271,15 @@ class ReverseGeocode
$sSQL .= ' and (ST_GeometryType(geometry) not in (\'ST_Polygon\',\'ST_MultiPolygon\') ';
$sSQL .= ' OR ST_DWithin('.$sPointSQL.', centroid, '.$fSearchDiam.'))';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
Debug::printSQL($sSQL);
$aPlace = $this->oDB->getRow($sSQL, null, 'Could not determine closest place.');
if (CONST_Debug) var_dump($aPlace);
Debug::printVar('POI/street level result', $aPlace);
if ($aPlace) {
$iPlaceID = $aPlace['place_id'];
$oResult = new Result($iPlaceID);
$iRankAddress = $aPlace['rank_address'];
$iParentPlaceID = $aPlace['parent_place_id'];
}
if ($bDoInterpolation && $iMaxRank >= 30) {
$fDistance = $fSearchDiam;
if ($aPlace) {
// We can't reliably go from the closest street to an
// interpolation line because the closest interpolation
// may have a different street segments as a parent.
// Therefore allow an interpolation line to take precendence
// even when the street is closer.
$fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
}
$aHouse = $this->lookupInterpolation($sPointSQL, $fDistance);
if ($aHouse) {
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
$oResult->iHouseNumber = closestHouseNumber($aHouse);
$aPlace = $aHouse;
$iRankAddress = 30;
}
}
if ($aPlace) {
@@ -305,11 +299,15 @@ class ReverseGeocode
$sSQL .= ' and class not in (\'boundary\')';
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
Debug::printSQL($sSQL);
$aStreet = $this->oDB->getRow($sSQL, null, 'Could not determine closest place.');
Debug::printVar('Closest POI result', $aStreet);
if ($aStreet) {
if (CONST_Debug) var_dump($aStreet);
$aPlace = $aStreet;
$oResult = new Result($aStreet['place_id']);
$iRankAddress = 30;
}
}
@@ -326,15 +324,43 @@ class ReverseGeocode
$sSQL .= ' FROM location_property_tiger WHERE parent_place_id = '.$oResult->iId;
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, 0.001)';
$sSQL .= ' ORDER BY distance ASC limit 1';
if (CONST_Debug) var_dump($sSQL);
Debug::printSQL($sSQL);
$aPlaceTiger = $this->oDB->getRow($sSQL, null, 'Could not determine closest Tiger place.');
Debug::printVar('Tiger house number result', $aPlaceTiger);
if ($aPlaceTiger) {
if (CONST_Debug) var_dump('found Tiger housenumber', $aPlaceTiger);
$aPlace = $aPlaceTiger;
$oResult = new Result($aPlaceTiger['place_id'], Result::TABLE_TIGER);
$oResult->iHouseNumber = closestHouseNumber($aPlaceTiger);
$iRankAddress = 30;
}
}
} else {
}
if ($bDoInterpolation && $iMaxRank >= 30) {
$fDistance = $fSearchDiam;
if ($aPlace) {
// We can't reliably go from the closest street to an
// interpolation line because the closest interpolation
// may have a different street segments as a parent.
// Therefore allow an interpolation line to take precendence
// even when the street is closer.
$fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
}
$aHouse = $this->lookupInterpolation($sPointSQL, $fDistance);
Debug::printVar('Interpolation result', $aPlace);
if ($aHouse) {
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
$oResult->iHouseNumber = closestHouseNumber($aHouse);
$aPlace = $aHouse;
$iRankAddress = 30;
}
}
if (!$aPlace) {
// if no POI or street is found ...
$oResult = $this->lookupLargeArea($sPointSQL, 25);
}
@@ -342,6 +368,8 @@ class ReverseGeocode
// lower than street level ($iMaxRank < 26 )
$oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank);
}
Debug::printVar('Final result', $oResult);
return $oResult;
}
}

View File

@@ -2,7 +2,7 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/lib.php');
require_once(CONST_LibDir.'/lib.php');
/**
@@ -32,7 +32,18 @@ class SearchContext
public $sqlCountryList = '';
/// List of place IDs to exclude (as SQL).
private $sqlExcludeList = '';
/// Subset of word ids of full words in the query.
private $aFullNameWords = array();
public function setFullNameWords($aWordList)
{
$this->aFullNameWords = $aWordList;
}
public function getFullNameTerms()
{
return $this->aFullNameWords;
}
/**
* Check if a reference point is defined.
@@ -203,7 +214,7 @@ class SearchContext
}
/**
* Get an SQL snipped for computing the distance from the reference point.
* Get an SQL snippet for computing the distance from the reference point.
*
* @param string $sObj SQL variable name to compute the distance from.
*
@@ -215,7 +226,7 @@ class SearchContext
}
/**
* Get an SQL snipped for checking if something is within range of the
* Get an SQL snippet for checking if something is within range of the
* reference point.
*
* @param string $sObj SQL variable name to compute if it is within range.
@@ -228,14 +239,14 @@ class SearchContext
}
/**
* Get an SQL snipped of the importance factor of the viewbox.
* Get an SQL snippet of the importance factor of the viewbox.
*
* The importance factor is computed by checking if an object is within
* the viewbox and/or the extended version of the viewbox.
*
* @param string $sObj SQL variable name of object to weight the importance
*
* @return string SQL snipped of the factor with a leading multiply sign.
* @return string SQL snippet of the factor with a leading multiply sign.
*/
public function viewboxImportanceSQL($sObj)
{
@@ -252,7 +263,7 @@ class SearchContext
}
/**
* SQL snipped checking if a place ID should be excluded.
* SQL snippet checking if a place ID should be excluded.
*
* @param string $sVariable SQL variable name of place ID to check,
* potentially prefixed with more SQL.

View File

@@ -2,9 +2,9 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
require_once(CONST_BasePath.'/lib/SearchContext.php');
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/Result.php');
/**
* Description of a single interpretation of a search query.
@@ -21,8 +21,6 @@ class SearchDescription
private $bRareName = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
/// Subset of word ids of full words making up the address.
private $aFullNameAddress = array();
/// List of word ids that appear in the name but should be ignored.
private $aNameNonSearch = array();
/// List of word ids that appear in the address but should be ignored.
@@ -88,18 +86,6 @@ class SearchDescription
$this->sType = $sType;
}
/**
* Check if this might be a full address search.
*
* @return bool True if the search contains name, address and housenumber.
*/
public function looksLikeFullAddress()
{
return (!empty($this->aName))
&& (!empty($this->aAddress) || $this->sCountryCode)
&& preg_match('/[0-9]+/', $this->sHouseNumber);
}
/**
* Check if any operator is set.
*
@@ -193,21 +179,17 @@ class SearchDescription
// - increase score for finding it anywhere else (optimisation)
if (!$bLastToken) {
$oSearch->iSearchRank += 5;
$oSearch->iNamePhrase = -1;
}
$aNewSearches[] = $oSearch;
}
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
) {
// We need to try the case where the postal code is the primary element
// (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
// so try both.
if (!$this->sPostcode) {
// If we have structured search or this is the first term,
// make the postcode the primary search element.
if ($this->iOperator == Operator::NONE
&& ($sPhraseType == 'postalcode' || $bFirstToken)
) {
if ($this->iOperator == Operator::NONE && $bFirstToken) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iOperator = Operator::POSTCODE;
@@ -224,6 +206,10 @@ class SearchDescription
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iNamePhrase = -1;
if (strlen($oSearchTerm->sPostcode) < 4) {
$oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
}
$oSearch->sPostcode = $oSearchTerm->sPostcode;
$aNewSearches[] = $oSearch;
}
@@ -234,7 +220,11 @@ class SearchDescription
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iNamePhrase = -1;
$oSearch->sHouseNumber = $oSearchTerm->sToken;
if ($this->iOperator != Operator::NONE) {
$oSearch->iSearchRank++;
}
// sanity check: if the housenumber is not mainly made
// up of numbers, add a penalty
if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0
@@ -252,13 +242,27 @@ class SearchDescription
$oSearch->iSearchRank++;
}
$aNewSearches[] = $oSearch;
// Housenumbers may appear in the name when the place has its own
// address terms.
if ($oSearchTerm->iId !== null
&& ($this->iNamePhrase >= 0 || empty($this->aName))
&& empty($this->aAddress)
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddress = $this->aName;
$oSearch->bRareName = false;
$oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
$aNewSearches[] = $oSearch;
}
}
} elseif ($sPhraseType == ''
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
) {
if ($this->iOperator == Operator::NONE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iSearchRank += 2;
$oSearch->iNamePhrase = -1;
$iOp = $oSearchTerm->iOperator;
if ($iOp == Operator::NONE) {
@@ -268,6 +272,11 @@ class SearchDescription
$iOp = Operator::NEAR;
}
$oSearch->iSearchRank += 2;
} elseif (!$bFirstToken && !$bLastToken) {
$oSearch->iSearchRank += 2;
}
if ($this->sHouseNumber) {
$oSearch->iSearchRank++;
}
$oSearch->setPoiSearch(
@@ -288,13 +297,12 @@ class SearchDescription
if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
$oSearch->iNamePhrase = -1;
$oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
} else {
$this->aFullNameAddress[$iWordID] = $iWordID;
}
} else {
} elseif (empty($this->aNameNonSearch)) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aName = array($iWordID => $iWordID);
@@ -334,48 +342,34 @@ class SearchDescription
if ((!$bStructuredPhrases || $iPhrase > 0)
&& (!empty($this->aName))
&& strpos($sToken, ' ') === false
) {
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
} else {
$oSearch = clone $this;
$oSearch = clone $this;
$oSearch->iSearchRank++;
if (preg_match('#^[0-9 ]+$#', $sToken)) {
$oSearch->iSearchRank++;
}
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch->aAddress[$iWordID] = $iWordID;
} else {
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank += 2;
}
if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
$aNewSearches[] = $oSearch;
// revert to the token version?
foreach ($aFullTokens as $oSearchTermToken) {
if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddress[$oSearchTermToken->iId]
= $oSearchTermToken->iId;
$aNewSearches[] = $oSearch;
}
}
}
$aNewSearches[] = $oSearch;
}
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
&& (empty($this->aName) || $this->iNamePhrase == $iPhrase)
&& ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
&& strpos($sToken, ' ') === false
) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
if (empty($this->aName)) {
$oSearch->iSearchRank += 1;
$oSearch->iSearchRank++;
if (empty($this->aName) && empty($this->aNameNonSearch)) {
$oSearch->iSearchRank++;
}
if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank += 2;
if (preg_match('#^[0-9 ]+$#', $sToken)) {
$oSearch->iSearchRank++;
}
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
if (empty($this->aName)
@@ -389,6 +383,9 @@ class SearchDescription
}
$oSearch->aName[$iWordID] = $iWordID;
} else {
if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
@@ -447,12 +444,16 @@ class SearchDescription
$iLimit
);
//now search for housenumber, if housenumber provided
if ($this->sHouseNumber && !empty($aResults)) {
// Now search for housenumber, if housenumber provided. Can be zero.
if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) {
// Downgrade the rank of the street results, they are missing
// the housenumber.
foreach ($aResults as $oRes) {
$oRes->iResultRank++;
if ($oRes->iAddressRank >= 26) {
$oRes->iResultRank++;
} else {
$oRes->iResultRank += 2;
}
}
$aHnResults = $this->queryHouseNumber($oDB, $aResults);
@@ -613,14 +614,14 @@ class SearchDescription
// too many results are expected for the street, i.e. if the result
// will be narrowed down by an address. Remeber that with ordering
// every single result has to be checked.
if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) {
if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$aOrder[] = ' (';
$aOrder[0] .= 'EXISTS(';
$aOrder[0] .= ' SELECT place_id';
$aOrder[0] .= ' FROM placex';
$aOrder[0] .= ' WHERE parent_place_id = search_name.place_id';
$aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$aOrder[0] .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$aOrder[0] .= ' LIMIT 1';
$aOrder[0] .= ') ';
// also housenumbers from interpolation lines table are needed
@@ -657,13 +658,10 @@ class SearchDescription
}
if ($this->sHouseNumber) {
$aTerms[] = 'address_rank between 16 and 27';
$aTerms[] = 'address_rank between 16 and 30';
} elseif (!$this->sClass || $this->iOperator == Operator::NAME) {
if ($iMinAddressRank > 0) {
$aTerms[] = 'address_rank >= '.$iMinAddressRank;
}
if ($iMaxAddressRank < 30) {
$aTerms[] = 'address_rank <= '.$iMaxAddressRank;
$aTerms[] = "((address_rank between $iMinAddressRank and $iMaxAddressRank) or (search_rank between $iMinAddressRank and $iMaxAddressRank))";
}
}
@@ -699,10 +697,11 @@ class SearchDescription
$sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid');
$aOrder[] = "$sImportanceSQL DESC";
if (!empty($this->aFullNameAddress)) {
$aFullNameAddress = $this->oContext->getFullNameTerms();
if (!empty($aFullNameAddress)) {
$sExactMatchSQL = ' ( ';
$sExactMatchSQL .= ' SELECT count(*) FROM ( ';
$sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')';
$sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($aFullNameAddress).')';
$sExactMatchSQL .= ' INTERSECT ';
$sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)';
$sExactMatchSQL .= ' ) s';
@@ -719,7 +718,7 @@ class SearchDescription
$aResults = array();
if (!empty($aTerms)) {
$sSQL = 'SELECT place_id,'.$sExactMatchSQL;
$sSQL = 'SELECT place_id, address_rank,'.$sExactMatchSQL;
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
@@ -732,6 +731,7 @@ class SearchDescription
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
$oResult->iExactMatches = $aResult['exactmatch'];
$oResult->iAddressRank = $aResult['address_rank'];
$aResults[$aResult['place_id']] = $oResult;
}
}
@@ -751,7 +751,7 @@ class SearchDescription
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sSQL = 'SELECT place_id FROM placex ';
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$sSQL .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
@@ -1019,7 +1019,7 @@ class SearchDescription
'Name terms (stop words)' => $this->aNameNonSearch,
'Address terms' => $this->aAddress,
'Address terms (stop words)' => $this->aAddressNonSearch,
'Address terms (full words)' => $this->aFullNameAddress,
'Address terms (full words)' => $this->aFullNameAddress ?? '',
'Special search' => $this->iOperator,
'Class' => $this->sClass,
'Type' => $this->sType,
@@ -1031,7 +1031,7 @@ class SearchDescription
public function dumpAsHtmlTableRow(&$aWordIDs)
{
$kf = function ($k) use (&$aWordIDs) {
return $aWordIDs[$k];
return $aWordIDs[$k] ?? '['.$k.']';
};
echo '<tr>';

85
lib-php/Shell.php Normal file
View File

@@ -0,0 +1,85 @@
<?php
namespace Nominatim;
class Shell
{
public function __construct($sBaseCmd, ...$aParams)
{
if (!$sBaseCmd) {
throw new \Exception('Command missing in new() call');
}
$this->baseCmd = $sBaseCmd;
$this->aParams = array();
$this->aEnv = null; // null = use the same environment as the current PHP process
$this->stdoutString = null;
foreach ($aParams as $sParam) {
$this->addParams($sParam);
}
}
public function addParams(...$aParams)
{
foreach ($aParams as $sParam) {
if (isset($sParam) && $sParam !== null && $sParam !== '') {
array_push($this->aParams, $sParam);
}
}
return $this;
}
public function addEnvPair($sKey, $sVal)
{
if (isset($sKey) && $sKey && isset($sVal)) {
if (!isset($this->aEnv)) $this->aEnv = $_ENV;
$this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV);
}
return $this;
}
public function escapedCmd()
{
$aEscaped = array_map(function ($sParam) {
return $this->escapeParam($sParam);
}, array_merge(array($this->baseCmd), $this->aParams));
return join(' ', $aEscaped);
}
public function run($bExitOnFail = false)
{
$sCmd = $this->escapedCmd();
// $aEnv does not need escaping, proc_open seems to handle it fine
$aFDs = array(
0 => array('pipe', 'r'),
1 => STDOUT,
2 => STDERR
);
$aPipes = null;
$hProc = @proc_open($sCmd, $aFDs, $aPipes, null, $this->aEnv);
if (!is_resource($hProc)) {
throw new \Exception('Unable to run command: ' . $sCmd);
}
fclose($aPipes[0]); // no stdin
$iStat = proc_close($hProc);
if ($iStat != 0 && $bExitOnFail) {
exit($iStat);
}
return $iStat;
}
private function escapeParam($sParam)
{
if (preg_match('/^-*\w+$/', $sParam)) return $sParam;
return escapeshellarg($sParam);
}
}

View File

@@ -51,9 +51,15 @@ class Status
$iDataDateEpoch = $this->oDB->getOne($sSQL);
if ($iDataDateEpoch === false) {
throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
throw new Exception('Import date is not available', 705);
}
return $iDataDateEpoch;
}
public function databaseVersion()
{
$sSQL = 'SELECT value FROM nominatim_properties WHERE property = \'database_version\'';
return $this->oDB->getOne($sSQL);
}
}

View File

@@ -2,12 +2,12 @@
namespace Nominatim;
require_once(CONST_BasePath.'/lib/TokenCountry.php');
require_once(CONST_BasePath.'/lib/TokenHousenumber.php');
require_once(CONST_BasePath.'/lib/TokenPostcode.php');
require_once(CONST_BasePath.'/lib/TokenSpecialTerm.php');
require_once(CONST_BasePath.'/lib/TokenWord.php');
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
require_once(CONST_LibDir.'/TokenCountry.php');
require_once(CONST_LibDir.'/TokenHousenumber.php');
require_once(CONST_LibDir.'/TokenPostcode.php');
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
require_once(CONST_LibDir.'/TokenWord.php');
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
/**
* Saves information about the tokens that appear in a search query.
@@ -80,6 +80,21 @@ class TokenList
return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
}
public function getFullWordIDs()
{
$ids = array();
foreach ($this->aTokens as $aTokenList) {
foreach ($aTokenList as $oToken) {
if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
$ids[$oToken->iId] = $oToken->iId;
}
}
}
return $ids;
}
/**
* Add token information from the word table in the database.
*
@@ -151,7 +166,8 @@ class TokenList
$oToken = new Token\Word(
$iId,
$aWord['word_token'][0] != ' ',
(int) $aWord['count']
(int) $aWord['count'],
substr_count($aWord['word_token'], ' ')
);
}

View File

@@ -2,7 +2,7 @@
namespace Nominatim\Token;
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
/**
* A word token describing a place type.

View File

@@ -13,12 +13,15 @@ class Word
public $bPartial;
/// Number of appearances in the database.
public $iSearchNameCount;
/// Number of terms in the word.
public $iTermCount;
public function __construct($iId, $bPartial, $iSearchNameCount)
public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
{
$this->iId = $iId;
$this->bPartial = $bPartial;
$this->iSearchNameCount = $iSearchNameCount;
$this->iTermCount = $iTermCount;
}
public function debugInfo()

View File

@@ -0,0 +1,10 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
loadSettings(getcwd());
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('admin', '--check-database')
->run();

View File

@@ -1,6 +1,7 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_LibDir.'/init-cmd.php');
ini_set('memory_limit', '800M');
ini_set('display_errors', 'stderr');
@@ -11,10 +12,12 @@ $aCMDOptions
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
include(CONST_Phrase_Config);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();
if (true) {
$sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Country_Codes';

View File

@@ -1,10 +1,11 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
// Script to extract structured city and street data
// from a running nominatim instance as CSV data
require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_BasePath.'/lib/ParameterParser.php');
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/ParameterParser.php');
ini_set('memory_limit', '800M');
$aCMDOptions = array(
@@ -21,6 +22,7 @@
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
'Additional output types: postcode, placeid (placeid for each object)',
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
@@ -30,6 +32,8 @@
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
$aRankmap = array(
'continent' => 1,
'country' => 4,

93
lib-php/admin/query.php Normal file
View File

@@ -0,0 +1,93 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/Geocode.php');
require_once(CONST_LibDir.'/ParameterParser.php');
ini_set('memory_limit', '800M');
$aCMDOptions
= array(
'Query database from command line. Returns search result as JSON.',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('search', '', 0, 1, 1, 1, 'string', 'Search for given term or coordinate'),
array('country', '', 0, 1, 1, 1, 'string', 'Structured search: country'),
array('state', '', 0, 1, 1, 1, 'string', 'Structured search: state'),
array('county', '', 0, 1, 1, 1, 'string', 'Structured search: county'),
array('city', '', 0, 1, 1, 1, 'string', 'Structured search: city'),
array('street', '', 0, 1, 1, 1, 'string', 'Structured search: street'),
array('amenity', '', 0, 1, 1, 1, 'string', 'Structured search: amenity'),
array('postalcode', '', 0, 1, 1, 1, 'string', 'Structured search: postal code'),
array('accept-language', '', 0, 1, 1, 1, 'string', 'Preferred language order for showing search results'),
array('bounded', '', 0, 1, 0, 0, 'bool', 'Restrict results to given viewbox'),
array('nodedupe', '', 0, 1, 0, 0, 'bool', 'Do not remove duplicate results'),
array('limit', '', 0, 1, 1, 1, 'int', 'Maximum number of results returned (default: 10)'),
array('exclude_place_ids', '', 0, 1, 1, 1, 'string', 'Comma-separated list of place ids to exclude from results'),
array('featureType', '', 0, 1, 1, 1, 'string', 'Restrict results to certain features (country, state,city,settlement)'),
array('countrycodes', '', 0, 1, 1, 1, 'string', 'Comma-separated list of countries to restrict search to'),
array('viewbox', '', 0, 1, 1, 1, 'string', 'Prefer results in given view box'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
$oDB = new Nominatim\DB;
$oDB->connect();
if (isset($aCMDResult['nodedupe'])) $aCMDResult['dedupe'] = 'false';
$oParams = new Nominatim\ParameterParser($aCMDResult);
$aSearchParams = array(
'search',
'amenity',
'street',
'city',
'county',
'state',
'country',
'postalcode'
);
if (!$oParams->hasSetAny($aSearchParams)) {
showUsage($aCMDOptions, true);
return 1;
}
$oGeocode = new Nominatim\Geocode($oDB);
$oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false));
$oGeocode->setReverseInPlan(true);
$oGeocode->loadParamArray($oParams);
if ($oParams->getBool('search')) {
$oGeocode->setQuery($aCMDResult['search']);
} else {
$oGeocode->setQueryFromParams($oParams);
}
$aSearchResults = $oGeocode->lookup();
echo json_encode($aSearchResults, JSON_PRETTY_PRINT | JSON_UNESCAPED_SLASHES | JSON_UNESCAPED_UNICODE)."\n";

View File

@@ -1,8 +1,9 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
require_once(CONST_BasePath.'/lib/setup_functions.php');
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup/SetupClass.php');
require_once(CONST_LibDir.'/setup_functions.php');
ini_set('memory_limit', '800M');
use Nominatim\Setup\SetupFunctions as SetupFunctions;
@@ -38,19 +39,46 @@ $aCMDOptions
array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'),
array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
array('setup-website', '', 0, 1, 0, 0, 'bool', 'Used to compile environment variables for the website'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
// $aCMDOptions passed to getCmdOpt by reference
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();
$bDidSomething = false;
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
// by default, use all but one processor, but never more than 15.
$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));
function run($oCmd)
{
global $iInstances;
global $aCMDResult;
$oCmd->addParams('--threads', $iInstances);
if ($aCMDResult['ignore-errors'] ?? false) {
$oCmd->addParams('--ignore-errors');
}
if ($aCMDResult['quiet'] ?? false) {
$oCmd->addParams('--quiet');
}
if ($aCMDResult['verbose'] ?? false) {
$oCmd->addParams('--verbose');
}
$oCmd->run(true);
}
//*******************************************************
// Making some sanity check:
// Check if osm-file is set and points to a valid file
@@ -59,12 +87,6 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) {
checkInFile($aCMDResult['osm-file']);
}
// osmosis init is no longer supported
if ($aCMDResult['osmosis-init']) {
$bDidSomething = true;
echo "Command 'osmosis-init' no longer available, please use utils/update.php --init-updates.\n";
}
// ******************************************************
// instantiate Setup class
$oSetup = new SetupFunctions($aCMDResult);
@@ -73,58 +95,73 @@ $oSetup = new SetupFunctions($aCMDResult);
// go through complete process if 'all' is selected or start selected functions
if ($aCMDResult['create-db'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createDB();
run((clone($oNominatimCmd))->addParams('transition', '--create-db'));
}
$oSetup->connect();
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->setupDB();
}
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
// Try accessing the C module, so we know early if something is wrong
checkModulePresence(); // raises exception on failure
if ($aCMDResult['no-partitions'] ?? false) {
$oCmd->addParams('--no-partitions');
}
run($oCmd);
}
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->importData($aCMDResult['osm-file']);
$oCmd = (clone($oNominatimCmd))
->addParams('transition', '--import-data')
->addParams('--osm-file', $aCMDResult['osm-file']);
if ($aCMDResult['drop'] ?? false) {
$oCmd->addParams('--drop');
}
run($oCmd);
}
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createFunctions();
$oSetup->createSqlFunctions();
}
if ($aCMDResult['create-tables'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createTables($aCMDResult['reverse-only']);
$oSetup->createFunctions();
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables');
if ($aCMDResult['reverse-only'] ?? false) {
$oCmd->addParams('--reverse-only');
}
run($oCmd);
}
if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createPartitionTables();
run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables'));
}
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createPartitionFunctions();
$oSetup->createSqlFunctions(); // also create partition functions
}
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->importWikipediaArticles();
// ignore errors!
(clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
}
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->loadData($aCMDResult['disable-token-precalc']);
run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
}
if ($aCMDResult['import-tiger-data']) {
$bDidSomething = true;
$oSetup->importTigerData();
$sTigerPath = getSetting('TIGER_DATA_PATH', CONST_InstallDir.'/tiger');
run((clone($oNominatimCmd))->addParams('transition', '--tiger-data', $sTigerPath));
}
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
@@ -134,22 +171,39 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
if ($aCMDResult['index'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->index($aCMDResult['index-noanalyse']);
}
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--index');
if ($aCMDResult['index-noanalyse'] ?? false) {
$oCmd->addParams('--no-analyse');
}
if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createSearchIndices();
}
if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
$bDidSomething = true;
$oSetup->createCountryNames($aCMDResult);
run($oCmd);
}
if ($aCMDResult['drop']) {
$bDidSomething = true;
$oSetup->drop($aCMDResult);
run((clone($oNominatimCmd))->addParams('freeze'));
}
if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
$bDidSomething = true;
$oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices');
if ($aCMDResult['drop'] ?? false) {
$oCmd->addParams('--drop');
}
run($oCmd);
}
if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
$bDidSomething = true;
run(clone($oNominatimCmd))->addParams('transition', '--create-country-names');
}
if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
$bDidSomething = true;
run((clone($oNominatimCmd))->addParams('refresh', '--website'));
}
// ******************************************************

View File

@@ -0,0 +1,11 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
loadSettings(getcwd());
(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
->addParams('special-phrases', '--import-from-wiki')
->run();

236
lib-php/admin/update.php Normal file
View File

@@ -0,0 +1,236 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');
require_once(CONST_LibDir.'/setup/SetupClass.php');
ini_set('memory_limit', '800M');
use Nominatim\Setup\SetupFunctions as SetupFunctions;
// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
$aCMDOptions
= array(
'Import / update / index osm data',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();
if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
date_default_timezone_set('Etc/UTC');
$oDB = new Nominatim\DB();
$oDB->connect();
$fPostgresVersion = $oDB->getPostgresVersion();
$aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
// cache memory to be used by osm2pgsql, should not be more than the available memory
$iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
if ($iCacheMemory + 500 > getTotalMemoryMB()) {
$iCacheMemory = getCacheMemoryMB();
echo "WARNING: resetting cache memory to $iCacheMemory\n";
}
$oOsm2pgsqlCmd = (new \Nominatim\Shell(getOsm2pgsqlBinary()))
->addParams('--hstore')
->addParams('--latlong')
->addParams('--append')
->addParams('--slim')
->addParams('--with-forward-dependencies', 'false')
->addParams('--log-progress', 'true')
->addParams('--number-processes', 1)
->addParams('--cache', $iCacheMemory)
->addParams('--output', 'gazetteer')
->addParams('--style', getImportStyle())
->addParams('--database', $aDSNInfo['database'])
->addParams('--port', $aDSNInfo['port']);
if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
$oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']);
}
if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
$oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']);
}
if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
$oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
}
if (getSetting('FLATNODE_FILE')) {
$oOsm2pgsqlCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE'));
}
if ($fPostgresVersion >= 11.0) {
$oOsm2pgsqlCmd->addEnvPair(
'PGOPTIONS',
'-c jit=off -c max_parallel_workers_per_gather=0'
);
}
$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
function run($oCmd)
{
global $aCMDResult;
if ($aCMDResult['quiet'] ?? false) {
$oCmd->addParams('--quiet');
}
if ($aCMDResult['verbose'] ?? false) {
$oCmd->addParams('--verbose');
}
$oCmd->run(true);
}
if ($aResult['init-updates']) {
$oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
if ($aResult['no-update-functions']) {
$oCmd->addParams('--no-update-functions');
}
run($oCmd);
}
if ($aResult['check-for-updates']) {
exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
}
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
// import diffs and files directly (e.g. from osmosis --rri)
$sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
if (!file_exists($sNextFile)) {
fail("Cannot open $sNextFile\n");
}
// Import the file
$oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile);
echo $oCMD->escapedCmd()."\n";
$iRet = $oCMD->run();
if ($iRet) {
fail("Error from osm2pgsql, $iRet\n");
}
// Don't update the import status - we don't know what this file contains
}
if ($aResult['calculate-postcodes']) {
run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
}
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
$bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
$sContentURL = '';
if (isset($aResult['import-node']) && $aResult['import-node']) {
if ($bUseOSMApi) {
$sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
} else {
$sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
}
}
if (isset($aResult['import-way']) && $aResult['import-way']) {
if ($bUseOSMApi) {
$sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
} else {
$sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;';
}
}
if (isset($aResult['import-relation']) && $aResult['import-relation']) {
if ($bUseOSMApi) {
$sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
} else {
$sContentURL = 'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;';
}
}
if ($sContentURL) {
file_put_contents($sTemporaryFile, file_get_contents($sContentURL));
$bHaveDiff = true;
}
if ($bHaveDiff) {
// import generated change file
$oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile);
echo $oCMD->escapedCmd()."\n";
$iRet = $oCMD->run();
if ($iRet) {
fail("osm2pgsql exited with error level $iRet\n");
}
}
if ($aResult['recompute-word-counts']) {
run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
}
if ($aResult['index']) {
run((clone $oNominatimCmd)
->addParams('index', '--minrank', $aResult['index-rank'])
->addParams('--threads', $aResult['index-instances']));
}
if ($aResult['update-address-levels']) {
run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
}
if ($aResult['recompute-importance']) {
run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
}
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
$oCmd = (clone($oNominatimCmd))
->addParams('replication')
->addParams('--threads', $aResult['index-instances']);
if (!$aResult['import-osmosis-all']) {
$oCmd->addParams('--once');
}
if ($aResult['no-index']) {
$oCmd->addParams('--no-index');
}
run($oCmd);
}

95
lib-php/admin/warm.php Normal file
View File

@@ -0,0 +1,95 @@
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/log.php');
require_once(CONST_LibDir.'/Geocode.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
ini_set('memory_limit', '800M');
$aCMDOptions = array(
'Tools to warm nominatim db',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
@define('CONST_Max_Word_Frequency', getSetting('MAX_WORD_FREQUENCY'));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
@define('CONST_Term_Normalization_Rules', getSetting('TERM_NORMALIZATION'));
@define('CONST_Use_Aux_Location_data', getSettingBool('USE_AUX_LOCATION_DATA'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
$oDB = new Nominatim\DB();
$oDB->connect();
$bVerbose = $aResult['verbose'];
function print_results($aResults, $bVerbose)
{
if ($bVerbose) {
if ($aResults && count($aResults)) {
echo $aResults[0]['langaddress']."\n";
} else {
echo "<not found>\n";
}
} else {
echo '.';
}
}
if (!$aResult['search-only']) {
$oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
$oReverseGeocode->setZoom(20);
$oPlaceLookup = new Nominatim\PlaceLookup($oDB);
$oPlaceLookup->setIncludeAddressDetails(true);
$oPlaceLookup->setLanguagePreference(array('en'));
echo 'Warm reverse: ';
if ($bVerbose) echo "\n";
for ($i = 0; $i < 1000; $i++) {
$fLat = rand(-9000, 9000) / 100;
$fLon = rand(-18000, 18000) / 100;
if ($bVerbose) echo "$fLat, $fLon = ";
$oLookup = $oReverseGeocode->lookup($fLat, $fLon);
$aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
print_results($aSearchResults, $bVerbose);
}
echo "\n";
}
if (!$aResult['reverse-only']) {
$oGeocode = new Nominatim\Geocode($oDB);
echo 'Warm search: ';
if ($bVerbose) echo "\n";
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
foreach ($oDB->getCol($sSQL) as $sWord) {
if ($bVerbose) echo "$sWord = ";
$oGeocode->setLanguagePreference(array('en'));
$oGeocode->setQuery($sWord);
$aSearchResults = $oGeocode->lookup();
print_results($aSearchResults, $bVerbose);
}
echo "\n";
}

Some files were not shown because too many files have changed in this diff Show More