Compare commits

...

1807 Commits

Author SHA1 Message Date
Sarah Hoffmann
fa084d2029 prepare 4.3.1 release 2023-10-04 16:44:50 +02:00
Sarah Hoffmann
145322661c switch back meaning of reverse and search in warm
Also do'n try to warm up searches on reverse-only databases.

Fixes #3213.
2023-10-04 16:39:31 +02:00
Sarah Hoffmann
22dbee27c8 adapt tests to improved search 2023-10-04 14:58:32 +02:00
Sarah Hoffmann
21381a0d35 filter duplicate results after DB query 2023-10-04 14:58:24 +02:00
Sarah Hoffmann
10ac54b000 rerank results by query
The algorithm is similar to the PHP reranking and uses the terms from
the display name to check against the query terms. However instead of
exact matching it uses a per-word-edit-distance, so that it is less
strict when it comes to mismatching accents or other one letter
differences.

Country names get a higher penalty because they don't receive a
penalty during token matching right now.

This will work badly with the legacy tokenizer. Given that it is
marked for removal, it is simply not worth optimising for it.
2023-10-04 14:58:14 +02:00
Sarah Hoffmann
dfde14255a move localization into add_result_details
This means that the locale now needs to be handed in into the search
functions already. At least search needs them for reranking.
2023-10-04 14:58:05 +02:00
Sarah Hoffmann
27657000b0 'fix' issues with recent changes to psycopg2 typing
Some of the changes just make the warnings go away. The typing info
is still incorrect on the stub side, as far as I can determine.
2023-10-04 14:57:07 +02:00
Sarah Hoffmann
6b265a5fa6 reduce expected count for multi-part words
Fixes #3196.
2023-10-04 14:56:59 +02:00
Sarah Hoffmann
64c1a4fc8c add changelog for 4.2.3 2023-09-07 10:07:16 +02:00
Sarah Hoffmann
ec47459410 prepare release 4.3.0 2023-09-06 20:08:28 +02:00
Sarah Hoffmann
c55c3657c3 reenable logging of details calls 2023-09-06 11:35:53 +02:00
Sarah Hoffmann
8b56b55761 restrict deduplication to results from placex
All other sources do not have deduplicatable entries.
2023-09-06 10:43:55 +02:00
Sarah Hoffmann
9056c9276f add bbox output to lookup results
Fixes #3149.
2023-09-06 10:27:03 +02:00
marc tobias
09ae312f09 documentation: fix a broken link, typos 2023-09-05 17:06:32 +02:00
Sarah Hoffmann
d6960c72e4 Merge pull request #3190 from lonvia/fix-internal-server-errors
Fix more failing queries
2023-09-05 17:00:31 +02:00
Sarah Hoffmann
b529e054cf Merge pull request #3189 from lonvia/add-country-area-restriction
Implement NOMINATIM_SEARCH_WITHIN_COUNTRIES for Python frontend
2023-09-05 14:29:44 +02:00
Sarah Hoffmann
b4a4ca81d1 actions: update to checkout v4 2023-09-05 11:44:12 +02:00
Sarah Hoffmann
c284df2dc9 restrict range for interpolated housenumbers
Interpolations are only supported up to 2^32 by the database.
Limit to 8 digits, which is still more than should be needed.
2023-09-05 11:41:41 +02:00
Sarah Hoffmann
18b2a4c204 avoid interpreting integral numbers as coordinates
There are addresses of type '123 W 345 N, Reigh', where 'W 345 N' is
the actual name of the street.
2023-09-05 11:26:47 +02:00
Sarah Hoffmann
e1303fb592 Merge pull request #3188 from lonvia/update-osm2pgsql
Update osm2pgsql to 1.9.2
2023-09-05 10:44:32 +02:00
Sarah Hoffmann
bd25cf04ed update osm2pgsql to 1.9.2 2023-09-04 14:53:46 +02:00
Sarah Hoffmann
ce1f4cbbdc allow lower case OSM types in lookup query 2023-09-04 14:39:35 +02:00
Sarah Hoffmann
e0aea0f27a Merge pull request #3186 from lonvia/tablespaces-for-flex
Make tablespace arguments work again for flex tables
2023-09-04 14:25:45 +02:00
Sarah Hoffmann
9848c4c56c implement NOMINATIM_SEARCH_WITHIN_COUNTRIES setting 2023-09-04 14:10:36 +02:00
Sarah Hoffmann
cb8149f8ea reenable tablespace arguments for flex tables 2023-09-04 10:45:52 +02:00
Sarah Hoffmann
0e74e82a38 add a shutdown handler for falcon server closing DB connections 2023-08-31 09:40:56 +02:00
Sarah Hoffmann
15e09f2b24 remove alias where it does not work with lambdas
Fixes #3177.
2023-08-30 21:55:34 +02:00
Sarah Hoffmann
2de8256863 fix parameter name for layer filter 2023-08-30 09:57:46 +02:00
Sarah Hoffmann
aff43fb1a3 Merge pull request #3178 from lonvia/library-documentation
Update documentation for new Python frontend
2023-08-30 09:17:24 +02:00
Sarah Hoffmann
cafd8e2b1e fix typos and grammar issues 2023-08-29 12:14:44 +02:00
Sarah Hoffmann
3794080327 rename documentation to 'Nominatim Manual' 2023-08-28 10:59:26 +02:00
Sarah Hoffmann
4e2683f068 adapt installation instruction for different frontends 2023-08-28 10:48:34 +02:00
Sarah Hoffmann
6e5f595d48 update API documentation 2023-08-28 10:31:58 +02:00
Sarah Hoffmann
2c24ba6d2d add deployment documentation for Python frontend 2023-08-27 21:27:16 +02:00
Sarah Hoffmann
3bb27fbee6 update documentation for configuration 2023-08-27 16:08:06 +02:00
Sarah Hoffmann
ef1b52eee5 add getting started section for library docs 2023-08-27 14:42:04 +02:00
Sarah Hoffmann
f917fa67aa switch to material theme
Needed for tabbed code blocks.
2023-08-25 22:47:07 +02:00
Sarah Hoffmann
386b4c82da preliminary library reference finished 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
a987f22cfb more library documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
558c42ec83 finish configuration section 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
309ac46b98 restructure library documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
399b04596e initial configuration documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
75513a23a8 docs: documentation of API classes 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
e672de036e more adaptions for mkdocstrings griffin plugin 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
6533af6a91 add base framework for library reference 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
d3372e69ec update to modern mkdocstrings python handler 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
f3809a52e8 Merge pull request #3174 from lonvia/osm2pgsql-update-fix
Be more precise on what is deleted on updates
2023-08-25 21:26:23 +02:00
Sarah Hoffmann
c5f5ab5363 be more strict about removal from place_to_be_deleted
If the type of a place is changed and then the same insert issued
again, the old data will effectively remain in the table.

Fixed #3168.
2023-08-25 14:22:49 +02:00
Sarah Hoffmann
26dfb868e9 add test case for bad update 2023-08-25 14:22:49 +02:00
Sarah Hoffmann
d5b6042118 Merge pull request #3172 from lonvia/query-timeout
Introduce timeouts for queries
2023-08-25 10:00:22 +02:00
Sarah Hoffmann
1115705cbc add additional timeout for entire request 2023-08-25 09:16:53 +02:00
Sarah Hoffmann
161d17d85b use backwards-compatible asyncio timeout implementation 2023-08-25 08:50:03 +02:00
Sarah Hoffmann
5a2ebfcd4a translate query timeouts into proper HTTP responses
Need to use a 503 here because a 408 (Request timeout) will motivate
browsers to immediately resent the request.
2023-08-25 08:50:03 +02:00
Sarah Hoffmann
06a974df36 add a timeout for DB queries 2023-08-25 08:50:03 +02:00
Sarah Hoffmann
2762c45569 apply adjusted counts only to final result 2023-08-24 21:37:02 +02:00
Sarah Hoffmann
fd85483ce3 Merge pull request #3169 from lonvia/tweak-search-with-frequent-names
Further tweak detection of queries that would return a massive amount of results
2023-08-24 14:22:35 +02:00
Sarah Hoffmann
0a2d0c3b5c allow terms with frequent searches together with viewbox 2023-08-24 09:21:09 +02:00
Sarah Hoffmann
de7f9a4bd9 Merge pull request #3167 from lonvia/explicit-encoding
Send explicit charset in content-type when returning json
2023-08-24 08:53:11 +02:00
Sarah Hoffmann
c5836c8090 Merge pull request #3139 from mtmail/update-search-examples
Search.md - update XML and JSON example output
2023-08-23 23:41:30 +02:00
Sarah Hoffmann
dcdda314e2 further tweak search containing very frequent tokens
Excluding non-rare full names is not really possible because it makes
addresses with street names like 'main st' unsearchable. This tries to
leav all names in but refrain from ordering results by accuracy
when too many results are expected. This means that the DB will simply
get the first n results without any particular order.
2023-08-23 23:04:12 +02:00
Sarah Hoffmann
a9edd57fe2 send charset again in content-type when returning json
There are quite a few applications out there that will use some local
encoding when the charset is not explicitly given.
2023-08-23 20:55:57 +02:00
Sarah Hoffmann
cbd9fad94b Merge pull request #3165 from lonvia/update-osm2pgsql
Update osm2pgsql to 1.9.1
2023-08-23 15:57:55 +02:00
Sarah Hoffmann
bc1009f8c2 vagrant scripts: add missing yaml library 2023-08-23 08:34:12 +02:00
Sarah Hoffmann
719b66e5ed add new required json library for osm2pgsql 2023-08-23 00:24:31 +02:00
Sarah Hoffmann
5f09ba4e10 update osm2pgsql to 1.9.1 2023-08-23 00:24:31 +02:00
Sarah Hoffmann
517a0cb673 Merge pull request #3159 from lonvia/fix-name-lookup-for-housenames
Fix name lookup for housenames
2023-08-20 15:52:20 +02:00
Sarah Hoffmann
7c79b07817 fix parameter use for ST_Project
Before postgis 3.4 ST_Project required a geography as input and seemed
to have implicitly converted to geography. Since 3.4 geometry input
is supported but leads to a completely different result.
2023-08-20 13:52:03 +02:00
Sarah Hoffmann
23eed4ff2f fix tag name for housename addresses in layer selection
Fixes #3156.
2023-08-19 15:57:33 +02:00
Sarah Hoffmann
4559886d83 Merge pull request #3155 from lonvia/caching-of-transliterators
Cache ICU transliterators between calls
2023-08-16 22:55:54 +02:00
Sarah Hoffmann
bfc706a596 cache ICU transliterators and reuse them 2023-08-15 23:08:44 +02:00
Sarah Hoffmann
9805a461eb Merge pull request #3150 from alfmarcua/allow_neg_id_details
Allow negative osm_id in details page by setting default parameter to 0
2023-08-14 11:41:42 +02:00
Sarah Hoffmann
bcf8433ba8 fix regression in default setting for details linkedplaces 2023-08-13 16:24:19 +02:00
Sarah Hoffmann
746dd057b9 prefer name-only searches more 2023-08-13 15:24:16 +02:00
Sarah Hoffmann
b710297d05 return bbox of full country for country searches
Fixes #3149.
2023-08-13 14:37:28 +02:00
Sarah Hoffmann
0a8e8cec0f fix application of label to wrong expression 2023-08-13 11:59:01 +02:00
Sarah Hoffmann
96e5a23727 avoid lambda SQL in connection with alias tables 2023-08-13 11:40:49 +02:00
Sarah Hoffmann
611b925368 use coalsce() instead of indexless postgis functions
ST_Intersects is deemed too expensive by the query planer which
leads to odd plans when index use is avoided.
2023-08-12 19:14:13 +02:00
Sarah Hoffmann
cab2a74740 do not use index when searching in large areas
This concerns viewboxes as well as radius search.
2023-08-12 16:12:44 +02:00
Sarah Hoffmann
fa3ac22a8f adapt tests to changes in search 2023-08-12 16:12:31 +02:00
Sarah Hoffmann
95d1048789 take token_assignment penalty into account
Also computes the expected count differently when addresses are
involved. Address token counts do not bare a direct relation to
real counts.
2023-08-12 15:33:50 +02:00
Sarah Hoffmann
38b2b8a143 fix debug output for NearSearch
The search info is in a subsearch and was therefore not taken into
account.
2023-08-12 11:27:55 +02:00
Sarah Hoffmann
3d0bc85b4d improve penalty for token-split words
The rematch penalty for partial words created by the transliteration
need to take into account that they are rematched against the full word.
That means that missing beginning and end should not get a significant
penalty.
2023-08-12 11:26:02 +02:00
alfmarcua
25a391070b Allow negative osm_id in details page by setting default parameter to 0 2023-08-11 10:46:28 +02:00
Sarah Hoffmann
926c4a7d04 fix lookup polygon output
Fixes #3147.
2023-08-08 21:31:25 +02:00
marc tobias
5683f55646 Search.md - update XML and JSON example output 2023-08-08 17:49:11 +02:00
Sarah Hoffmann
671f4e943e Merge pull request #3146 from lonvia/forbid-mixed-queries
Do not allow to mix structured and unstructured search
2023-08-08 17:34:32 +02:00
Sarah Hoffmann
282c0da941 return an error when q is used together with structured parameters 2023-08-08 15:54:55 +02:00
Sarah Hoffmann
78648f1faf remove lookup by address only
There are too many lookups where the address is very frequent,
even when many address parts are present.
2023-08-06 21:00:10 +02:00
Sarah Hoffmann
8d9b5e4775 allow oversized viewboxes again
This seems to be a rather regular thing when unconditionally sending
the current view and being zoomed out.

Fixes #3137.
2023-08-06 17:50:35 +02:00
Sarah Hoffmann
996026e5ed provide full URL in more field
This is a regression against the PHP version.

Fixes #3138.
2023-08-06 17:50:02 +02:00
Sarah Hoffmann
2c7e1db5f6 remove SQL lambdas with IN expressions
The values of IN expressions are incorrectly cached.
2023-08-02 12:34:07 +02:00
Sarah Hoffmann
2171b38551 only print non-empty search tables 2023-08-02 09:25:47 +02:00
Sarah Hoffmann
afdbdb02a1 do not lookup by address vector when only few tokens are available
Names of countries and states are exceedingly rare in the word count
but are very frequent in the address. A short name has the danger
of producing too many results.
2023-08-02 09:25:47 +02:00
Sarah Hoffmann
8adeaa2c7e fix regression with lat/lon in json output
lat, lon is returned as strings in the PHP output. Reproduce that in the
Python frontend.

See #3115.
2023-08-01 14:27:43 +02:00
Sarah Hoffmann
d15f605129 allow OPTIONS method in starlette CORS middleware
If not allowed, then the middleware will return a 400 on pre-flight
CORS requests.

Fixes #3129.
2023-08-01 11:12:36 +02:00
Sarah Hoffmann
252fe42612 Merge pull request #3122 from miku0/sanitizer-final
Adds sanitizer for Japanese addresses to correspond to block address
2023-08-01 10:38:58 +02:00
miku0
67e1c7dc72 Moved KANJI_MAP to icu-rules 2023-07-31 11:57:49 +00:00
Sarah Hoffmann
c29ffc38e6 Merge pull request #3128 from lonvia/fix-classtype-lookup
Fix query over classtype tables
2023-07-31 10:20:58 +02:00
miku0
4d61cc87cf Add the test of reconbine_place 2023-07-31 02:39:56 +00:00
miku0
2350018106 Fixed cosmetic issues 2023-07-31 02:39:04 +00:00
Sarah Hoffmann
8fc3dd9457 fix query over classtype tables
The case statement prevented the index on the classtype tables
from being used. Move the case statement inside the geometry
function instead.
2023-07-30 23:51:36 +02:00
Sarah Hoffmann
d97ca9fcb2 Merge pull request #3127 from lonvia/file-logging
Add file logging for Python frontend
2023-07-30 21:22:48 +02:00
Sarah Hoffmann
e523da9e12 reintroduce file logging for Python frontend 2023-07-30 19:58:00 +02:00
miku0
67706cec4e add @fail-legacy 2023-07-27 07:33:53 +00:00
miku0
fac8c32cda Moved KANJI_MAP to global variable 2023-07-26 21:43:22 +00:00
Sarah Hoffmann
1c6f426363 Merge pull request #3125 from lonvia/warm-to-python
Port warm and export functions to Python
2023-07-26 22:37:37 +02:00
Sarah Hoffmann
8cba65809c older version of Postgres cannot convert jsonb to int 2023-07-26 17:45:21 +02:00
Sarah Hoffmann
77ed4f98bb older Python versions need a reference to the loop for a lock 2023-07-26 15:17:18 +02:00
miku0
848e5ac5de Correction to PR's comment 2023-07-26 09:50:25 +00:00
Sarah Hoffmann
9448c5e16f add tests for new arm and export Python functions 2023-07-26 11:09:52 +02:00
miku0
0722495434 add japanese sanitizer 2023-07-26 07:54:58 +00:00
Sarah Hoffmann
d545c6d73c mostly remove php-cgi requirement
This is now only needed for BDD tests against the php API.
2023-07-26 00:10:11 +02:00
Sarah Hoffmann
f69fea4210 remove now unused run_api_script function 2023-07-25 22:45:29 +02:00
Sarah Hoffmann
4cd0a4ced4 remove now unused run_legacy_script() 2023-07-25 21:39:23 +02:00
Sarah Hoffmann
0804cc0cff port export function to Python
Some of the parameters have been renoved as they don't make sense
anymore.
2023-07-25 21:39:23 +02:00
Sarah Hoffmann
faeee7528f move warm script to python code 2023-07-25 21:39:23 +02:00
Sarah Hoffmann
261e0cfd5a Merge pull request #3121 from lonvia/port-remaining-api-calls
Port remaining API endpoints to Python
2023-07-25 20:56:38 +02:00
Sarah Hoffmann
66ecb56cea add tests for new endpoints 2023-07-25 10:57:19 +02:00
Sarah Hoffmann
79bd54f610 add /polygons endpoint to Python v1 API 2023-07-22 20:59:13 +02:00
Sarah Hoffmann
30cef4d5fd add deletable endpoint 2023-07-22 17:21:24 +02:00
Sarah Hoffmann
8d52032263 remove debug print 2023-07-22 13:54:23 +02:00
Sarah Hoffmann
4a5786334b Merge pull request #3117 from lonvia/fix-assorted-search-errors
More improvements to the Python search algorithm
2023-07-22 11:45:36 +02:00
Sarah Hoffmann
587698a6f3 disallow special housenumber search with a single frequent partial 2023-07-20 18:05:54 +02:00
Sarah Hoffmann
927d2cc824 do not split names from typed phrases
When phrases are typed, they should only contain exactly one term.
2023-07-17 20:09:08 +02:00
Sarah Hoffmann
7f9cb4e68d split up get_assignment functon in more readable parts 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
d48ea4f22c disallow address searches that start with a postcode
These are postcode searches and nothing else.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
412bd2ec20 block search queries with too many tokens 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
1c189060c2 simplify yield_lookups() function
Move creation of field lookups in separate functions to make the code
more readable.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
4a00a3c0f5 penalize name token splitting when phrases are used 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
8366e4ca83 penalize search with frequent partials
Avoid search against frequent partials if we have already looked for
the full name equivalents.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
283db76e45 avoid splitting of first token when a housenumber is present
This only covers the case of <poi name> <street name> <housenumber>
which is exceedingly rare.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
8a36ed4f6f increase threshold for full name searches
They still should be preferrred over expensive partial name searches.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
d0f45155c8 fix search for housenumber names
The search still included a lookup of housenumbers in children which is
wrong.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
9fc235d670 Merge pull request #3112 from jenkin/fix-polgyon-polygon-typo
fix polgyon polygon typo
2023-07-17 14:20:14 +02:00
Alessio Cimarelli
42c549274f fix polgyon polygon typo 2023-07-17 10:11:57 +02:00
Sarah Hoffmann
2e56182a7f Merge pull request #3110 from lonvia/sql-lambda-queries
Use SQLAlchemy's lambda statements
2023-07-14 18:57:31 +02:00
Sarah Hoffmann
7932b1849b selected lambdas for search 2023-07-14 15:43:29 +02:00
Sarah Hoffmann
886374d779 add lambdas for layer filters 2023-07-14 15:27:20 +02:00
Sarah Hoffmann
d42e2e391f avoid forwarding variables via SQL 2023-07-14 15:27:16 +02:00
Sarah Hoffmann
f264eaeda2 make SQL statements in reverse lambda functions
Further improves internal caching of statements by SQLAlchemy.
2023-07-14 14:39:14 +02:00
Sarah Hoffmann
35fd74af6d Merge pull request #3096 from alfmarcua/search-within-countries-parameter
Parameterise the search only within countries
2023-07-12 17:45:55 +02:00
alfmarcua
4b53cf1464 Split lookupInCountry in two functions and document NOMINATIM_SEARCH_WITHIN_COUNTRIES parameter 2023-07-12 13:53:23 +02:00
alfmarcua
26e78efbb9 Parameterise the search only within countries 2023-07-12 13:53:23 +02:00
Sarah Hoffmann
157f0b8a83 Merge pull request #3109 from lonvia/prepared-statements
Make prepared statements work with Python API
2023-07-10 11:45:29 +02:00
Sarah Hoffmann
d743cf308e avoid index-use on rank parameters for reverse lookups 2023-07-09 22:49:31 +02:00
Sarah Hoffmann
f4cdcb995c force a fixed pool size and make it configurable 2023-07-09 00:31:53 +02:00
Sarah Hoffmann
75139961a3 use constant expressions to select partial indexes in reverse
When expressions are generated with SQLAlchemy, any constants are
replaced with bind parameters. The bind parameters become parameters of
prepared statements. The result is that the query planner tends to
oversee that the partial indexes can be used.
2023-07-09 00:31:53 +02:00
Sarah Hoffmann
3e2dd59a94 remove disabling of prepared statements
Fixes for query planning with prepared statements to follow in
subsequent commits.
2023-07-08 15:59:15 +02:00
Sarah Hoffmann
c01386b5b4 Merge pull request #3107 from lonvia/performance-search
Assorted fixes to Python search code
2023-07-08 15:53:42 +02:00
Sarah Hoffmann
9cb8447673 increase minimum required SQLAlchemy version to 1.4.31 2023-07-06 14:15:51 +02:00
Sarah Hoffmann
e67355ab0e make get_addressdata calls cachable
VALUEs() is not a cachable construct in SQLAlchemy, so use arrays
instead. Also add a special case for single results, the usual result
for reverse queries.
2023-07-06 12:16:57 +02:00
Sarah Hoffmann
9cb9b670d1 add support for postcompile literals in debug output 2023-07-06 12:16:57 +02:00
Sarah Hoffmann
3e725bb2db fix SRID handling in Geometry type 2023-07-06 12:16:57 +02:00
Sarah Hoffmann
cc45930ef9 avoid lookup via partials on frequent words
Drops expensive searches via partials on terms like 'rue de'.

See #2979.
2023-07-06 12:16:57 +02:00
Sarah Hoffmann
3266daa8fd add a small penalty to lookups in address vectors 2023-07-04 16:54:42 +02:00
Sarah Hoffmann
ce17b0eeca Merge pull request #3101 from lonvia/custom-geometry-type
Improve use of SQLAlchemy statement cache with search queries
2023-07-03 11:03:26 +02:00
Sarah Hoffmann
17a65d82bb make types compatible with older Python versions 2023-07-02 23:06:42 +02:00
Sarah Hoffmann
cc7646665c remove GeoAlchemy as dependency 2023-07-02 23:06:42 +02:00
Sarah Hoffmann
82216ebf8b always run function update on migrations
This means that we can have migrations which require nothing but
an update of the functions.
2023-07-01 20:18:59 +02:00
Sarah Hoffmann
49e0d83d5d fix linting issues 2023-07-01 20:18:59 +02:00
Sarah Hoffmann
673c3c7a55 replace regexp_match with generic op() functions
Works around a bug in SQLAlchemy where regexp_match creates an
unstable cache key.
2023-07-01 18:15:22 +02:00
Sarah Hoffmann
5135041405 replace CASE construct with plpgsql function 2023-07-01 18:15:22 +02:00
Sarah Hoffmann
42631b85c7 band-aid for SQLAlchemy 1.4 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
9f6f12cfeb move search to bind parameters 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
6c4c9ec1f2 switch reverse() to new Geometry datatype
Also switches to using bind parameters for recurring parameters.
2023-07-01 18:03:07 +02:00
Sarah Hoffmann
4bb4db0668 introduce slim Geometry database type 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
505fdd02ca Merge pull request #3100 from lonvia/fix-name-merging-in-unnamed-boundaries
Fix merging of linked names into unnamed boundaries
2023-07-01 15:36:56 +02:00
Sarah Hoffmann
a873f260cf fix merging of linked names into unnamed boundaries
The NULL value of the boundaries' name field was erasing all
content when used in SQL operations.
2023-06-30 22:14:11 +02:00
Sarah Hoffmann
b45f761227 Merge pull request #3099 from lonvia/determine-place-address-from-tokenizer
Use information from tokenizer to determine street vs. place address
2023-06-30 21:47:57 +02:00
Sarah Hoffmann
d7a3039c2a also switch legacy tokenizer to new street/place choice behaviour 2023-06-30 17:03:17 +02:00
Sarah Hoffmann
6c5589c9d2 fix optional string representation or repr(PlaceName) 2023-06-30 11:10:44 +02:00
Sarah Hoffmann
645ea5a057 use information from tokenizer to determine street vs. place address
So far the SQL logic used the information from the address field
to determine if an address is attached to a street or place.
This changes the logic to use the information provided in the
token_info. This allows sanitizers to enforce a certain parenting
without changing the visible address information.
2023-06-30 11:08:25 +02:00
Sarah Hoffmann
2755ebe883 Merge pull request #3094 from lonvia/fix-failing-bdd-tests
Add BDD tests against Python frontend to CI
2023-06-22 22:28:31 +02:00
Sarah Hoffmann
4b829b5ff9 Merge pull request #3090 from mtmail/check-database-on-frozen-database
check-database on frozen db shouldnt recommend indexing
2023-06-22 20:11:30 +02:00
Sarah Hoffmann
ed19340af0 add python frontend tests to CI 2023-06-22 17:29:44 +02:00
Sarah Hoffmann
2d05ff0190 slightly adapt postcode tests 2023-06-22 16:51:59 +02:00
Sarah Hoffmann
0d338fa4c0 bdd: fix faking HTTP headers for python web frameworks 2023-06-22 14:00:33 +02:00
mtmail
15a66e7b7d Merge branch 'osm-search:master' into check-database-on-frozen-database 2023-06-22 12:14:55 +02:00
Sarah Hoffmann
3a21999a17 move text normalization into extra function 2023-06-22 10:48:05 +02:00
Sarah Hoffmann
08dcd05d7b Merge pull request #3093 from lonvia/remove-sanic
Remove support for Sanic
2023-06-22 09:55:32 +02:00
Marc Tobias
2337cc653b check-database on frozen db shouldnt recommend indexing 2023-06-21 17:47:57 +02:00
Sarah Hoffmann
0deb9262c9 use in operator for enum tests
Fixes a complaint by mypy.
2023-06-21 15:30:40 +02:00
Sarah Hoffmann
9bc5be837b remove useless check
Found by new mypy version.
2023-06-21 11:56:39 +02:00
Sarah Hoffmann
b79d5494f9 remove support for sanic framework
There is no performance gain over falcon or starlette but the special
structure of sanic makes it hard to have exchangable code
2023-06-21 10:53:57 +02:00
Sarah Hoffmann
ded2c5bf68 update project information for taginfo.json 2023-06-21 10:06:25 +02:00
Sarah Hoffmann
bd2c64876f Merge pull request #3045 from biswajit-k/taginfo
Add script to generate taginfo project description
2023-06-21 10:01:40 +02:00
Sarah Hoffmann
7c66fef63f Merge pull request #3091 from lonvia/fix-postcode-search
Assorted smaller fixes for Python-based search code
2023-06-20 16:13:20 +02:00
Sarah Hoffmann
4ad8818809 avoid fallback country lookup when places are excluded 2023-06-20 12:22:08 +02:00
Sarah Hoffmann
2f4342810d allow empty parts in excluded_place_id
This worked before.
2023-06-20 11:56:43 +02:00
Sarah Hoffmann
36df56b093 fix header name for browser languages 2023-06-20 11:56:43 +02:00
Sarah Hoffmann
d0a1e8e311 tweak postcode search
Give a preference to left-right reading, i.e <postcode>,<address>
prefers a postcode search while <address>,<postcode> rather does
an address search.

Also exclude non-addressables, countries and state from results when a
postcode is contained in the query.
2023-06-20 11:56:43 +02:00
Sarah Hoffmann
1b50381852 use string representation when dumping variables 2023-06-20 10:15:51 +02:00
Sarah Hoffmann
3443d2c129 fix debug output for searches
Fields have been renamed.
2023-06-20 10:12:19 +02:00
Sarah Hoffmann
1f83efa8f2 Merge pull request #3086 from lonvia/close-connection-on-replication
Close database connections while waiting for the next update cycle
2023-06-19 15:48:00 +02:00
Sarah Hoffmann
a7bd39b62a Merge pull request #3087 from lonvia/conditional-spgist
Disable SPGist for PostgreSQL < 11
2023-06-19 15:47:37 +02:00
Sarah Hoffmann
1177b30a60 Merge pull request #3085 from lonvia/allow-brackets-in-password
Do not fail php script generation when curly braces are present
2023-06-19 14:44:22 +02:00
Sarah Hoffmann
10e56e0de7 no longer recommend continuous update mode 2023-06-19 12:07:09 +02:00
Sarah Hoffmann
6f3339cc49 close DB connection when waiting for next update cycle 2023-06-19 12:02:51 +02:00
Sarah Hoffmann
771be0e056 do not fail php script generation when curly braces are present
Fixes #3084.
2023-06-19 11:23:30 +02:00
Sarah Hoffmann
71ad4fc406 Add custom config to information required for bug reports 2023-06-18 09:27:55 +02:00
Sarah Hoffmann
6a5695d059 disable SPGist for PostgreSQL < 11
Before that version, an operator needed to be given.
2023-06-13 15:15:43 +02:00
Sarah Hoffmann
aaf0e7db06 add instructions for switching to user for install scripts
Fixes #3057.
2023-06-13 14:12:56 +02:00
Sarah Hoffmann
7aa0aba382 remove Ubuntu 18 installation from TOC
Fixes #3072.
2023-06-13 14:05:54 +02:00
Sarah Hoffmann
9af190a43c Merge pull request #3073 from mtmail/vagrantfile-remove-ubuntu-18
Vagrantfile - remove Ubuntu 18
2023-06-13 12:31:10 +02:00
marc tobias
2e46bc0aea Vagrantfile - remove Ubuntu 18 2023-05-30 00:22:18 +02:00
Sarah Hoffmann
a413aae8a3 Merge pull request #3067 from lonvia/python-search-api
Python implementation of Search API
2023-05-27 08:56:06 +02:00
Sarah Hoffmann
317cc5c544 make code backwards-compatible with older Python versions 2023-05-26 23:33:35 +02:00
Sarah Hoffmann
41bf162306 remove tests for old PHP cli commands 2023-05-26 17:36:05 +02:00
Sarah Hoffmann
43c27dffd2 fix new linting warning 2023-05-26 16:31:43 +02:00
Sarah Hoffmann
8f299838f7 fix various failing BDD tests 2023-05-26 15:08:48 +02:00
Sarah Hoffmann
146a0b29c0 add support for search by houenumber 2023-05-26 14:10:57 +02:00
Sarah Hoffmann
964bc7fbe0 ignore name rankings without variants 2023-05-26 13:53:34 +02:00
Sarah Hoffmann
75aa3cc9bd properly close connections when shutting down starlette 2023-05-26 13:53:34 +02:00
Sarah Hoffmann
0843fefad3 fix geocodejson address assignment
The categories should be assigned the address part with the
highest address rank not lowest.
2023-05-26 11:43:11 +02:00
Sarah Hoffmann
371a780ef4 add server fronting for search endpoint
This also implements some of the quirks of free-text search of the
V1 API, in particular, search for categories and coordinates.
2023-05-26 11:40:45 +02:00
Sarah Hoffmann
c7db69a30c add timestamps to HTML debug output 2023-05-26 09:05:44 +02:00
Sarah Hoffmann
b48cda7173 mingle names from linked places into results 2023-05-24 23:17:15 +02:00
Sarah Hoffmann
0608cf1476 switch CLI search command to python implementation 2023-05-24 22:54:54 +02:00
Sarah Hoffmann
f335e78d1e make localisation of results explicit
Localisation was previously done as part of the formatting but might
also be useful on its own when working with the results directly.
2023-05-24 18:12:34 +02:00
Sarah Hoffmann
dcfb228c9a add API functions for search functions
Search is now split into three functions: for free-text search,
for structured search and for search by category. Note that the
free-text search does not have as many hidden features like
coordinate search. Use the search parameters for that.
2023-05-24 18:05:43 +02:00
Sarah Hoffmann
dc99bbb0af implement actual database searches 2023-05-24 13:52:31 +02:00
Sarah Hoffmann
c42273a4db implement search builder 2023-05-23 11:23:44 +02:00
Sarah Hoffmann
3bf489cd7c implement token assignment 2023-05-22 15:49:03 +02:00
Sarah Hoffmann
d8240f9ee4 add query analyser for legacy tokenizer 2023-05-22 11:07:14 +02:00
Sarah Hoffmann
2448cf2a14 add factory for query analyzer 2023-05-22 09:23:19 +02:00
Sarah Hoffmann
004883bdb1 query analyzer for ICU tokenizer 2023-05-22 08:46:19 +02:00
biswajit-k
11a1191ba0 add ci-test for taginfo file generation 2023-05-21 23:49:04 +05:30
Sarah Hoffmann
ff66595f7a add data structure for tokenized query 2023-05-21 09:30:57 +02:00
biswajit-k
9de2a342e8 docs: add installation instruction for dkjson package 2023-05-19 23:22:42 +05:30
biswajit-k
562f8bc84a Add script to generate taginfo project description 2023-05-19 18:02:06 +05:30
Sarah Hoffmann
d69411f414 Merge pull request #3064 from lonvia/clicmd-debug-output
Enable debug output on command line
2023-05-19 08:55:26 +02:00
Sarah Hoffmann
39ccb15880 enable debug output on command line 2023-05-18 22:37:46 +02:00
Sarah Hoffmann
d2c56f9f96 Merge pull request #3063 from lonvia/variable-parameters
Rework how search parameters are handed to the Python API
2023-05-18 22:27:18 +02:00
Sarah Hoffmann
7f1a0ce94a fix use of subquery in reverse 2023-05-18 20:27:07 +02:00
Sarah Hoffmann
32dbf83747 move zoom_to_rank computation to extra file 2023-05-18 18:29:41 +02:00
Sarah Hoffmann
d9d8b9c526 add tests for parameter converter 2023-05-18 18:09:07 +02:00
Sarah Hoffmann
9036bf3398 Merge pull request #3062 from lonvia/enable-psycopg
Add support for psycopg 3 in the frontend
2023-05-18 18:07:11 +02:00
Sarah Hoffmann
bef5cea48e switch API parameters to keyword arguments
This switches the input parameters for API calls to a generic
keyword argument catch-all which is then loaded into a dataclass
where the parameters are checked and forwarded to internal
function.

The dataclass gives more flexibility with the parameters and makes
it easier to reuse common parameters for the different API calls.
2023-05-18 17:42:23 +02:00
Sarah Hoffmann
84abf7c95a actions: run tests against psycopg 2023-05-18 16:08:32 +02:00
Sarah Hoffmann
1f0e1bec0e enable API use with psycopg 3 2023-05-18 16:08:32 +02:00
Sarah Hoffmann
8f88613a6b Merge pull request #3050 from mtmail/tiger-check-if-database-frozen
when adding Tiger data, check first if database is in frozen state
2023-05-08 16:35:31 +02:00
Marc Tobias
e5f332bd71 when adding Tiger data, check first if database is in frozen state 2023-05-08 14:35:30 +02:00
Sarah Hoffmann
07589cfc34 Merge pull request #3054 from lonvia/add-amenity-to-documentation
Docs: reintroduce amenity parameter for structured search
2023-05-08 11:12:17 +02:00
Sarah Hoffmann
68e0306e62 docs: reintroduce amenity parameter for structured search 2023-05-08 10:24:12 +02:00
Sarah Hoffmann
5751686fdc Merge pull request #3006 from biswajit-k/generalize-filter
generalize filter function for sanitizers
2023-04-11 19:20:08 +02:00
Sarah Hoffmann
2af20f8df8 Merge pull request #3030 from lonvia/interpolation-corner-cases
Fix a number of corner cases with interpolation splitting
2023-04-07 13:59:52 +02:00
Sarah Hoffmann
60c1301fca fix a number of corner cases with interpolation splitting
Snapping a line to a point before splitting was meant to ensure
that the split point is really on the line. However, ST_Snap() does
not always behave well for this case. It may shorten the interpolation
line in some cases with the result that two points housenumbers
suddenly fall on the same point. It might also shorten the line down
to a single point which then makes ST_Split() crash.

Switch to a combination of ST_LineLocatePoint and ST_LineSubString
instead, which guarantees to keep the original geometry. Explicitly
handle the corner cases, where the split point falls on the beginning
or end of the line.
2023-04-06 16:54:00 +02:00
Sarah Hoffmann
b8a7319212 use place_to_be_deleted when force deleting objects 2023-04-04 11:09:17 +02:00
Sarah Hoffmann
6ef4d04b46 Merge pull request #3023 from lonvia/lookup-api
Python implementation of Lookup API
2023-04-03 16:20:47 +02:00
Sarah Hoffmann
1dce2b98b4 switch CLI lookup command to Python implementation 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
86c4897c9b add lookup call to server glue 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
2237603677 add tests for new lookup API 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
6e81596609 rename lookup() API to details and add lookup call
The initial plan to serve /details and /lookup endpoints from
the same API call turned out to be impractical, so the API now
also has deparate functions for both.
2023-04-03 14:40:41 +02:00
Sarah Hoffmann
4607c7ed04 python lookup: add function for simple lookups 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
63638eb447 python lookup: factor out finding in tables into own function 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
c92ac84679 Merge pull request #3021 from lonvia/readd-postalcode-relations
Flex style: reinstate postalcode relations
2023-04-03 12:11:49 +02:00
Sarah Hoffmann
ed9cd9f0e5 bdd: disable detail tests searching by place ID
Place IDs are not stable and cannot be used in tests.
2023-04-03 10:07:06 +02:00
Sarah Hoffmann
7d30dbebc5 flex style: reinstate postcode boundaries
Postcode boundaries don't have a name, so need to be imported
unconditionally.
2023-04-03 09:17:50 +02:00
biswajit-k
8f03c80ce8 generalize filter for sanitizers 2023-04-01 19:24:09 +05:30
Sarah Hoffmann
ee0366af88 Merge pull request #3019 from lonvia/add-data-postprocessing
Add postprocessing to add-data function
2023-03-31 22:19:46 +02:00
Sarah Hoffmann
683a3cb3ec call osm2pgsql postprocessing flush_deleted_places() when adding data 2023-03-31 18:05:07 +02:00
Sarah Hoffmann
f8bca4fbcb Merge pull request #3020 from lonvia/reverse-api
Python implementation of reverse
2023-03-31 18:01:50 +02:00
Sarah Hoffmann
1e2a1d9ce5 limit results for country lookup 2023-03-30 10:00:19 +02:00
Sarah Hoffmann
1feac2069b add BDD tests for new layers parameter 2023-03-30 09:54:55 +02:00
Sarah Hoffmann
26ee6b6dde python reverse: add support for point geometries in interpolations 2023-03-29 17:21:33 +02:00
Sarah Hoffmann
c150ca4889 add wsgi entry point for falcon server 2023-03-28 15:05:52 +02:00
Sarah Hoffmann
e717e349d0 add wsgi entry point for starlette
uvicorn needs a parameter-free function to start.
2023-03-28 15:03:00 +02:00
Sarah Hoffmann
e158017086 ignore broken data in interpolation table 2023-03-28 14:57:39 +02:00
Sarah Hoffmann
36d068871d disable prepared statements
Prepared statements do not work well with the partial indexes that
Nominatim uses because all Python constants are replaced with
parameters. A query like:

  placex.select().where(placex.c.rank_address.between(4, 25)

gets translated into a prepared query with two parameters:

  SELECT * FROM placex WHERE rank_address BETWEEN %s and %s

And this does not work with a partial index of:

  CREATE INDEX on placex(geometry) WHERE rank_address between 4 and 25
2023-03-28 14:53:45 +02:00
Sarah Hoffmann
6c67a4b500 switch reverse CLI command to Python implementation 2023-03-26 18:09:33 +02:00
Sarah Hoffmann
86b43dc605 make sure PHP and Python reverse code does the same
The only allowable difference is precision of coordinates. Python uses
a precision of 7 digits where possible, which corresponds to the
precision of OSM data.

Also fixes some smaller bugs found by the BDD tests.
2023-03-26 16:21:43 +02:00
Sarah Hoffmann
300921a93e add server glue for reverse API call 2023-03-25 17:04:02 +01:00
Sarah Hoffmann
35b52c4656 add output formatters for ReverseResults
These formatters are written in a way that they can be reused for
search results later.
2023-03-25 15:45:03 +01:00
Sarah Hoffmann
878302a622 ignore NotImplementedErrors when compiling SQL 2023-03-24 11:16:02 +01:00
Sarah Hoffmann
55277738d4 factor out layer checks in reverse function 2023-03-24 10:08:01 +01:00
Sarah Hoffmann
2f54732500 python: implement reverse lookup function
The implementation follows for most part the PHP code but introduces an
additional layer parameter with which the kind of places to be returned
can be restricted. This replaces the hard-coded exclusion lists.
2023-03-23 22:38:37 +01:00
Sarah Hoffmann
41da298b18 add python implementation of reverse
This adds an additional layer parameter and slightly changes the
queries to do more efficient lookups for large area features.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
ebcf8c2b6b api: make details parameter optional 2023-03-23 10:16:50 +01:00
Sarah Hoffmann
1facfd019b api: generalize error handling
Return a consistent error response which takes into account the chosen
content type. Also adds tests for V1 server glue.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
00e3a752c9 split SearchResult type
Use adapted types for the different result types. This makes it
easier to have adapted output formatting and means there are only
result fields that are filled.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
d03fd3f883 Merge pull request #3013 from mtmail/changelog-4-2-x
update ChangeLog to verison 4.2.2
2023-03-23 09:20:25 +01:00
marc tobias
fa3d13ac7e update ChangeLog to verison 4.2.2 2023-03-22 23:45:59 +01:00
Sarah Hoffmann
434bd5a5bb Merge pull request #3011 from lonvia/fix-flex-scripts
Fix extra tag handling in some flex scripts
2023-03-21 09:58:53 +01:00
Sarah Hoffmann
9aca389bda Merge pull request #3010 from lonvia/adapt-zoom-for-reverse
Minor adjustments to reverse zoom translation
2023-03-21 08:56:25 +01:00
Sarah Hoffmann
69ce42b22c remove more tags from full style
The full style should only save the necessary tags needed for
processing.
2023-03-20 21:42:24 +01:00
Sarah Hoffmann
114cc776be fix handling of unused extra tags
The tags can only be moved to extra tags after the main tags have been
handled.
2023-03-20 21:20:27 +01:00
Sarah Hoffmann
5e5cff897f minor adjustment to reverse zoom translation
Add a 'village' zoom level at 13 between town and neighbourhood
and a all locality-like objects for zoom 15. These zoom levels had
the same behaviour as the lower level so far. However, the distinction
for village and locality may be useful at times.
2023-03-20 20:47:42 +01:00
Sarah Hoffmann
a8bedb6ab9 Merge pull request #3003 from lonvia/rework-bdd-api-tests
Reorganize code around BDD API tests and extend reverse API tests
2023-03-10 10:01:24 +01:00
Sarah Hoffmann
81430bd3bd bdd: be more fuzzy with coordinate comparisons 2023-03-09 22:37:45 +01:00
Sarah Hoffmann
93203f355a avoid recent Python dialect 2023-03-09 20:57:43 +01:00
Sarah Hoffmann
b730d286ad fix polygon simplification in reverse results
polygon_threshold has never really worked for reverse.
2023-03-09 20:24:07 +01:00
Sarah Hoffmann
3f2296e3ea bdd: extend reverse API tests for format checks
Reorganise the API reverse tests and extend the checks for the
output format, testing for all expected fields.
2023-03-09 20:20:50 +01:00
Sarah Hoffmann
2b7eb4906a bdd: add tests for valid debug output 2023-03-09 20:10:51 +01:00
Sarah Hoffmann
db1aa4d02e bdd: replace old formatting strings 2023-03-09 19:49:55 +01:00
Sarah Hoffmann
ad88d7a3e0 bdd: more format checks for reverse XML 2023-03-09 19:40:24 +01:00
Sarah Hoffmann
e42c1c9c7a bdd: new step variant 'result contains in field'
This replaces the + notation for recursing into result dictionaries.
2023-03-09 19:31:21 +01:00
Sarah Hoffmann
556bb2386d bdd: factor out computation of result to-check lists 2023-03-09 18:01:45 +01:00
Sarah Hoffmann
1e58cef174 bdd: replace property_list construct with standard check functions 2023-03-09 17:56:28 +01:00
Sarah Hoffmann
01010e443f bdd: remove special case for osm_type field
The fuzzy field check hide cover formatting errors. Use 'osm' when
only caring about the conent.
2023-03-09 17:44:34 +01:00
Sarah Hoffmann
da0a7a765e bdd: reorganise field comparisons
Move comparision on Field values from assert_field() into a
comparator class. Replace BadRowValueAssert with a simpler
check_row() function.
2023-03-09 17:05:05 +01:00
Sarah Hoffmann
9769a0dcdb bdd: use new check_for_attributes() function also in steps 2023-03-09 16:44:07 +01:00
Sarah Hoffmann
fbff4fa218 bdd: fully check correctness of geojson and geocodejson
Parse code now checks presence of all required fields and exports
all fields for inspection.
2023-03-09 16:36:46 +01:00
Sarah Hoffmann
d17ec56e54 bdd: remove OrderedDict
dicts are guaranteed to keep insertion order by since Python 3.7, making
use of ORderedDict mute.
2023-03-09 16:08:39 +01:00
Sarah Hoffmann
9a5f75dba7 Merge pull request #2993 from biswajit-k/delete-tags
Adds sanitizer for preventing certain tags to enter search index based on parameters
2023-03-09 14:31:45 +01:00
biswajit-k
ca149fb796 Adds sanitizer for preventing certain tags to enter search index based on parameters
fix: pylint error

added docs for delete tags sanitizer

fixed typos in docs and code comments

fix: python typechecking error

fixed rank address type

Revert "fixed typos in docs and code comments"

This reverts commit 6839eea755a87f557895f30524fb5c03dd983d60.

added default parameters and refactored code

added test for all parameters
2023-03-09 14:18:39 +05:30
Sarah Hoffmann
08f19e074b Merge pull request #2999 from biswajit-k/fix-typos
fixed typos in docs and code comments
2023-03-08 08:55:27 +01:00
biswajit-k
36388cafe9 fixed typos in docs and code comments 2023-03-06 17:09:38 +05:30
Sarah Hoffmann
8191c747b9 add latest security incident
Also removes 3.6 which is no longer supported.
2023-02-22 11:24:04 +01:00
Sarah Hoffmann
d078763fa1 harmonize flags for PHP's htmlspecialchars 2023-02-20 16:46:53 +01:00
Sarah Hoffmann
412ead5f2d adapt PHP tests for debug output 2023-02-20 16:23:28 +01:00
Sarah Hoffmann
513175ce23 properly encode special HTML characters in debug mode 2023-02-20 15:43:03 +01:00
Sarah Hoffmann
8db6dd995a Merge pull request #2986 from mtmail/add-more-languages-from-osm-wiki
sp_wiki_loader: add more default languages
2023-02-20 11:12:08 +01:00
marc tobias
4be6970bd4 sp_wiki_loader: add more default languages 2023-02-18 13:54:48 +01:00
Sarah Hoffmann
fa681ce246 add timestamp to debug SQL output 2023-02-18 09:49:30 +01:00
Sarah Hoffmann
dd5cd97713 add warning for reverse index migration 2023-02-18 09:43:37 +01:00
Sarah Hoffmann
89d47d26f0 Merge pull request #2985 from lonvia/fix-place-rank-inheritance
Restrict place rank inheritance to address items
2023-02-18 09:35:19 +01:00
Sarah Hoffmann
d574ceb598 restrict place rank inheritance to address items
Place tags must have no influence on street- or POI-level
objects.
2023-02-17 16:25:26 +01:00
Sarah Hoffmann
92e2f5ca8e Merge pull request #2983 from lonvia/improve-reverse-place-node-lookup
Improve reverse place node lookup
2023-02-17 15:51:55 +01:00
Sarah Hoffmann
f2bc792178 use reverse buffered index for search within country 2023-02-17 14:10:01 +01:00
Sarah Hoffmann
8ed096f938 speed up reverse lookup of place nodes
Add a special index that contains the place nodes buffered by their
respective area according to their search rank. This replaces the
maximum area search for place nodes and reduces drastically the number
of place nodes that need to be retrieved.
2023-02-17 14:10:01 +01:00
Sarah Hoffmann
3405dbf90e Merge pull request #2981 from lonvia/add-point-wkb-decoder
Python frontend: add a WKB decoder for the Point class
2023-02-17 08:40:14 +01:00
Sarah Hoffmann
ee0c5e24bb add a WKB decoder for the Point class
This allows to return point geometries from the database and makes
the SQL a bit simpler.
2023-02-16 17:29:56 +01:00
Sarah Hoffmann
b320f1c7e3 Merge pull request #2978 from lonvia/add-debug-view
Add debug view to Python API
2023-02-14 14:08:42 +01:00
Sarah Hoffmann
6c6b1c0606 add typing information for pygments to requirements 2023-02-14 11:58:08 +01:00
Sarah Hoffmann
a5f5add630 actions: use token to avoid rate limiting 2023-02-14 11:57:37 +01:00
Sarah Hoffmann
8557105c40 add debug output for unit tests
This uses the debug output facility meant for pretty HTML output
to give us debugging output for the unit tests.
2023-02-14 11:57:37 +01:00
Sarah Hoffmann
24e7ffb289 add HTML-formatted debug output to lookup 2023-02-14 11:57:37 +01:00
Sarah Hoffmann
0b9bcfe01d Merge pull request #2974 from bloom256/main-tag-in-reverse-lookup
Main tag information added to geocodejson in reverse geocoding
2023-02-11 11:01:11 +01:00
Sarah Hoffmann
1a0e8f810b Merge pull request #2975 from bloom256/typo-fix
typo fixed
2023-02-11 11:00:15 +01:00
danil
7698f0672d typo fixed 2023-02-08 00:21:58 +04:00
danil
da064ea702 Main tag information added to geocodejson in reverse geocoding 2023-02-08 00:19:49 +04:00
Sarah Hoffmann
0c65289a80 Merge pull request #2970 from lonvia/add-details-endpoint
Python implementation of details endpoint
2023-02-06 16:32:35 +01:00
Sarah Hoffmann
95c90a785f actions: force PHPUnit 9
PHPUnit 10 is incompatible with our tests. Not worth adapting anymore.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
42c3754dcd add tests for details result formatting and trim results
Values that are None are no longer included in the output to save
a bit of bandwidth.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
b742200442 expand details BDD tests
There are now minor differences in the output between PHP and
Python versions, so introduce specific tests.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
3ac70f7cc2 implement details endpoint in Python servers 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
104722a56a switch details cli command to new Python implementation 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
1924beeb20 add lookup of postcdoe data 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
70f6f9a711 add lookup of tiger data 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
f1ceefe9a6 add lookup of address interpolations 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
e1fc1566f3 fix new linting issues from pylint 2.16 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
189f74a40d add unit tests for lookup function 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
370c9b38c0 improve scaffolding for API unit tests
Use the static table definition to create the test database.
Add helper function to simplify filling the tables.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
df65c10360 add lookup() call to the library API
Currently only looks places up in placex.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
4573389da7 docs: fix internal links
Fixes #2968.
2023-02-04 21:21:23 +01:00
Sarah Hoffmann
5c55c1d8a1 Merge pull request #2963 from lonvia/add-sqlalchemy-schema
Add table definitions for SQLAlchemy
2023-01-30 11:17:22 +01:00
Sarah Hoffmann
a1d4e53eb8 add pytest-asyncio as a requirement for testing 2023-01-30 09:36:17 +01:00
Sarah Hoffmann
16b6484c65 add property cache for API
This caches results from querying nominatim_properties.
2023-01-30 09:36:17 +01:00
Sarah Hoffmann
2156fd4909 adapt annotations for SQLAlchemy 2.x
It is not possible to produce type annotations that work with both
versions 1.4 and 2.0. So keep to the principle of only supporting
newest versions when it comes to mypy. This means that some types
may have to be string quoted to not cause issues when running with
SQLAlchemy 1.4.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
7f5fbe1dc7 add new geoalchemy requirement 2023-01-28 11:51:26 +01:00
Sarah Hoffmann
2e9090d121 adapt status to use SQLAlchemy tables 2023-01-28 11:51:26 +01:00
Sarah Hoffmann
23f2690c54 convert StatusResult to a dataclass
This gives us nice str() functions.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
5226cd2a0b add SQLAlchemy table schema for Nominatim tables
Also defines an extended connection object that includes access to
the table definitions. Makes it easier to access the tables from
code that has been split off into separate modules.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
c7e8a82d68 Merge pull request #2958 from lonvia/streaming-json-writer
Introduce streaming json writer for JSON output
2023-01-25 17:36:23 +01:00
Sarah Hoffmann
77bec1261e add streaming json writer for JSON output 2023-01-25 15:05:33 +01:00
Sarah Hoffmann
dfcb24061e Merge pull request #2957 from lonvia/reorganise-api-module
Assorted improvements to the new api library module
2023-01-25 09:29:31 +01:00
Sarah Hoffmann
f85b0c6208 allow to add php-compatible endpoints
If the new setting NOMINATIM_SERVE_LEGACY_URLS is set, the servers
expose the endpoints also with the .php suffix to ensure backwards
compatibility.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
e490a30a4a add support for CORS headers
Adds the additional dependency to sanic-cors for the Sanic server.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
654b652530 factor out common server implementation code
Most of the server implementation of V1 API now resides in
api.v1.server_glue. The webframeworks only supply some glue code
which is independent to changes in the API code.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
8f4426fbc8 reorganize code around result formatting
Code is now organized by api version. So formatting has moved to
the api.v1 module. Instead of holding a separate ResultFormatter
object per result format, simply move the functions to the
formater collector and hand in the requested format as a parameter.
Thus reorganized, the api.v1 module can export three simple functions
for result formatting which in turn makes the code that uses
the formatters much simpler.
2023-01-24 17:20:51 +01:00
Sarah Hoffmann
32c1e59622 reorganize api submodule
Use a directory for the submodule where the __init__ file contains
the public API. This makes it easier to separate public interface
from the internal implementation.
2023-01-24 13:28:04 +01:00
Sarah Hoffmann
e56957f047 api: delay setup of initial database connection
Defer database setup until the first call to a function. Needs an
additional lock because the setup still needs to be done sequentially.
2023-01-24 10:56:22 +01:00
Sarah Hoffmann
3cc357bffa Merge pull request #2955 from lonvia/fix-importance-refresh
Fix importance recalculation
2023-01-23 09:07:43 +01:00
Sarah Hoffmann
388faa2c54 Merge pull request #2954 from lonvia/remove-comma-as-separator
Remove comma as name separator
2023-01-23 09:06:23 +01:00
Sarah Hoffmann
ce9ed993c8 fix importance recalculation
The signature of the compute_importance() function has changed.
2023-01-22 22:32:16 +01:00
Sarah Hoffmann
929a13d4cd remove comma as name separator
Commas are most of the time used as a part of a name, not to
separate multiple names.

See also #2950.
2023-01-22 22:29:36 +01:00
Sarah Hoffmann
56f0d678e3 exclude names ending in :wikipedia from indexing
The wikipedia prefix is used for referencing a wikipedia article
for the given tag, not the object, so not useful to search.
2023-01-21 11:16:08 +01:00
Sarah Hoffmann
02645277c8 fix typo in argument to details CLI command
Fixes #2951.
2023-01-21 10:44:10 +01:00
Sarah Hoffmann
5f4e98e0d9 update Makefile in test directory 2023-01-09 20:49:33 +01:00
Sarah Hoffmann
e9e14834bc Merge pull request #2940 from lonvia/remove-ubuntu-18-from-actions
Actions: use Ubuntu 20 image for testing old versions of dependent software
2023-01-05 20:16:19 +01:00
Sarah Hoffmann
5a57d6308e increase minimum required version of PHP to 7.3
Other versions are not tested anymore.
2023-01-05 16:58:46 +01:00
Sarah Hoffmann
cb73d562d5 actions: move tests from Ubuntu 18 to 20
These are the tests that try to make sure old library version really
still work as expected. Increases PHP to 7.3 because 7.2 has errors.
2023-01-05 16:55:32 +01:00
Sarah Hoffmann
6c61690ef3 do not run osm2pgsql append with mutliple threads
As the updates modify the placex table, there may be deadlocks
when different objects want to forward modifications to the same
place (for example because they are both linked to it).
2023-01-05 11:34:56 +01:00
Sarah Hoffmann
bf1f6a997c Merge pull request #2937 from lonvia/python-server-stub
Scaffolding for new Python-based search frontend
2023-01-03 14:26:33 +01:00
Sarah Hoffmann
90b29aa808 Merge pull request #2856 from t-tomek/patch-1
Update DB.php
2023-01-03 10:58:50 +01:00
Sarah Hoffmann
31d0468cd2 no type checking on old Ubuntu 18 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
38f467bae3 add event handler for default connection settings 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
5c7c4bb9a8 update versions for Python packages 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
9646ec4edd drop official support for Ubuntu 18 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
c665796c52 drop support for Python 3.6 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
a72e2ecb3f update dependencies for Actions 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
0c47558729 convert version to named tuple
Also return the new NominatimVersion rather than a string in the
status result.
2023-01-03 10:03:00 +01:00
Sarah Hoffmann
93b9288c30 fix error message for non-existing database 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
9d31a67116 add unit tests for new Python API 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
cf19036ce6 docs: extend dependency documentation 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
7219ee6532 extend BDD API tests to query via Python frameworks
A new config option ENGINE allows to choose between php and any of the
supported Python engines.
2023-01-03 10:03:00 +01:00
Sarah Hoffmann
d7bc846c3c fix liniting issues and add type annotations 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
1adb0a9886 add support for starlette framework 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
8aa01c9c8e add support for falcon as server framework 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
23dabad0b0 add sanic development server implementation 2023-01-03 10:02:53 +01:00
Sarah Hoffmann
45c675bd78 implement command line status call in Python 2023-01-03 10:02:35 +01:00
Sarah Hoffmann
860c6ecbcc Merge pull request #2936 from lonvia/fix-query-for-index-use
Split query that deletes old objects from placex
2023-01-03 09:56:03 +01:00
Sarah Hoffmann
3f38091421 split query that deletes old objects from placex
placex only has partial indexes over OSM types, so the OSM type
needs to be hardcoded to ensure these indexes are used.
2023-01-02 17:25:38 +01:00
Sarah Hoffmann
7704b3fc7b Merge pull request #2932 from lonvia/rework-flex-framework
Switch to osm2pgsql flex output
2022-12-29 17:23:38 +01:00
t-tomek
151b3c4021 Update DB.php 2022-12-27 08:34:20 +01:00
Sarah Hoffmann
610af95ed1 remove old import styles 2022-12-23 19:29:07 +01:00
Sarah Hoffmann
018ef5bd53 bdd: recreate project directory for every run 2022-12-23 18:36:41 +01:00
Sarah Hoffmann
200eae3bc0 add tests for examples in lua style documentation
And fix all the errors the tests have found.
2022-12-23 17:35:28 +01:00
Sarah Hoffmann
9321e425a4 add documentation for flex style
Includes minor adaptions to bring the code in line with the
documentation.
2022-12-23 11:10:40 +01:00
Sarah Hoffmann
9395c0dadc Merge pull request #2931 from mtmail/vagrant-md-use-dotenv
VAGRANT.md - replace local.php settings with .env
2022-12-22 10:20:15 +01:00
marc tobias
a40065878b VAGRANT.md - replace local.php settings with .env 2022-12-22 03:12:30 +01:00
t-tomek
e8d3c0a99a Update DB.php 2022-12-21 19:56:24 +01:00
t-tomek
2e6ff1b750 Update DB.php 2022-12-21 16:32:33 +01:00
t-tomek
15f9e397dd Update DB.php 2022-12-21 16:31:39 +01:00
t-tomek
87a109d0e9 Update DB.php 2022-12-21 16:30:55 +01:00
Sarah Hoffmann
f3ffbe167d use canonical url for nominatim.org 2022-12-20 16:55:47 +01:00
Sarah Hoffmann
89a34e7508 adapt tests for new lua styles 2022-12-19 17:32:28 +01:00
Sarah Hoffmann
f6fc750f08 install new lua import scripts 2022-12-19 17:27:40 +01:00
Sarah Hoffmann
c32f686c62 switch to osm2pgsql flex style by default 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
2ca83efc36 flez: add other default styles 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
06796745ff flex: hide compiled matchers 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
093d531509 flex: switch to functions for substyles
This gives us a bit more flexibility about the implementation
in the future.
2022-12-18 10:10:58 +01:00
Sarah Hoffmann
a915815e4d explicit export for functions in flex-base 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
de3c28104c flex: add combining clean function 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
d9d13a6204 flex: simplify name handling 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
d1f5820711 flex: simplify address configuration 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
7592f8f189 update osm2pgsql (flex not building index) 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
ffbb61713a Merge pull request #2927 from lonvia/remove-forward-dependencies
Remove shortcut disabling forward dependencies
2022-12-17 15:59:09 +01:00
Sarah Hoffmann
6f51c1ba33 remove code that disables processing of forward dependencies 2022-12-11 19:35:58 +01:00
Sarah Hoffmann
cd861345b7 Merge pull request #2921 from lonvia/case-insensitive-utf8
Use grapheme_stripos instead of stripos in PHP code
2022-12-11 15:57:01 +01:00
Sarah Hoffmann
823502a40a use grapheme_stripos instead of stripos in PHP code
The stripos() does not handle non-ASCII correctly.
2022-12-11 13:55:27 +01:00
Sarah Hoffmann
4efad0bb95 Merge pull request #2920 from lonvia/no-postcode-for-rivers
Do not assign postcodes to long linear features
2022-12-10 17:30:32 +01:00
Sarah Hoffmann
922352e215 do not assign postcodes to long linear features
This avoids a postcode in particular for waterway features and
long natural featues like ridges and valleys.

Fixes #2915.
2022-12-10 14:53:08 +01:00
Frederik Ramm
8a0b2dc0be Fix typo in NOMINATIM_LOG_FILE (#2919)
* fix typo in docs (NOMINATIM_LOG_FILE uses s not ms)
2022-12-08 17:34:54 +01:00
Sarah Hoffmann
7eced34e20 Merge pull request #2916 from lonvia/faq-postcodes
Add FAQ about finding bad postcodes
2022-12-08 08:42:10 +01:00
Sarah Hoffmann
64d00c1c8e add FAQ about finding bad postcodes 2022-12-07 21:38:53 +01:00
Sarah Hoffmann
6d48798d45 Merge pull request #2913 from lonvia/remove-duplicate-spaces-in-transliteration
contract duplicate spaces in transliteration string
2022-12-02 19:43:07 +01:00
Sarah Hoffmann
0e186835b9 contract duplicate spaces in transliteration string
There are some pathological cases where an isolated letter may
be deleted because it is in itself meaningless. If this happens in
the middle of a sentence, then the transliteration contains two
consecutive spaces. Add a final rule to fix this.

See #2909.
2022-12-02 10:15:02 +01:00
Sarah Hoffmann
b607eb9678 Merge pull request #2908 from n-timofeev/update-vagrantfile
Update Vagrantfile
2022-11-29 10:33:16 +01:00
n-timofeev
85a68f1e56 Update Vagrantfile 2022-11-29 11:12:10 +03:00
Sarah Hoffmann
45d13bc295 Merge pull request #2906 from lonvia/move-data-dirs-into-module
Reorganize how Nominatim finds its extra data and libraries
2022-11-28 08:44:29 +01:00
Sarah Hoffmann
2231401483 clean up uses of cli.nominatim()
They should not hand in data paths anymore.
2022-11-27 15:27:04 +01:00
Sarah Hoffmann
2abe9e6fd9 use data paths from new nominatim.paths 2022-11-27 12:15:41 +01:00
Sarah Hoffmann
20f56dfc77 introduce paths module which contains the data paths
This means that nominatim can find its data even when not called
from the command line tool.
2022-11-27 12:02:37 +01:00
Sarah Hoffmann
67664406da Merge pull request #2905 from lonvia/remove-nominatim-tool-var
Remove NOMINATIM_NOMINATIM_TOOL variable
2022-11-26 20:24:58 +01:00
Sarah Hoffmann
0ed60d29cb remove NOMINATIM_NOMINATIM_TOOL variable
This was used by the old PHP scripts to call the Python tool.
With the scripts now gone, the variable can be removed.
2022-11-26 16:40:20 +01:00
Sarah Hoffmann
04ee39467a actions: install keys for postgres repo 2022-11-24 14:04:05 +01:00
Sarah Hoffmann
1f3edf6eba prepare release 4.2.0 2022-11-24 10:43:29 +01:00
Sarah Hoffmann
a15c197547 add checklist for releases 2022-11-24 10:43:25 +01:00
Sarah Hoffmann
13dbeb75c7 Merge pull request #2903 from lonvia/migration-for-index-reorganization
Add migration for reorganization of pending indexes
2022-11-24 10:13:38 +01:00
Sarah Hoffmann
6aded60045 add migration for reorganization of pending indexes
Fixes #2900.
2022-11-24 08:48:05 +01:00
Sarah Hoffmann
8dfdf64dd5 Merge pull request #2902 from lonvia/tiger-county-sanitizer
Tiger county sanitizer
2022-11-23 17:58:42 +01:00
Sarah Hoffmann
41e8bddaa9 remove BDD test for tiger:county
We no longer rely on the import to strip the tag.
2022-11-23 10:37:27 +01:00
Sarah Hoffmann
fd3dec8efe add sanitizer for TIGER tags
Currently only takes over cleaning the tiger:county data. This was
done by the import until now.
2022-11-23 10:37:27 +01:00
Sarah Hoffmann
55ee08f42b Merge pull request #2892 from lonvia/optional-forward-dependecies
Add experimental configuration switch for enabling forward dependencies
2022-11-21 16:57:45 +01:00
Sarah Hoffmann
b6ff697ff0 add experimental option for enabling forward dependencies 2022-11-21 14:48:00 +01:00
Sarah Hoffmann
925ac1e1b9 Merge pull request #2890 from lonvia/use-rank-search-for-reverse-polygon-match
Use rank search for reverse polygon match
2022-11-20 22:11:35 +01:00
Sarah Hoffmann
77acc1c2be force use of geometry index for reverse polygon lookup 2022-11-20 20:22:44 +01:00
Sarah Hoffmann
ebe489c227 use rank_search for reverse polygon match 2022-11-20 20:22:23 +01:00
Sarah Hoffmann
9c152a030a fix condition under which place_to_be_deleted is created
It is needed for updates, independently if reverse-only is set.
2022-11-19 21:53:14 +01:00
Sarah Hoffmann
b310c86c55 Merge pull request #2889 from lonvia/fix-interpolation-updates
Drop illegal values for addr:interpolation on update
2022-11-18 18:51:11 +01:00
Sarah Hoffmann
c9ff7d2130 drop illegal values for addr:interpolation on update 2022-11-18 17:26:56 +01:00
Sarah Hoffmann
52456230cc Merge pull request #2887 from lonvia/lookup-linked-places
Add support for lookup of linked places
2022-11-17 13:35:53 +01:00
Sarah Hoffmann
4422533adb Merge pull request #2886 from lonvia/closest-street-in-associated
Handle associatedStreet relations with multiple streets correctly
2022-11-17 07:29:25 +01:00
Sarah Hoffmann
c4b13f2b7f add support for lookup of linked places 2022-11-16 21:34:45 +01:00
Sarah Hoffmann
4f05a03d13 handle associatedStreet relations with multiple streets
When a associatedStreet relation has multiple street members
always take the closest one. Avoid geometry operations for
the frequent case that there is only one street.
2022-11-16 17:25:51 +01:00
Sarah Hoffmann
7a2e586cce Merge pull request #2884 from lonvia/tweak-special-term-penalties
Correctly handle special term + name combination
2022-11-15 19:29:55 +01:00
Sarah Hoffmann
98ce424650 Merge pull request #2885 from lonvia/remove-unused-countries
Remove dependent territories from country list
2022-11-15 19:29:39 +01:00
Sarah Hoffmann
3059a3da4e correctly handle special term + name combination
Special terms with operator name usually appear in combination with the
name. The current penalties only took name + special term into account
not special term + name.

Fixes #2876.
2022-11-15 11:55:40 +01:00
Sarah Hoffmann
d63d7cb9a8 remove dependent territories from country list
Removes territories of US, France, Australia and Netherlands from the
country list. These territories have their own country code (which is
why they are in the list in the first place) but are mapped as part of
the admin_level 2 relations for the respective parent countries.
Therefore they never had any places attached. In practical terms, the
change only affects the number of tables created.
2022-11-15 11:37:30 +01:00
Sarah Hoffmann
f3f542e864 Merge pull request #2881 from lonvia/more-update-tests-for-osm2pgsql
Experimental support for osm2pgsql flex output
2022-11-15 09:39:46 +01:00
Sarah Hoffmann
93ada250f7 bdd: add tests for osm2pgsql update of postcode nodes 2022-11-14 17:27:04 +01:00
Sarah Hoffmann
d8e3ba3b54 bdd: add osm2pgsql tests for updating interpolations 2022-11-14 16:57:31 +01:00
Sarah Hoffmann
a46348da38 bdd: test placex content when updating with osm2pgsql 2022-11-14 14:48:44 +01:00
Sarah Hoffmann
36cf0eb922 reorganize handling of place type changes
Always replace existing entries in place, never delete them because
a direct delete will cause conflicts.
2022-11-14 13:57:26 +01:00
Sarah Hoffmann
63a9bc94f7 fix country handling in flex style
If the country tag does not match a 2-letter code, it needs to
be dropped.
2022-11-10 15:52:13 +01:00
Sarah Hoffmann
2dafc4cf4f remove tests that differ between lua and gazetteer versions 2022-11-10 15:51:55 +01:00
Sarah Hoffmann
68d09f9cad node locations must be stable for osm2pgsql update tests 2022-11-10 11:11:45 +01:00
Sarah Hoffmann
b98d3d3f00 bdd: extend osm2pgsql update tests
Now also checks for correct indexing state of placex table.
2022-11-10 09:38:25 +01:00
Sarah Hoffmann
3683cf7ddc optimise tag match function 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
84e5e601e1 add lua requirements for vagrant scripts 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
a1da149211 CI: require lua libraries 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
74405e9684 add migration for place_to_be_deleted table 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
2fac507453 change updates to handle delete/insert workflow
This makes Nominatim compatible with osm2pgsql's default update
modus operandi of deleting and reinserting data. Deletes are diverted
into a TODO table instead of executing them. When data is reinserted,
the corresponding entry in the TODO table is deleted. After updates are
finished, the remaining entries in the TODO table are executed, doing
the same work as the delete trigger did before.

The new behaviour also works against the gazetteer output with its
insert-only mechanism.
2022-11-10 09:38:23 +01:00
Sarah Hoffmann
51ed55cc32 initial flex import scripts
Only implements the extratags style for the moment. Tests pass
for the same behaviour as the gazetteer output. Updates still need
to be done.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
de2a3bd5f8 bdd tests: make import style configurable
The switch is for development. Tests are not guaranteed to still
work when run with anything but the 'extratags' style.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
981e9700be add osm2pgsql gazetteer tests
This ports the gazetteer tests from osm2pgsql to BDD tests.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
b52ce4f9f2 Merge pull request #2869 from mtmail/improve-tiger-install-doc
Tiger install doc: add -refresh website- step
2022-11-09 20:48:39 +01:00
Sarah Hoffmann
64c591da7f fix type issues with calls to pyosmium 2022-11-09 20:46:33 +01:00
Marc Tobias
2387648a85 Tiger install doc: add -refresh website- step 2022-11-09 17:33:31 +01:00
Sarah Hoffmann
846ecff0c5 Merge pull request #2871 from lonvia/fix-timeout-for-updates
Fix timeout for updates
2022-11-09 14:26:39 +01:00
Sarah Hoffmann
26a5b59c28 add types-requests dependency 2022-11-09 09:12:37 +01:00
Sarah Hoffmann
6ddb39fda3 respect socket timeout also in other replication functions 2022-11-09 09:12:37 +01:00
Sarah Hoffmann
1fdcec985a fix timeout use for replication timeout
The timeout parameter is no longer taken into account since
pyosmium switched to the requests library. This adds the parameter
back.
2022-11-09 09:12:37 +01:00
Sarah Hoffmann
30f526c943 Merge pull request #2870 from mtmail/update-github-actions-to-node-16
update those github action packages still using node12
2022-11-08 17:24:53 +01:00
Marc Tobias
253127cb9f update those github action packages still using node12 2022-11-08 15:16:55 +01:00
Sarah Hoffmann
3237ca587f Merge pull request #2866 from lonvia/reverse-ignore-interpolations-without-parent
Ignore interpolations without parent on reverse search
2022-11-07 09:00:59 +01:00
Sarah Hoffmann
0dbc0ae6d5 ignore interpolations without parent on reverse search
If no parent can be found for an interpolation, there is most
likely a data error involved. So don' t show these interpolations
in reverse search results.
2022-11-05 22:16:09 +01:00
Sarah Hoffmann
7461ff4680 Merge pull request #2865 from Romeo-PHILLIPS/fix/documentation_status_code
Fix: documentation status code
2022-11-05 22:14:44 +01:00
Romeo
afc714e1d3 fix: format 2022-11-04 18:05:40 +01:00
Romeo
3bc0db8d91 fix: markup 2022-11-04 18:04:28 +01:00
Romeo
d573da5b2c fix: 705 Status Code Documentation 2022-11-04 18:03:49 +01:00
Romeo
ecd5a3fdf9 fix: 705 Status Code Documenation 2022-11-04 17:59:36 +01:00
Sarah Hoffmann
543d63e7a9 Merge pull request #2862 from mtmail/remove-version-from-fpm-sock-file
Install scripts: remove version from /var/run/php-fpm filenames
2022-11-04 17:32:50 +01:00
Sarah Hoffmann
7a22ae6bf9 Merge pull request #2863 from lonvia/add-support-for-postgresql-15
Update CI tests to postgresql 15
2022-11-04 17:32:06 +01:00
Sarah Hoffmann
ebe23d6882 update CI tests to postgresql 15 2022-11-04 16:21:15 +01:00
marc tobias
33c805aee0 Install scripts: remove version from /var/run/php-fpm filenames 2022-11-04 14:22:11 +01:00
t-tomek
244b6fcef6 Update DB.php
Use SET instead of UPDATE queries to support read only databases
2022-11-02 14:11:52 +01:00
Sarah Hoffmann
616ff4ae25 actions: pin pyicu to 2.9 2022-10-24 14:21:44 +02:00
Sarah Hoffmann
e221eaa977 Merge pull request #2836 from mtmail/tiger2022
Documentation: remove year from TIGER filename, new 2022 data
2022-10-24 11:21:55 +02:00
Sarah Hoffmann
eed7abb839 Merge pull request #2838 from lonvia/update-osm2pgsql
Update osm2pgsql to latest 1.7.1 release
2022-10-05 18:59:13 +02:00
Sarah Hoffmann
5f6dcd36ed fix flaky API test
The search 'landstr' produces many duplicates so that with
some bad luck 4 or less results may appear. Disable deduplication
to make it more predictable.
2022-10-05 15:16:14 +02:00
Sarah Hoffmann
f395054536 update osm2pgsql to 1.7.1 2022-10-04 21:16:57 +02:00
Sarah Hoffmann
afeafc8aa7 Merge pull request #2835 from lonvia/secondary-importance
Secondary importance
2022-10-04 16:25:47 +02:00
marc tobias
f1ece658f8 Documentation: remove year from TIGER filename 2022-10-04 14:19:36 +02:00
Sarah Hoffmann
b3abb355eb docs: add customization hints for secondary importance
Removing the download links for now as the tile importance
is still too experimental.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
5877b69d51 do not run unit test when postgis_raster is not available 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
5ec2c1b712 adapt unit tests to changed function names 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
0a73ed7d64 add secondary importance to API BDD tests
Also fixes a path issue during API test DB creation that could
never possibly have worked.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
abf349fb0d simplify use of secondary importance
The values in the raster are already normalized between 0 and 2**16,
so a simple conversion to [0, 1] will do.

Check for existance of secondary_importance table statically when
creating the SQL function. For that to work importance tables need
to be created before the functions.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
3185fad918 load views as a SQL file and rename to 'secondary importance'
The only requirement for secondary importance is that a raster table
comes out of it. The generic name leaves open where the data comes
from.
2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
0ab0f0ea44 Integrated OSM views into importance computation 2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
ac467c7a2d Enhanced the implementation of OSM views GeoTIFF import functionality 2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
c85b74497b Initial implementation of GeoTIFF import functionality 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
3381a92d92 Merge pull request #2832 from lonvia/conditional-analyze-on-indexing
Only run analyze on indexing when work was done
2022-09-28 15:17:40 +02:00
Sarah Hoffmann
a2ee58d8a1 only run analyze on indexing when work was done
This speeds up processing when continuing indexing after it was
interrupted.
2022-09-28 10:22:54 +02:00
Sarah Hoffmann
051f3720ce Merge pull request #2829 from lonvia/optimize-indexes
Further optimize indexes
2022-09-26 10:02:51 +02:00
Sarah Hoffmann
f017e1e9a1 make sure indexes are used 2022-09-25 14:09:45 +02:00
Sarah Hoffmann
33ba6896a8 further split up the big geometry index
Adds partial indexes for all geometry queries used during import.
A full index is not necessary anymore at that point. Still create
the index afterwards for use in queries.

Also adds documentation for all indexes on where they are used.
2022-09-21 16:21:41 +02:00
Sarah Hoffmann
f4d3ae6f70 consolidate indexes over geometry_sectors
The index over geometry_sectors are mainly used for ordering
the places which need indexing. That means they function effectively
as a TODO list. Consolodate them so that they always only contain
the places which are still to do. Also add the appropriate index
for the boundary indexing phase.
2022-09-21 10:38:58 +02:00
Sarah Hoffmann
860f3559a1 split up large osmid index on placex
This doesn't do anything in terms of lookup speeds but the resulting
indexes are quite a bit smaller.
2022-09-21 09:24:57 +02:00
Sarah Hoffmann
d8be8a7293 fix funding link 2022-09-19 15:39:58 +02:00
Sarah Hoffmann
9750a361c9 add Github Sponsering to funding page 2022-09-19 15:38:56 +02:00
Sarah Hoffmann
ed3dd81d04 run final index creation in parallel 2022-09-19 11:55:25 +02:00
Sarah Hoffmann
bef1aebf1c add function for parallel execution of SQL scripts 2022-09-19 11:52:17 +02:00
Sarah Hoffmann
26688ba35d add link to funding page 2022-09-19 10:30:58 +02:00
Sarah Hoffmann
a1158feeb8 Merge pull request #2818 from lonvia/better-geometry-index
Add index for lookup of addressable areas
2022-09-19 10:18:43 +02:00
Sarah Hoffmann
aef014a47d add indexes for lookup of addressable areas
The generic geometry index has become to slow for that purpose.
2022-09-18 16:57:12 +02:00
Sarah Hoffmann
d6a0947e5a update security policy for 4.1 version 2022-09-13 08:58:31 +02:00
Sarah Hoffmann
bc94318d83 mypy: fix new warnings due to external type updates 2022-09-05 17:39:35 +02:00
Sarah Hoffmann
d4c6e58b57 Merge pull request #2812 from mausch/patch-1
docs: fix links to rank docs
2022-09-05 17:27:09 +02:00
Mauricio Scheffer
66832cf0a5 docs: fix links to rank docs 2022-09-05 11:11:13 +01:00
Sarah Hoffmann
bcfe817212 Merge pull request #2799 from lonvia/fix-inclusions-with-extratags
Ignore irrelevant extra tags on address interpolations
2022-08-13 19:02:27 +02:00
Sarah Hoffmann
07d72f950b Merge pull request #2739 from tareqpi/collect_os_info.sh
integration of host system information script into Nominatim CLI tool
2022-08-13 19:02:14 +02:00
Sarah Hoffmann
dddfa3a075 ignore irrelevant extra tags on address interpolations
When deciding if an address interpolation has address information, only
look for addr:street and addr:place. If they are not there go looking
for the address on the address nodes. Ignores irrelevant tags like
addr:inclusion.

Fixes #2797.
2022-08-13 14:07:06 +02:00
Tareq Al-Ahdal
74019877a4 Added the feature of collecting host system information to the CI tests 2022-08-13 06:22:13 +08:00
Tareq Al-Ahdal
465d82a92f Integrated 'collect_os_info.py' into Nominatim's CLI tool 2022-08-13 06:18:10 +08:00
Tareq Al-Ahdal
49f889bf09 Enhanced and refactored 'collect_os_info.py'
Changed the script to functional programming paradigm to remove the big number of local attributes to decrease memory usage when running it. Additional OS info are now included.
2022-08-13 06:13:05 +08:00
Tareq Al-Ahdal
5e477e3b5b Merge remote-tracking branch 'upstream/master' into collect_os_info.sh 2022-08-13 05:53:39 +08:00
Sarah Hoffmann
67cfad6a2c Merge pull request #2798 from lonvia/more-rank-change-fixes
Invalidations when boundaries and places change their rank
2022-08-12 11:42:03 +02:00
Sarah Hoffmann
487e81fe3c more invalidations when boundary changes rank
When a boundary or place changes its address rank, all places where
it participates as address need to be potentially reindexed.
Also use the computed rank when testing place nodes against
boundaries. Boundaries are computed earlier.

Fixes #2794.
2022-08-12 09:48:46 +02:00
Sarah Hoffmann
18f525ac54 Merge pull request #2793 from lonvia/increase-minimum-results
Fix minimum number of results that are searched for
2022-08-09 20:08:45 +02:00
Sarah Hoffmann
e0c184e097 fix base number of returned results
The intent was to always search for at least 10 results.

Improves on #882.
2022-08-09 13:53:20 +02:00
Sarah Hoffmann
78716ab8b9 Merge pull request #2792 from lonvia/new-type-annotations
Adapt to new type annotations from typeshed
2022-08-09 13:52:20 +02:00
Sarah Hoffmann
8d082c13e0 adapt to new type annotations from typeshed
Some more functions frrom psycopg are now properly annotated.
No ignoring necessary anymore.
2022-08-09 11:06:54 +02:00
Sarah Hoffmann
196dc2a659 docs: add types-psutil requirement 2022-08-08 09:46:25 +02:00
Sarah Hoffmann
4fe797d704 remove mypy ignore for psutil.virtual_memory()
Now available in typeshed.
2022-08-08 09:44:45 +02:00
Sarah Hoffmann
3c188164ab Merge pull request #2789 from lonvia/update-osm2pgsql
Update osm2pgsql (fixes admin_level parsing)
2022-08-08 09:15:58 +02:00
Sarah Hoffmann
5330370076 update osm2pgsql (fix admin_level parsing) 2022-08-07 18:34:47 +02:00
Sarah Hoffmann
eecc73ea1a docs: fix dangling links 2022-08-05 15:29:43 +02:00
Sarah Hoffmann
8c73c0795e docs: update links to vagrant instructions 2022-08-05 15:27:11 +02:00
Sarah Hoffmann
7d68aa8f04 prepare release 4.1.0 2022-08-05 14:33:11 +02:00
Sarah Hoffmann
a0cd96e05e Merge pull request #2786 from lonvia/export-centroid-for-tokenizer
Export centroid to tokenizer
2022-08-01 11:38:24 +02:00
Sarah Hoffmann
b19c90b9a6 export centroid to tokenizer
May come in handy when developping sanitizers for an area smaller
than country size.
2022-07-31 22:10:58 +02:00
Sarah Hoffmann
e427712cb0 Merge pull request #2784 from lonvia/doscs-customizing-icu-tokenizer
Document the public API of sanitizers and token analysis modules
2022-07-31 19:15:50 +02:00
Sarah Hoffmann
9864b191b1 fix various typos 2022-07-31 17:10:35 +02:00
Sarah Hoffmann
e7574f119e add simple examples of sanitizers and token analysis 2022-07-29 17:15:25 +02:00
Sarah Hoffmann
51b6d16dc6 overhaul the token analysis interface
The functional split betweenthe two functions is now that the
first one creates the ID that is used in the word table and
the second one creates the variants. There no longer is a
requirement that the ID is the normalized version. We might
later reintroduce the requirement that a normalized version be available
but it doesn't necessarily need to be through the ID.

The function that creates the ID now gets the full PlaceName. That way
it might take into account attributes that were set by the sanitizers.

Finally rename both functions to something more sane.
2022-07-29 15:14:11 +02:00
Sarah Hoffmann
34d27ed45c move PlaceName into the generic data module 2022-07-29 11:42:20 +02:00
Sarah Hoffmann
094100bbf6 harmonize spelling
Stick with the American spelling of Analyze.
2022-07-29 10:52:01 +02:00
Sarah Hoffmann
c8873d34af harmonize interface of token analysis module
The configure() function now receives a Transliterator object instead
of the ICU rules. This harmonizes the parameters with the create
function.
2022-07-29 10:43:07 +02:00
Sarah Hoffmann
f0d640961a add documentation for custom token analysis 2022-07-29 09:41:28 +02:00
Sarah Hoffmann
3746befd88 add documentation for sanitizer interface
Also switches mkdocstrings to 0.18 with the rather unfortunate
consequence that now mkdocstrings-python-legacy is needed as well.
2022-07-28 22:00:29 +02:00
Sarah Hoffmann
a8b037669a Merge pull request #2780 from lonvia/python-modules-in-project-directory
Support for external sanitizer and token analysis modules
2022-07-28 21:58:04 +02:00
Sarah Hoffmann
d819036daa add support for external token analysis modules 2022-07-25 16:27:22 +02:00
Sarah Hoffmann
6d41046b15 add support for external sanitizer modules 2022-07-25 16:10:19 +02:00
Sarah Hoffmann
7b7203c149 add function for loading plugin modules
Loads modules for configurable code like tokenizers, sanitizers, etc.
Supports internal modules, external libraries and code from the
project directory.
2022-07-25 16:10:10 +02:00
Sarah Hoffmann
95d4061b2a Merge pull request #2775 from lonvia/remove-centos-instructions
Remove vagrant scripts for CentOS
2022-07-25 10:29:32 +02:00
Sarah Hoffmann
375b57a96a vagrant: remove proj dependency and only require php-cli 2022-07-24 10:24:18 +02:00
Sarah Hoffmann
12ace4329d remove CentOS installation instructions
Fixes #2601.
2022-07-24 10:22:22 +02:00
Sarah Hoffmann
09e0be0e39 Merge pull request #2774 from lonvia/parameter-arrays
Ignore URL parameters in array notation
2022-07-23 23:56:32 +02:00
Sarah Hoffmann
cd4bcea894 ignore API parameters in array notation
PHP automatically parses parameters in an array notation(foo[]) into
array types. Ignore these parameters as 'unknown'.

Fixes #2763.
2022-07-23 10:51:44 +02:00
Sarah Hoffmann
1bee151fe3 Merge pull request #2772 from kianmeng/fix-typos
docs: fix typos
2022-07-20 17:13:30 +02:00
Kian-Meng Ang
f5e52e748f docs: fix typos 2022-07-20 22:05:31 +08:00
Sarah Hoffmann
b7f6c7c76a docs: slightly increase recommended hardware requirements 2022-07-20 10:16:23 +02:00
Sarah Hoffmann
bc7f6209d8 Merge pull request #2770 from lonvia/typed-python
Type annotations for Python code
2022-07-19 09:03:30 +02:00
Sarah Hoffmann
372a548c28 CI: remove installation of pip on Ubuntu 20 2022-07-18 12:19:04 +02:00
Sarah Hoffmann
5aad105c73 add explicit cast for fetchone 2022-07-18 10:18:51 +02:00
Sarah Hoffmann
f40c83d025 CIL use psutil type stubs 2022-07-18 09:55:58 +02:00
Sarah Hoffmann
83054af46f remove typing_extensions requirement
The typing_extensions package is only necessary now when running mypy.
It won't be used at runtime anymore.
2022-07-18 09:55:58 +02:00
Sarah Hoffmann
cb81f11422 CI: make type checking strict 2022-07-18 09:55:58 +02:00
Sarah Hoffmann
a849f3c9ec add type annotations for command line functions 2022-07-18 09:55:54 +02:00
Sarah Hoffmann
25d854dc5c add type annotations for Tiger import function 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
9963261d8d add type annotations to special phrase importer 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
459ab3bbdc add type annotations to database check functions 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
a21d4d3ac4 add type annotations for database import functions 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
4da1f0da6f add type annotations for migrations 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
17bbe2637a add type annotations to tool functions 2022-07-18 09:54:27 +02:00
Sarah Hoffmann
6c6bbe5747 add type annotations for ICU tokenizer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
18b16e06ca add type annotations for legacy tokenizer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e37cfc64d2 add type annotations to ICU tokenizer helper modules 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
77510f4a3b add typing extensions for Ubuntu22.04 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
d35e3c25b6 add type annotations for token analysis
No annotations for ICU types yet.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
62eedbb8f6 add type hints for sanitizers 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
5617bffe2f add type annotations for indexer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
8adab2c6ca add typing information for postcode formatter 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
d0c44431d0 add typing information for place_info and country_info 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
282a61ce51 add typing information for utils submodule 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
7a1d22ff15 type annotations for non-blocking DB connection 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
0dff71a410 add type annotations for SQL preprocessor 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
26f30bff28 add type annotation to DB utils
As a cursor is needed as type, make this a public type.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e6775e713c add typing information to DB properties 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
69f9122bef add typing annotations for DB status module
Requires TypedDict which is only available from Python 3.8. Require
therefore typing_extensions to make the functions available for
earlier Python versions.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
fc254fc744 adapt use of Connection in bdd tests to name change 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
845c43137a add type annotations to freeze functions 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
aaf2b6032e fix uses of config.get_path() to expect None 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
c4928c646d define type for enivronment dictionaries 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
f12fe54d2b restrict return type more 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
fc03c0266a add type annotations to exec_utils 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
7b042de300 CI: install type info for psycopg2 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
681aad7e0d avoid issues with Python < 3.9 and linting 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
f22fa992f7 move complex typing annotations to extra file 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
992e6f72cf type annotations for DB utils 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e6ee3c772c type annotations for DB connection 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
9d716f0f7d mypy: add psycopg2 typing info from typeshed 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
95ed95c616 add type annotations to config module 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
bf36f33e79 add type annotations for version.py 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
58ab8319b9 mypy: ignore dotenv library 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
a87cb77ce8 document use of mypy 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
2be45a35b4 CI: add mypy to tests 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
9b636fdc10 mypy: minimal annotations to enable a clean run 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
b1903f0fbf Merge pull request #2761 from lonvia/repair-index-analysis
Repair `admin --analyse-indexing`
2022-07-18 09:38:08 +02:00
Sarah Hoffmann
00f5b78160 Merge pull request #2764 from otbutz/patch-4
Remove legacy Postgres options
2022-07-13 15:51:47 +02:00
otbutz
d58061473e Remove legacy Postgres options 2022-07-12 09:49:10 +02:00
Sarah Hoffmann
33cb925f2e Merge pull request #2691 from mtmail/ubuntu-22
Vagrant and CI tests for Ubuntu 22.04
2022-07-11 15:37:51 +02:00
marc tobias
c70ca7f57b In tests for PHP 8 disable Just-in-time, it conflicts with tools that determine coverage 2022-07-09 22:03:48 +02:00
Marc Tobias
a6dab5e300 Vagrant and CI tests for Ubuntu 22.04 2022-07-09 22:03:48 +02:00
Sarah Hoffmann
7cafec0750 decode_json() always create arrays instead of objects 2022-07-09 09:10:21 +02:00
Sarah Hoffmann
4b12d52ef5 convert admin --analyse-indexing to new indexing method
A proper run of indexing requires the place information from the
analyzer. Add the pre-processing of place data, so the right
information is handed into the update function.
2022-07-07 16:20:08 +02:00
Sarah Hoffmann
300612c5a8 Merge pull request #2760 from lonvia/reorganize-data-classes
Code cleanup: move some common code into the data submodule
2022-07-07 16:12:11 +02:00
Sarah Hoffmann
856925d19b remove analyze() from PlaceInfo class
The function creates circular dependencies.
2022-07-07 12:06:58 +02:00
Sarah Hoffmann
cbbcbb1fd7 move country_info into data submodule 2022-07-06 11:08:36 +02:00
Sarah Hoffmann
bce93d60bd move PlaceInfo into data submodule
This data structure is shared between indexer and tokenizer.
2022-07-06 10:54:47 +02:00
Sarah Hoffmann
69e51aebab test: avoid column names with upper-case letters
This may cause problems when the column names get quoted.
2022-07-05 09:12:55 +02:00
Sarah Hoffmann
8ac133f2ee CI: remove unneed stuff to make space for DB 2022-07-03 16:42:57 +02:00
Sarah Hoffmann
67996929e0 Merge pull request #2706 from mtmail/php-fixes-php7-vs-php8
PHP 8 behaves slightly different with in_array and usort
2022-07-03 11:28:52 +02:00
Marc Tobias
ccf119206d PHP 8 behaves slightly different with in_array and usort 2022-07-03 10:55:34 +02:00
Sarah Hoffmann
bc63f10057 fix syntax error with tablespaces 2022-06-30 09:19:16 +02:00
Sarah Hoffmann
6f15306766 docs: replace deprecated pages option
Fixes #2661.
2022-06-29 20:30:28 +02:00
Sarah Hoffmann
161d83af5b fix handling of zero importance
To avoid importance becoming zero and cancelling out other weights,
df008d99f5 introduced a minimum value
for importance. That broke importances for interpolated addresses,
which are less than zero.

Instead of setting a minimum, set zero importances to a very small
value.

Fixes #2753.
2022-06-29 17:54:30 +02:00
Sarah Hoffmann
3bf3b894ea Merge pull request #2757 from lonvia/filter-postcodes
Add filtering, normalisation and variants for postcodes
2022-06-24 21:09:41 +02:00
Sarah Hoffmann
536f08f33a ignore 5+ postcodes in the US for now
Hierarchical postcodes need a different treatment.
2022-06-24 19:24:22 +02:00
Sarah Hoffmann
3dd7410bb7 bdd: correctly skip postcode tests for legacy 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
93d5be097a bdd: do not expect legacy word table to be without empty tokens
It can happen for bogus names and this will not get fixed anymore.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
6eb9044353 adapt search algorithm to new postcode format in word 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
612d34930b handle postcodes properly on word table updates
update_postcodes_from_db() needs to do the full postcode treatment
in order to derive the correct word table entries.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
5be320368c add documentation for postcode customization 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
7f2ad4ac7e fix linting issue 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
0f00f4968c fix up BDD tests for postcode changes
Includes smaller code fixes found by the tests.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
37b2c6a830 port legacy tokenizer to new postcode handling
Also documents the changes to the SQL functions of the tokenizer.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
e86db3001f fix postcode pattern for Mozambique
Optional groups are not implemented yet.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
7b6ec4fc6c add tests for discarding bad postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
67dfa38e60 fix liniting problems 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
2eca9fc8af cache postcode normalization 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
b5e5efc131 only add well-formatted postcodes to location table 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
80ea13437d move postcode matcher in a separate file 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
bf86b45178 move postcode centroid computation to Python 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
4885fdf0f9 add class for online centroid computation 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
b7704833e4 icu: switch postcodes to using the pre-formatted one 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
ca7b46511d introduce and use analyzer for postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
18864afa8a postcodes: introduce a default pattern for countries without postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
5ba75df507 postcode: generate a generic form 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
9cf700e85d add postcodes for most of the remaining countries
Now includes all postcodes that have optional parts.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
9172696324 postcodes: add support for optional spaces 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
49626ba709 add postcode formats with optional country code
If the country code is not part of the mandatory output, the
country code filter will do the correct handling.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
baee6f3de0 postcodes: strip leading country codes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
28ab2f6048 add postcodes patterns without optional spaces 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
90d4d339db initial postcode cleaner for simple patterns
Moves postcodes that are either in countries without a postcode
system or don't correspond to the local pattern for postcodes into
a field for a normal address part. Makes them searchable but not as
a special address. This has two consequences: they are no longer a
skippable part of the address and the postcodes cannot be searched
on their own.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
6e0014e138 add postcode patterns for numeric postcodes
Adds patterns for countries that have simple numeric-only postcodes.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
8080625747 remove postcodes from countries that don't have them
The postcodes will only be removed as a 'computed postcode' they
are still searchable for the given object.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
21fb501699 add info about countries without a postcode 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
0cd3a1b9bd avoid near searches in very large areas
At some point the contains call becomes too expensive.
2022-06-23 23:42:09 +02:00
Sarah Hoffmann
8de483a45b Merge pull request #2755 from Luflosi/fix-typo
Fix typo
2022-06-20 22:23:36 +02:00
Luflosi
3ea87169ac Fix typo 2022-06-20 20:41:00 +02:00
Sarah Hoffmann
42d16d8296 Merge pull request #2751 from mtmail/issue-2750
Documentation fix: should be "nominatim refresh"
2022-06-20 10:21:06 +02:00
marc tobias
adf3ae004f Documentation fix: should be "nominatim refresh" 2022-06-20 02:32:23 +02:00
Sarah Hoffmann
fced1172c4 Merge pull request #2746 from bgo-eiu/patch-2
Added additional languages for Pakistan in country settings
2022-06-18 09:40:47 +02:00
Sarah Hoffmann
299e98776e Merge pull request #2749 from stefkiourk/patch-1
Typos and syntax on Reverse.md
2022-06-17 22:11:55 +02:00
Stef Ki
b803505402 Typos and syntax on Reverse.md 2022-06-17 21:01:38 +02:00
Sarah Hoffmann
8fb9795d04 Merge pull request #2748 from lonvia/bdd-grid-origin
BDD tests: remove support for scenes
2022-06-17 15:25:29 +02:00
Sarah Hoffmann
d8623d6818 bdd: remove support for scenes
Only keep support for the special point geometry 'country:xx'.
2022-06-17 11:54:18 +02:00
Sarah Hoffmann
6c58a4c46c bdd: move query tests from scene to grid description 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
19f67e167c bdd: remove step for scene setup 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
00d8df6fc3 bdd: move update tests from scenes to grid descriptions 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
02068aec7f bdd: move import tests from scenes to grid descriptions 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
3493d317e4 bdd: clear lof buffer after a successful import run 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
a2b486a5b0 bdd: allow to set an origin of the grid 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
3d0f8bdc39 Merge pull request #2745 from lonvia/city-in-city-fix
Improve hierarchy computation for place areas
2022-06-16 15:36:39 +02:00
bgo-eiu
04644102f2 added additional languages for pakistan in country settings 2022-06-16 06:26:44 -04:00
Sarah Hoffmann
f833cc80df use default ranks when reorganising rank_address
When shifting address ranks, the evaluation is always done against
unshifted address ranks on import because the objects we compare against
have not been indexed yet. This changes for updates when the object have
been touched in the meantime. To ensure consistent behaviour across
imports and updates, always use the  unshifted address ranks.
2022-06-16 11:20:23 +02:00
Sarah Hoffmann
df0142678a improve address ordering with mixes of place and admin areas
Resolves a couple of situations where a mixed use of places areas and
administrative boundaries would result in a hierarchy that did not
properly respect the contains relation.
2022-06-16 10:44:16 +02:00
Sarah Hoffmann
800240550b Merge pull request #2737 from lonvia/reset-linking-ranks
Fix rank inheritance from linked places
2022-06-06 09:29:32 +02:00
Sarah Hoffmann
15cf7dd416 add testcase for #2551
This test proves that places that are linked need to be reindexed.
2022-06-05 21:39:17 +02:00
Sarah Hoffmann
2c05fc858a fix rank inheritance from linked places
When taking over the address rank from a linked place, it needs
to be the originally computed rank, not the one that might have
been adjusted in the meantime. The adjustment was made under the
assumption that the node is not linked.
2022-06-05 19:38:14 +02:00
Sarah Hoffmann
a024c7665c Merge pull request #2736 from lonvia/reverse-interpolation-index-order
Change indexing order for interpolations and non-addressable objects
2022-06-03 10:42:54 +02:00
Sarah Hoffmann
cbb4749996 change indexing order for interpolations
Interpolations are now indexed after rank 30 objects. The housenumber
nodes no longer need information from the interpolations while the
interpolations can make use of precomputed postcodes.
2022-06-02 15:16:46 +02:00
Sarah Hoffmann
4b0d9f71e8 Merge pull request #2735 from lonvia/geocodejson-type-reverse
Also fix type output in geocodejson for reverse
2022-06-01 22:14:06 +02:00
Sarah Hoffmann
218c56f9a6 use getattr() instead of __getattr__
Makes the linter happy.
2022-06-01 21:26:13 +02:00
Sarah Hoffmann
a35eda3d2a also fix type output in geocodejson for reverse 2022-06-01 20:46:08 +02:00
Sarah Hoffmann
8a0e3e2f3d Merge pull request #2732 from lonvia/fix-ordering-address-parts
Fix order when searching for addr:* components
2022-05-31 20:26:05 +02:00
Sarah Hoffmann
12a3d51bcc Merge pull request #2731 from lonvia/cleanup-special-phrases
Minor code reorganisation around special phrase parsing
2022-05-31 17:13:56 +02:00
Sarah Hoffmann
60367d95dd Merge pull request #2730 from lonvia/exclude-inclusion-tag
Exclude addr:inclusion from search
2022-05-31 17:13:37 +02:00
Sarah Hoffmann
bd0e157b91 fix order when searching for addr:* components
When matching addr:* components the preference was given to
matches that do not intersect with the place.
2022-05-31 16:57:37 +02:00
Sarah Hoffmann
1821f68ca0 exclude addr:inclusion from search 2022-05-31 14:19:19 +02:00
Sarah Hoffmann
b5ac546275 CI: always use the latest version of pylint
This makes it easier to reproduce issues locally.
2022-05-31 09:12:26 +02:00
Sarah Hoffmann
46689df668 custom comparison for SpecialPhrase
Duplicate elemination only works when a custom hash/equal function
is implemented that is based on the members.
2022-05-30 16:30:41 +02:00
Sarah Hoffmann
e828d0d3f7 move quoting hack to wiki loader
The bad quotes around the type for special phrases
specifically occure in the Wiki pages, so it should be
removed by the loader and not in the generic SpecialPhrase
object.
2022-05-30 14:40:33 +02:00
Sarah Hoffmann
cce0e5ea38 convert special phrase loaders to generators
Generators simplify the code quite a bit compared to the previous
Iterator approach.
2022-05-30 14:12:46 +02:00
Sarah Hoffmann
042e314589 remove the language parameter in the SPWikiLoader
Languages must always be configured through config or environment.
Also use monkeypatched environment in tests.
2022-05-30 10:26:20 +02:00
Sarah Hoffmann
61d813bfef add get_str_list() for config
Converts a config value written as a comma-sparated list into
a Python list of strings.
2022-05-29 13:53:50 +02:00
Sarah Hoffmann
ecee5cf801 Merge pull request #2728 from lonvia/allow-more-partials
Allow search for partials consisting of 3 or more words
2022-05-27 18:09:11 +02:00
Sarah Hoffmann
9e4e913bf7 allow search for partials consisting of 3 or more words
The search query builder currently rejects searches for partial
names only, when the partial terms are all very frequent to avoid
queries that return too many results.

This change slightly relaxes the condition to allow the search when
there are 3 or more partial terms. With so many terms the number
of matches should be managable.
2022-05-27 16:49:14 +02:00
Sarah Hoffmann
98fc528d8e Merge pull request #2715 from otbutz/patch-2
Simplify apache rewrite rules
2022-05-24 14:40:28 +02:00
otbutz
d1cd2d1674 Change to regular regex group 2022-05-24 11:32:59 +02:00
Sarah Hoffmann
b593fe9c3e Merge pull request #2718 from nslxndr/fix-log-endtime
Undefined offset in error log
2022-05-23 16:25:41 +02:00
Sarah Hoffmann
6ca6725f6e Merge pull request #2722 from lonvia/fix-relinking-on-updates
Fix bug with keeping linking on updates
2022-05-23 11:36:20 +02:00
Sarah Hoffmann
1d203fdb3c fix bug with keeping linking on updates
When moving the finding of linked places to the precomputation stage,
it was also moved before the statement where the linked_place_id was
removed from the linkee. The result was that the current linkee was
excluded when looking for a linked place on updates because it was
still linked to the boundary to be updated.

Fixed by allowing to either keep the linkage or change to an unlinked
place.
2022-05-23 10:55:10 +02:00
Sandor Nagy
3f30699131 correct end time computation 2022-05-20 23:11:00 +02:00
otbutz
22bd9c4993 Simplify apache rewrite rules 2022-05-20 10:15:28 +02:00
Sarah Hoffmann
4654701c10 Merge pull request #2713 from lonvia/remove-county-nodes-in-canada
Remove county nodes in Canada from addresses
2022-05-19 10:21:09 +02:00
Sarah Hoffmann
8a67ddcb2b remove county nodes in Canada from addresses
Canada has complete coverage for administrative boundaries on
county level. Removing the county nodes from the addresses avoids error
due to a wide-spread doubling of place nodes for city counties.
2022-05-18 10:19:05 +02:00
Sarah Hoffmann
ab71f17c47 Merge pull request #2710 from lonvia/offline-import-mode
Assorted performance improvements for BDD tests
2022-05-12 11:08:29 +02:00
Sarah Hoffmann
f314abcfe1 bdd: restrict imports to four languages
This mainly restricts the number of country names that are loaded.
2022-05-11 16:40:53 +02:00
Sarah Hoffmann
2d1a22705f Merge pull request #2709 from lonvia/less-strict-country-assignment
Be more strict with country assignments
2022-05-11 16:24:47 +02:00
Sarah Hoffmann
e74e577029 bdd: recreate functions on template DB
Avoids calling function refresh on every scenario. The content won't
change between runs.
2022-05-11 15:50:22 +02:00
Sarah Hoffmann
aa0ae610c6 avoid calling OSM servers during bdd tests 2022-05-11 15:33:01 +02:00
Sarah Hoffmann
dc6c4bf22e add offline import mode
In offline mode no attempts are made to download data from the internet.
At the moment that only concerns the computation of the database date.
It contacts the main API to get the date.
2022-05-11 15:03:02 +02:00
Sarah Hoffmann
a7a5f0161f Merge pull request #2708 from lonvia/use-format-literals
Assorted fixes for new pylint warnings
2022-05-11 14:29:56 +02:00
Sarah Hoffmann
739fe1c2c4 no longer allow fuzzy assignment of country
The fallback country boundaries already contain a sufficiently large
part of the water area, so there is no need to extend the country
assignment even more. Features outside countries should not show a
country in their address.
2022-05-11 11:54:25 +02:00
Sarah Hoffmann
3ba975466c fix spacing
Some versions of pylint are oddly picky.
2022-05-11 10:36:09 +02:00
Sarah Hoffmann
d14a585cc9 pylint: disable no-self-use check
This checker encourages bad behaviour (namely changing the static
status of a function during inheritence) and will be made optional
in upcoming versions of pylint.
2022-05-11 10:25:00 +02:00
Sarah Hoffmann
7f7a7df3a2 solve assorted issue with newer pylint versions
Includes more use of 'with', adding encodings to open statements
and a couple of issues with parameter renaming.
2022-05-11 10:22:14 +02:00
Sarah Hoffmann
5d5f40a82f use context management when processing Tiger data 2022-05-11 09:48:56 +02:00
Sarah Hoffmann
ae6b029543 remove redundant 'u' prefixes for unicode strings 2022-05-11 09:48:56 +02:00
Sarah Hoffmann
bb2bd76f91 pylint: avoid explicit use of format() function
Use psycopg2 SQL formatters for SQL and formatted string literals
everywhere else.
2022-05-11 09:48:56 +02:00
Sarah Hoffmann
4e1e166c6a add a function to return a formatted version
Replaces the various repeated format strings throughout the code.
2022-05-11 09:01:24 +02:00
Sarah Hoffmann
5ff35d9984 Merge pull request #2707 from lonvia/make-icu-tokenizer-the-default
Make ICU tokenizer the default
2022-05-11 08:52:49 +02:00
Sarah Hoffmann
c6a426a885 no longer need postgresql-server-dev packages 2022-05-10 18:33:51 +02:00
Sarah Hoffmann
11103268e9 make legacy tokenizer tests the extra on CI 2022-05-10 18:33:34 +02:00
Sarah Hoffmann
b332b1ae23 Merge pull request #2704 from mtmail/migrate-phpunit-xml-schema
PHPUnit 9 changed configuration schema slightly
2022-05-10 17:44:34 +02:00
Sarah Hoffmann
7e70e5f503 always state encoding when opening files in text mode
Also applies to Path.write_text().
2022-05-10 15:36:29 +02:00
Marc Tobias
99fa23040a PHPUnit 9 changed configuration schema slightly 2022-05-10 15:20:43 +02:00
Sarah Hoffmann
adeebec32a switch tests to ICU tokenizer as default 2022-05-10 14:54:50 +02:00
Sarah Hoffmann
b93ef23d3f add migration hint for the new tokenizer default 2022-05-10 12:07:21 +02:00
Sarah Hoffmann
4002bee0c1 make ICU the default tokenizer 2022-05-10 12:02:50 +02:00
Sarah Hoffmann
ed6fda6968 Merge pull request #2702 from lonvia/move-country-names-into-includes
Clean up country name settings
2022-05-10 09:21:16 +02:00
Sarah Hoffmann
2ae13c5583 Merge pull request #2695 from mtmail/git-commit-hash-to-version
add git commit hash to 'nominatim --version' output
2022-05-10 09:14:15 +02:00
Marc Tobias
821dabb138 add git commit hash to --version output 2022-05-09 23:56:13 +02:00
Sarah Hoffmann
9d468f6da0 support arbitrary prefixes in country name list
This means we can now get rid of the last special cases for names.
2022-05-09 11:55:26 +02:00
Sarah Hoffmann
3a8ddf736e move country names into separate include files 2022-05-09 11:55:26 +02:00
Sarah Hoffmann
720c7b7519 Merge pull request #2696 from mtmail/norminatyn-typos
fix typos of name Nominatim
2022-05-05 10:04:55 +02:00
Marc Tobias
0de83c4a51 fix typos of name Nominatim 2022-05-05 01:04:47 +02:00
Sarah Hoffmann
8c073993ef Merge pull request #2693 from mtmail/nominatim-cli-version
new "nominatim --version" global CLI argument
2022-05-04 09:14:35 +02:00
Marc Tobias
a79ab41782 new nominatim --version CLI argument 2022-05-04 01:33:25 +02:00
Sarah Hoffmann
f509526e5c Merge pull request #2681 from lonvia/improve-geocodejson
Fix 'type' field in the geocodejson response
2022-05-02 16:05:02 +02:00
Sarah Hoffmann
896199c9d4 Merge pull request #2687 from lonvia/check-for-wikipedia
Add check for presence of wikipedia importance
2022-05-02 16:04:32 +02:00
Sarah Hoffmann
08672cdf0a explicit cast for osm_type parameter in SQL needed
Otherwise PostgreSQL won't correctly pick up the index
condition.
2022-05-02 14:12:17 +02:00
Sarah Hoffmann
8163723e22 respect exclude_place_ids for housenumber search 2022-05-02 11:44:10 +02:00
Sarah Hoffmann
32a5f812a9 Merge pull request #2689 from lonvia/relations-in-associated-street
Accept any OSM type in street member of associatedStreet
2022-05-02 11:42:34 +02:00
Sarah Hoffmann
372874e89a accept any OSM type in street member of associatedStreet
This is needed for pedestrian areas mapped as multipolygons
and consequently as relations. The lookup in placex guarantees
that the referenced OSM object is indeed a street.

Fixes #2669.
2022-05-02 09:48:51 +02:00
Sarah Hoffmann
8ebb8ee304 Merge pull request #2686 from mtmail/ubuntu20-php-fpm-version
Install-on-Ubuntu-20.sh - correct php version
2022-04-29 14:16:44 +02:00
Sarah Hoffmann
3d58254462 skip wikipedia table test on reverse-only installations
Wikipedia importances are not imported on reverse-only imports.
2022-04-29 14:12:55 +02:00
Marc Tobias
95de411a81 Install-on-Ubuntu-20.sh - correct php-fpm version 2022-04-29 13:24:15 +02:00
Sarah Hoffmann
439d17569d Merge pull request #2685 from lonvia/show-inherited-housenumber
Keep address parts inherited from surrounding buildings after indexing
2022-04-29 12:15:33 +02:00
Sarah Hoffmann
8bcdba1a14 add check for wikipedia importance data
Adds a new check level WARNING because missing wikipedia importances
are not necessarily an error. If the database is run for reverse
requests only, then it is fine to go without them.
2022-04-29 12:14:53 +02:00
Sarah Hoffmann
37e5f07d83 Merge pull request #2684 from lonvia/translit-keep-spacing-marks
ICU: better letter identification in normalization
2022-04-29 10:38:28 +02:00
Sarah Hoffmann
3c68b12176 keep inherited address parts after indexing
The inherited housenumber is needed for display output. We can't
take the one from the housenumber field because it is already
normalized. Remove the inherited address only when reindexing.

Fixes #2683.
2022-04-28 21:38:00 +02:00
Sarah Hoffmann
63dc4b39bc ICU: better letter identification in normalization
The Letter class does not include non-spacing marks that can also
have a consonant or vowel meaning, especially in Indian languages.
Use the alnum propoerty instead which includes them all. Also
include the vowel-canceling Virama, which is not a letter by itself
but changes the transliteration.
2022-04-28 18:23:17 +02:00
Sarah Hoffmann
0ea099bfd5 mention the breaking API change in the migratioin docs 2022-04-27 11:52:53 +02:00
Sarah Hoffmann
310776671b adapt docs to geocodejson changes 2022-04-27 11:50:12 +02:00
Sarah Hoffmann
4b84de400b geocodejson: add osm_key and osm_value fields
Return OSM main tag information in geocodejson. This is not part
of the official spec but can be useful to get more detailed information
of the object type. Brings the Nominatim output closer to what
Photon produces.
2022-04-27 10:58:25 +02:00
Sarah Hoffmann
8677da2a72 geocodejson: type should contain the general feature class
'type' so far contained the value of the OSM tag. That is rarely
helpful because it is not a restricted class of values. Change
this to contain the types as defined in the geocodejson spec,
which correspond to the address layer names.
2022-04-27 10:53:12 +02:00
Sarah Hoffmann
de828b723e Merge pull request #2678 from lonvia/address-part-order
Change selection of primary address part for ways that cross boundaries
2022-04-22 20:32:10 +02:00
Sarah Hoffmann
a515761193 further tweaking of address distance
For point features, keep using the distance to centroid.
For area features, add a tie breaker for the case where the
center point falls on the boundary.
2022-04-22 14:32:19 +02:00
Sarah Hoffmann
784dad866f change distance computation between place and address part
Instead of computing the distance to the centroid of the area
compute the distance of the area to the centroid of the feature.
This means we give preference to the area that covers the centroid.
It's still a heuristics but one that is a bit less random.
2022-04-22 14:32:09 +02:00
Sarah Hoffmann
403e6f7e5c Merge pull request #2666 from lonvia/admin-command-for-forced-indexing
Admin command for forced indexing
2022-04-14 21:44:08 +02:00
Sarah Hoffmann
27f7c7fd88 add documentation for new refresh command 2022-04-14 15:10:24 +02:00
Sarah Hoffmann
4f59644cc2 add tests for new data invalidation functions 2022-04-14 14:52:13 +02:00
Sarah Hoffmann
c3f1d34b71 add new commands for forced invalidation before indexing 2022-04-14 11:05:43 +02:00
Sarah Hoffmann
f8f20899a3 recommend PostgreSQL 13+
See https://github.com/osm-search/Nominatim/discussions/2659.
2022-04-14 09:21:25 +02:00
Sarah Hoffmann
a319b0a0b4 docs: different default for format on osm.org
Add a note that the format parameter is needed for
nominatim.openstreetmap.org for historical reasons.
2022-04-08 17:13:42 +02:00
Sarah Hoffmann
604ddc0f9d Merge pull request #2660 from lonvia/pyosmium-contextmanager
Support using ReplicationServer as contextmanager
2022-04-08 17:07:33 +02:00
Sarah Hoffmann
126cabacb8 support new ReplicationServer as contextmanager 2022-04-07 17:58:04 +02:00
Sarah Hoffmann
f78ae969e9 Merge pull request #2466 from I70l0teN4ik/state-code
add ISO3166-2-lvl<admin_level> field to response address details
2022-04-07 16:39:50 +02:00
Artem Ziablytskyi
d1479072ae fix bdd tests and docs 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
9a56e53d50 use ISO3166-2-lvl<admin_level> instead of typeLabel prefix 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
7899654675 proper instruction to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
a79c1bda9b Fix API docs and Vagrant instructions to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
665fae8343 Fix API docs and Vagrant instructions to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
6bee188f24 Change the key to <addresspart_type>-ISO3166-2 to support xml response correctly 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
82dbcbb12a add <addresspart_type>:ISO3166-2 field to response address details 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
76c146f326 add state_code field to response address details 2022-04-07 16:37:51 +02:00
Sarah Hoffmann
fd4ab3f262 Merge pull request #2629 from tareqpi/country-names-yaml-configuration
Move default country names into yaml configuration
2022-04-04 09:04:25 +02:00
Tareq Al-Ahdal
cfbd3652ef fix linting error 2022-04-02 00:14:18 +08:00
Tareq Al-Ahdal
e9c14979a4 remove the conversion to json for name 2022-04-01 22:54:14 +08:00
Tareq Al-Ahdal
e9f979b67b 'read_config' is no longer a fixture
add 'read_config' to test cases that need it
2022-04-01 22:52:17 +08:00
Tareq Al-Ahdal
a323b8f63a test for loading special characters from country_settings.yaml 2022-04-01 21:58:57 +08:00
Tareq Al-Ahdal
9411c14fd2 fix reset country info before loading custom data 2022-04-01 21:55:34 +08:00
Tareq Al-Ahdal
8525e7542f custom country config loads correctly 2022-04-01 21:46:56 +08:00
Sarah Hoffmann
7dabbc5462 Merge pull request #2655 from lonvia/migration-internal-country-name
Add migration for new country name handling in ICU tokenizer
2022-03-31 18:04:18 +02:00
Sarah Hoffmann
de18cd1523 add test for new table_has_column function 2022-03-31 15:55:20 +02:00
Sarah Hoffmann
36a1560117 add migration to mark internal country names 2022-03-31 15:55:20 +02:00
Tareq Al-Ahdal
b5f311d6bc separate unit test function into three functions 2022-03-30 22:06:59 +08:00
Sarah Hoffmann
83dd4362aa remove temporary file 2022-03-30 15:13:31 +02:00
Sarah Hoffmann
a71cab639b Merge pull request #2650 from mtmail/update-lookup-examples
documentation: update example output of lookup endpoint
2022-03-28 20:21:45 +02:00
Marc Tobias
5e0155ae29 documentation: update example output of lookup endpoint 2022-03-28 16:41:10 +02:00
Tareq Al-Ahdal
afef83b1c6 fix edge case handling when 'names' is not there 2022-03-25 22:25:55 +08:00
Tareq Al-Ahdal
9db13aac72 Added unit tests for loading country info from yaml file 2022-03-25 22:22:44 +08:00
Tareq Al-Ahdal
9a1f891998 fix linting error 2022-03-24 13:27:24 +08:00
Tareq Al-Ahdal
7bb7ed468a fix storing of escape sequences in database 2022-03-24 13:18:44 +08:00
Tareq Al-Ahdal
4fc61d260f clean up 2022-03-24 13:16:59 +08:00
Tareq Al-Ahdal
1ceb6926b7 merge of insert query + modularity enhancements 2022-03-24 13:13:38 +08:00
Sarah Hoffmann
d33c82cb66 Merge pull request #2641 from lonvia/reinit-tokenizer-dir
Transparantly reinitialize tokenizer directory when necessary
2022-03-20 21:46:07 +01:00
Sarah Hoffmann
4c66c35ed6 reinit the tokenizer directory on website refresh
This means the project directory is usable again, once refresh --website
was run.
2022-03-20 17:49:22 +01:00
Sarah Hoffmann
54db1d8915 docs: copying project dir no longer necessary 2022-03-20 16:01:27 +01:00
Sarah Hoffmann
a0ed80d821 restore the tokenizer directory when missing
Automatically repopulate the tokenizer/ directory with the PHP stub
and the postgresql module, when the directory is missing. This allows
to switch working directories and in particular run the service
from a different maschine then where it was installed.
Users still need to make sure that .env files are set up correctly
or they will shoot themselves in the foot.

See #2515.
2022-03-20 11:31:42 +01:00
Sarah Hoffmann
e65913d376 cache loaded configuration
Reading the YAML files is fairly expensive and slows down the BDD tests
significantly. Therefore cache the results from reading the file.
2022-03-20 11:30:03 +01:00
Sarah Hoffmann
2f266d946b Merge pull request #2639 from lonvia/remove-operator
No longer use operator tag as a name
2022-03-18 16:42:18 +01:00
Tareq Al-Ahdal
b6ac4ad837 fix linting error 2022-03-18 21:05:47 +08:00
Sarah Hoffmann
42f0282f14 remove special case for operator names
The OSM data has been sufficiently cleaned up by now that
the operator no longer needs to be considered a name tag.
Use 'brand' as the searchable alternative.
2022-03-18 10:48:53 +01:00
Tareq Al-Ahdal
af739d2f57 modify logic of _include_key function 2022-03-18 06:52:16 +08:00
Tareq Al-Ahdal
fa2aca1cbc adding prefix to keys is now more configurable 2022-03-18 06:20:00 +08:00
Tareq Al-Ahdal
943e5fe699 Revert the removal of new line at the end of the file 2022-03-18 06:07:48 +08:00
Tareq Al-Ahdal
d09670d208 modify logic to prepend 'name:' to keys' 2022-03-18 06:01:25 +08:00
Tareq Al-Ahdal
83b4b8d9c1 reattach 'name:' prefix to keys 2022-03-18 05:46:23 +08:00
Tareq Al-Ahdal
d32a7c1888 initialize an empty dictionary for nested name key 2022-03-18 02:50:33 +08:00
Tareq Al-Ahdal
d0c1b73fb3 remove duplicate values 2022-03-18 02:43:42 +08:00
Tareq Al-Ahdal
90ac15748e fix comment 2022-03-18 02:38:04 +08:00
Tareq Al-Ahdal
6be2077d92 Merge branch 'master' into country-names-yaml-configuration 2022-03-18 02:36:12 +08:00
Tareq Al-Ahdal
456d439e97 Reformatting of country keys 2022-03-18 02:23:11 +08:00
Sarah Hoffmann
2723553593 Merge pull request #2637 from lonvia/keep-linked-place-names
Introduce separation of names from linked places
2022-03-17 16:39:30 +01:00
Sarah Hoffmann
23de4c7aca adapt ParameterParser tests to new key list 2022-03-17 11:45:05 +01:00
Sarah Hoffmann
ce14964943 fix linting 2022-03-17 11:05:32 +01:00
Sarah Hoffmann
e133476c35 merge linked names correctly into namedetails
Convert the '_place_*' entries back to normal entries before
returning them in the 'namedetails' section. If the name field is
duplicated, kept the '_place_*' notation. This preserves the previous
behaviour before _place_ names were introduces but adds the additional
names from the linked place for reference.
2022-03-17 11:02:02 +01:00
Sarah Hoffmann
524dc64ab7 make sure outputs take into account linked place names 2022-03-16 21:44:52 +01:00
Sarah Hoffmann
17da5f45be fix return code for PHP exceptions
These have returned a 0 until now.
2022-03-16 21:44:02 +01:00
Sarah Hoffmann
42cd021d04 save differing linked polace names in extra fields
This keeps the names tracable and ensures that all names are searchable
when they differ. Do not keep names when they are exactly the same
to save some space. Linked names are cleaned out before relinking.
2022-03-16 16:38:52 +01:00
Sarah Hoffmann
433d2f4c7d Merge pull request #2633 from lonvia/fix-reverse-single-interpolation-point
Correctly handle single-point interpolations in reverse
2022-03-16 14:22:59 +01:00
Sarah Hoffmann
be8f5778a1 use https protocol for cloning from github
Does not need authentication.
2022-03-16 12:05:58 +01:00
Sarah Hoffmann
ef98a85b05 correctly handle single-point interpolations in reverse
Lookup in location_property_osmline needs to be special cased
for startnumber = endnumber. Also adds tests for the case.

Fixes #2680.
2022-03-16 11:19:09 +01:00
Tareq Al-Ahdal
b4bd4ff67d fix linting error 2022-03-15 19:14:04 +08:00
Sarah Hoffmann
930a5cd12a Merge pull request #2632 from nslxndr/fix-log-typo
Fix typo in log message on replication initialisation
2022-03-15 11:01:57 +01:00
Sandor Nagy
7e3701b64a Fix typo in log message on replication initialisation 2022-03-15 07:50:47 +01:00
Tareq Al-Ahdal
165d17f7f7 reintroduce 'name:' prefix to country name keys 2022-03-13 18:58:27 +08:00
Tareq Al-Ahdal
3939cb614e Remove country.sql from CMakeLists.txt 2022-03-13 18:56:19 +08:00
Tareq Al-Ahdal
377cf36be3 modify data import logic to load country names from yaml 2022-03-12 15:20:57 +08:00
Tareq Al-Ahdal
8b6652a40b move default country names into yaml configuration 2022-03-12 15:17:01 +08:00
Sarah Hoffmann
479d726774 Merge pull request #2627 from mtmail/location-of-osm2pgsql
documentation: clarify osm2pgsql isnt in project directory by default
2022-03-10 15:39:10 +01:00
Marc Tobias
1fcc9717bb documentation: clarify osm2pgsql isnt in project directory by default 2022-03-10 14:16:12 +01:00
Sarah Hoffmann
c35b3ea5c7 Merge pull request #2621 from lonvia/housenumber-analyzer
Introduce optional token analysis for housenumbers
2022-03-01 15:19:07 +01:00
Sarah Hoffmann
15beeef6ce do not expand records in select list
An expression of the form 'SELECT (func()).*' will be expanded
by Postgresql _before_ execution with the result that the function
will be called as many times as there are fields in the record.
This is not what we want. The function call needs to go into
the FROM clause instead.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
92bc3cd0a7 fix linting issue 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
0a9f971e44 add tests for new analyzed housenumbers 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
4a3bbd0319 adapt housenumber cleanup to new word table structure 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
89e1446131 bdd: disable some housenumber tests for legacy
Optional spaces in housenumbers are not supported by legacy tokenizer,
so disable those tests.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
b694a97edf add documentation for housenumber analyzer 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
13ed184efd housenumber analyzer: avoid creating too many variants
Housenumber fields with lots of text are likely bad data. So is
data with many changes from letter to digit. Exclude them from adding
optional spaces.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
f03a05f6bb add new analyser for houenumbers
This analyser makes spaces optional.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
a6903651fc add framework for analysing housenumbers
This lays the groundwork for adding variants for housenumbers.
When analysis is enabled, then the 'word' field in the word table
is used as usual, so that variants can be created. There will be
only one analyser allowed which must have the fixed name
'@housenumber'.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
b8c544cc98 icu: move token deduplication into TokenInfo
Puts collection into one common place.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
243725aae1 icu: move housenumber token computation out of TokenInfo
This was the last function to use the cache. There is a more clean
separation of responsibility now.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
0bb59b2e22 handle unknown analyzer
When changing something in the default configuration of the sanatizers
that refers to an analyzer that is not yet loaded, there shouldn't be
any errors.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
837d44391c move generation of normalized token form to analyzer
This gives the analyzer more flexibility in choosing the normalized
form. In particular, an analyzer creating different variants can choose
the variant that will be used as the canonical form.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
691ec08586 Merge pull request #2614 from lonvia/reorganise-country-names
Reorganise handling of country names imported from OSM
2022-02-25 09:46:20 +01:00
Sarah Hoffmann
5425394654 add migration to add new derived_names column 2022-02-24 20:50:33 +01:00
Sarah Hoffmann
1d82569f6d add tests for country updates 2022-02-24 16:18:49 +01:00
Sarah Hoffmann
f74228830d bdd: run full import on tests
This uncovered a couple of outdated/wrong tests which have been
fixed, too.
2022-02-24 14:27:51 +01:00
Sarah Hoffmann
a9e3329c39 country_name: use separate columns for names from OSM
This allows us to distinguish between base names and imported ones
and consiquently removing imported ones if necessary.
2022-02-23 09:23:06 +01:00
Sarah Hoffmann
a3e4e8e5cd delete unused country name tokens 2022-02-23 09:23:06 +01:00
Sarah Hoffmann
898febcec5 update supported versions 2022-02-23 09:22:17 +01:00
Sarah Hoffmann
855909b4e9 add 'healthcare' as main tag
Given that the tag is most of the time duplicated by an amenity
tag which is already imported, only import it as a fallback when
there is no name.

Fixes #2609.
2022-02-21 11:52:17 +01:00
Sarah Hoffmann
85d65a2fd2 create idx_place_interpolations for import already
It is needed to look up if a node is part of an interpolation.

Fixes #2608.
2022-02-18 11:11:22 +01:00
Sarah Hoffmann
cd9b0c9a20 Merge pull request #2603 from lonvia/one-step-housenumber-search
One step housenumber search
2022-02-10 17:27:56 +01:00
Sarah Hoffmann
0e11ca9b76 add test that interpolations are found by odd/even 2022-02-10 11:23:51 +01:00
Sarah Hoffmann
fd38dd02ce make sure step is taken into account for interpolations 2022-02-09 21:42:28 +01:00
Sarah Hoffmann
474418f03c include houseumber search in name query
The name query already looks for the existence of housenumbers and
may as well retrive them. Saves up to threee additional lookups.
It also means that we can lift the restriction on looking
for existance of housenumbers for simple queries only.
2022-02-08 22:35:12 +01:00
Sarah Hoffmann
6b9fea6f1a disable debug message in interpolation processing 2022-02-07 23:30:25 +01:00
Sarah Hoffmann
02894ca4a4 Merge pull request #2602 from lonvia/filter-bad-housenumbers
Handle mistagged housenumbers like names
2022-02-07 16:27:04 +01:00
Sarah Hoffmann
7d19209fa1 liniting: disable too-many-ancestors
This is triggered by UserDict which is meant of deriving.
2022-02-07 11:49:18 +01:00
Sarah Hoffmann
a6b4e8ff67 add tests for housenumber-as-name feature 2022-02-07 11:45:12 +01:00
Sarah Hoffmann
38c3ef3da0 add tests for get_string_list()
Renaming test file for sanitizer config because pytest requires
unique names for test files.
2022-02-07 11:22:24 +01:00
Sarah Hoffmann
610f2cc254 sanitizer: move helpers into a configuration class 2022-02-07 10:48:00 +01:00
Sarah Hoffmann
a79a3210e6 implement is-a-name option for housenumbers 2022-02-07 09:27:11 +01:00
Sarah Hoffmann
39ede26b5c Merge pull request #2598 from geofabrik/doc-update-systemd-timer
Document how to set up systemd timers for --once updates
2022-02-06 10:24:48 +01:00
Sarah Hoffmann
c3f206733f really remove CentOS from CI 2022-02-05 16:07:12 +01:00
Sarah Hoffmann
69481d1590 remove CentOS from CI
The CentOS docker image no longer works after CentOS8 went EOL.
See #2601 for discussion.
2022-02-05 15:14:47 +01:00
Sarah Hoffmann
6877668cab Merge pull request #2599 from StephanGeorg/patch-1
Fixed link
2022-02-03 09:45:59 +01:00
Stephan Georg
dc520bd156 Fixed link 2022-02-03 09:39:03 +01:00
Amanda McCann
bc4a343502 Document how to set up systemd timers for --once updates 2022-02-01 17:01:45 +01:00
Sarah Hoffmann
fbc8884693 restrict change propagation to interpolation lines
Also means that Postgresql will use the right index for the query.
2022-01-28 11:05:37 +01:00
Sarah Hoffmann
c50c534d19 Merge pull request #2597 from lonvia/reorganise-interpolations
Reorganise interpolation code
2022-01-28 08:40:08 +01:00
Sarah Hoffmann
45627b485f Merge pull request #2596 from lonvia/remove-codecov
Remove codecov
2022-01-27 17:11:17 +01:00
Sarah Hoffmann
b6fa121f53 remove tests for closest housenumber function 2022-01-27 16:21:45 +01:00
Sarah Hoffmann
9b31ffaa9f php unit tests don't work on ubuntu 18 2022-01-27 15:18:23 +01:00
Sarah Hoffmann
39e300640e remove codecov
Causes more trouble than doing good.
2022-01-27 15:17:33 +01:00
Sarah Hoffmann
2ffc1537e7 raise PostgreSQL requirement to 9.6
The new code uses the open-ended array notation which is only
available sind psql 9.6.
2022-01-27 15:15:56 +01:00
Sarah Hoffmann
64abc90d30 use new tiger step column for queries 2022-01-27 14:08:08 +01:00
Sarah Hoffmann
788505095e add step column to tiger data table
This replaces the interpolationtype column.
2022-01-27 11:54:12 +01:00
Sarah Hoffmann
98432395c3 add migration for upcoming change to tiger tables 2022-01-27 11:48:27 +01:00
Sarah Hoffmann
6b89624f33 adapt frontend to new interpolation table layout 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
4b28b4fed4 adapt BDD tests for new interpolation style 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
fea4dbba50 inherit tags from interpolation not parent
Nodes on an interpolation now only get the address tags of
interpolations and then compute their own parent from that. They no
longer inherit the parent directly.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
83d2c440d5 add migration for new interpolation table layout 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
e6d855b954 add migration for new lookup index 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
9f64c34f1a optimize indexes for interpolation lines
Do not index 'inactive' rows (with startnumber is null) where possible.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
638ed15ada improve handling von updates on nodes in interpolations
Use the same update mechanism as for updates on the interpolations
themselves. Updates must solely happen in place_insert as this is
the place where actual changes of the data happen.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
c0d8b95f67 update interpolations instead of deleting and recreating 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
c65938d53c Merge pull request #2595 from nslxndr/fix-doc-typos
Fix typos in UI doc
2022-01-26 23:08:41 +01:00
Sandor Nagy
2e3f3a55f1 Fix typos in UI doc 2022-01-26 21:39:20 +01:00
Sarah Hoffmann
cdd0f78bc6 Merge pull request #2594 from lonvia/update-osm2pgsql
Update to osm2pgsql 1.6.0
2022-01-25 12:11:57 +01:00
Sarah Hoffmann
9fac20ceef update to osm2pgsql 1.6.0 2022-01-24 16:55:52 +01:00
Sarah Hoffmann
38bd08d25f Merge pull request #2591 from lonvia/cleanup-place-insert
Reorganise code of place_insert() trigger
2022-01-24 15:58:23 +01:00
Sarah Hoffmann
b44493e7f2 reorganise place_insert trigger
Code cleanup and formatting as well as minor improvements, in
particular removal of unnecessary code.
2022-01-24 09:12:50 +01:00
Sarah Hoffmann
f6ec8d2e33 Merge pull request #2589 from lonvia/clean-housenumbers
Add command for cleaning up word table
2022-01-21 10:17:58 +01:00
Sarah Hoffmann
c170d323d9 add tests for cleaning housenumbers 2022-01-20 23:47:20 +01:00
Sarah Hoffmann
3ce123ab69 do not clean housenumbers in reverse-only mode 2022-01-20 20:21:13 +01:00
Sarah Hoffmann
d8b7a51ab6 add actual removal of housenumber tokens 2022-01-20 20:18:15 +01:00
Sarah Hoffmann
344a2bfc1a add new command for cleaning word tokens
Just pulls outdated housenumbers for the moment.
2022-01-20 20:05:15 +01:00
Sarah Hoffmann
86588419fb Merge pull request #2588 from lonvia/housenumber-sanitizer
Move housenumber parsing into sanitizer
2022-01-20 17:44:24 +01:00
Sarah Hoffmann
d09db09849 adapt ICU tets to new housenumber sanitizer
Restrict tests to making sure that handing in multiple housenumbers
works.
2022-01-20 16:05:49 +01:00
Sarah Hoffmann
1e5a8561c0 fix linting issues 2022-01-20 16:00:23 +01:00
Sarah Hoffmann
f3c9578bca complete documentation for new clean-houseunubmers sanatizer 2022-01-20 15:49:32 +01:00
Sarah Hoffmann
3741afa6dc generalize filter-kind parameter for sanatizers
Now behaves the same for tag_analyzer_by_language and
clean_housenumbers. Adds tests.
2022-01-20 15:42:42 +01:00
Sarah Hoffmann
560a006892 add pytest config
We are using custom marks now which need to be registered to avoid
warnings.
2022-01-20 15:38:02 +01:00
Sarah Hoffmann
4774e45218 clean_housenumbers: make kinds and delimiters configurable
Also adds unit tests for various options.
2022-01-20 12:07:12 +01:00
Sarah Hoffmann
206ee87188 factor out housenumber splitting into sanitizer 2022-01-19 17:27:50 +01:00
Sarah Hoffmann
a7e048484b Merge pull request #2585 from lonvia/name-mutations
Introduce character mutations to token analysis
2022-01-19 17:09:36 +01:00
Sarah Hoffmann
d6b5f2f5da docs: add pointer to caddy deployment discussion 2022-01-19 15:28:01 +01:00
Sarah Hoffmann
3df560ea38 fix linting error 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
adbaf700cd move parsing of mutation config to setup phase 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
4a41bff3ab add documentation for new mutation feature 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
b453b0ea95 introduce mutation variants to generic token analyser
Mutations are regular-expression-based replacements that are applied
after variants have been computed. They are meant to be used for
variations on character level.

Add spelling variations for German umlauts.
2022-01-18 11:09:21 +01:00
Sarah Hoffmann
0192a7af96 move variant configuration reading in separate file 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
630ad38a67 refactor variant production to use generators 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
21156fc2a2 Merge pull request #2578 from lonvia/iso-3166-2
Make ISO3166-2 references searchable
2022-01-13 14:54:35 +01:00
Sarah Hoffmann
fa99f5bc03 Merge pull request #2579 from geofabrik/doc-update-typo
Fix typo in name of service. The rest of the docs call it nominatim-updateS
2022-01-13 14:01:57 +01:00
Amanda McCann
09aa1e7af4 Fix typo in name of service. The rest of the docs call it nominatim-updateS 2022-01-13 13:14:17 +01:00
Sarah Hoffmann
2034ed387b make ISO3166-2 references searchable 2022-01-13 09:44:42 +01:00
Sarah Hoffmann
d6140d6d54 Merge pull request #2571 from lonvia/ukrainian-apostrophe
Consider "modifier letter apostrophe" to be punctuation
2022-01-11 09:41:07 +01:00
Sarah Hoffmann
fb54bd3fcf consider "modifier letter apostrophe" to be punctuation
While technically being a letter, the apostrophe is often replaced
with a normal apostrophe in writing which is a punctuation mark.
This makes sure that the modifier letter apostrophe yields the same
normalization results and thus is really interchangable.

Only has an effect after the next reimport.

Fixes #2569.
2022-01-10 17:40:03 +01:00
Sarah Hoffmann
a486ee347a Merge pull request #2570 from woodpeck/patch-3
Fix typos
2022-01-10 14:21:48 +01:00
Frederik Ramm
5fb3582b31 Fix typos 2022-01-10 13:38:53 +01:00
Sarah Hoffmann
8b0b9db31e Merge pull request #2565 from lonvia/swap-wordset-order
Swap order of query interpretation
2022-01-06 09:02:46 +01:00
Sarah Hoffmann
f9889f81d6 swap order of query interpretation
A forward interpretation of the form 'street, city, country' is
much more frequent than the reverse form 'country, city, street'.
Thus swap the order of interpretations that the forward order comes
first.
2022-01-05 15:21:14 +01:00
Sarah Hoffmann
efafa52719 Merge pull request #2562 from lonvia/copyright-headers
Add consistent copyright headers
2022-01-04 23:10:37 +01:00
Micah David Cochran
8bda59fbe7 made collect_os_info script in Python 2022-01-03 14:57:01 -06:00
Sarah Hoffmann
c3788d765e add consistent SPDX copyright headers 2022-01-03 16:23:58 +01:00
Sarah Hoffmann
e407558f76 Merge pull request #2559 from lonvia/disable-jit-in-queries
Disable JIT and parallel workers on search frontend
2022-01-03 15:13:57 +01:00
Sarah Hoffmann
042df4198a disable JIT and parallel workers on search frontend
Bad query planning now also interferes with queries for search and
reverse.
2021-12-22 10:47:54 +01:00
Sarah Hoffmann
ab6f35d83a Merge pull request #2553 from lonvia/revert-street-matching-to-full-names
Revert street matching to full names
2021-12-14 15:52:34 +01:00
Micah David Cochran
f20d85738f add utils/collect_os_info.sh script 2021-12-13 11:26:09 -06:00
Sarah Hoffmann
f9b56a8581 correctly match abbreviated addr:street
This only works when addr:street is abbreviated and the street
name isn't. It does not work the other way around.
2021-12-08 21:58:43 +01:00
Sarah Hoffmann
fedc8ed474 Merge pull request #2542 from lonvia/update-phpunit
Update PHPUnit use to 9.5
2021-12-07 15:44:45 +01:00
Sarah Hoffmann
79aeb31088 restrict PHPUnit to 9.5 version
There are so many breaking changes with PHPUnit that it is
impossible to give any other guarantees.
2021-12-07 14:49:31 +01:00
Sarah Hoffmann
04857d32cd enable PHPUnit 9 for coverage
A couple of functions have been renamed.
2021-12-07 12:07:17 +01:00
Sarah Hoffmann
109cdce92c php unit: replace deprecated regex assert
The regEx assertion has been renamed in PHPUnit 9.5
and causes deprecation warnings.
2021-12-07 11:34:21 +01:00
Sarah Hoffmann
b7554d9ed8 php unit: don't enforce a name on the test database
Also gets rid of a PHPUnit deprecation warning.
2021-12-07 11:31:45 +01:00
Sarah Hoffmann
6106f1a32e php test: class must be called like the file 2021-12-07 11:20:38 +01:00
Sarah Hoffmann
f2a8307bb6 disable codecov
Not working.
2021-12-07 11:13:30 +01:00
Sarah Hoffmann
470ee7aef9 Merge pull request #2540 from lonvia/remove-support-for-centos7
Remove installation instructions for CentOS 7
2021-12-07 09:17:29 +01:00
Sarah Hoffmann
aefca48e78 remove installation instructions for CentOS 7
This ends official support for CentOS 7.
2021-12-06 16:05:27 +01:00
Sarah Hoffmann
5e792078b3 remove some odd varaints of addr:street from the styles
Some import has added names in partial tags which confuse the
street name matching.
2021-12-06 15:17:00 +01:00
Sarah Hoffmann
7f7d2fd5b3 skip most addr: tags with suffixes
Only one addr: tag can be processed currently, so make
sure it is the one without suffixes to not get odd data.
addr:street is the exception because it uses a different
matching mechanism.
2021-12-06 14:55:10 +01:00
Sarah Hoffmann
5e435b41ba ICU: matching any street name will do again 2021-12-06 14:26:08 +01:00
Sarah Hoffmann
44cfce1ca4 revert to using full names for street name matching
Using partial names turned out to not work well because there are
often similarly named streets next to each other. It also
prevents us from being able to take into account all addr:street:*
tags.

This change gets all the full term tokens for the addr:street tags
from the DB. As they are used for matching only, we can assume that
the term must already be there or there will be no match. This
avoid creating unused full name tags.
2021-12-06 11:38:38 +01:00
Sarah Hoffmann
bb175cc958 Merge pull request #2539 from lonvia/clean-up-python-tests
Restructure and extend python unit tests
2021-12-03 17:08:25 +01:00
Sarah Hoffmann
5a9fb6eaf7 specify text type in test SQL
Older version of postgres fail otherwise.
2021-12-03 13:56:23 +01:00
Sarah Hoffmann
54d35ddfe9 split cli tests by subcommand and extend coverage 2021-12-02 23:45:48 +01:00
Sarah Hoffmann
7beccb7997 remove unnecessary pass statements 2021-12-02 15:54:24 +01:00
Sarah Hoffmann
14a78f55cd more unit tests for tokenizers 2021-12-02 15:46:36 +01:00
Sarah Hoffmann
7617a9316e extend API unit tests 2021-12-01 20:48:29 +01:00
Sarah Hoffmann
a52ed366e4 add tests for migration 2021-12-01 20:27:40 +01:00
Sarah Hoffmann
7be164e2a5 more testing for refresh functions 2021-12-01 14:58:54 +01:00
Sarah Hoffmann
a24f25c0d8 more tests for exec utilities 2021-12-01 14:23:51 +01:00
Sarah Hoffmann
993b238a41 add more tests for database import 2021-12-01 11:54:58 +01:00
Sarah Hoffmann
bbbfc8201c add tests for adding additional data
Also adds checks that parameters for osm2pgsql are set
as expected.
2021-12-01 11:22:46 +01:00
Sarah Hoffmann
6f03a4d6ce add tests for flatten_config_file and other than yaml formats 2021-12-01 10:24:11 +01:00
Sarah Hoffmann
c8958a22d2 tests: add fixture for making test project directory 2021-11-30 18:01:46 +01:00
Sarah Hoffmann
37afa2180b generalize fixtures for cli tests 2021-11-30 14:07:39 +01:00
Sarah Hoffmann
b2df8e478a python test: move single-use fixtures to subdirectories 2021-11-30 12:03:16 +01:00
Sarah Hoffmann
50fccb52be remove unused test files 2021-11-30 11:44:10 +01:00
Sarah Hoffmann
b90e719da5 organise python tests in subdirectories
The directories follow the same structure as the modules in
nominatim/.
2021-11-30 11:22:26 +01:00
Sarah Hoffmann
97f1723181 Merge pull request #2530 from lonvia/declassify-highway
Change default rank for highway objects to 30
2021-11-25 08:41:25 +01:00
Sarah Hoffmann
80e0a3cce4 change default rank for highway objects to 30
The highway key is being used more and more for non-ways these
days. This clashes with Nominatim's assumption that essentially
everything that has a highway tag can be used as the street part
of the address.

Change the default rank of highway objects to 30 to avoid this.
Only the known values for streets keep the rank 26 and are now
listed explicitly.
2021-11-24 22:10:40 +01:00
Sarah Hoffmann
79effae933 Merge pull request #2529 from lonvia/sort-street-results-by-tiger-housenumber
Take tiger housenumber into account when ranking street results
2021-11-24 16:23:41 +01:00
Sarah Hoffmann
810056349f add migration for inclusive housenumber Tiger index 2021-11-24 12:03:20 +01:00
Sarah Hoffmann
b1d490ea53 add index for Tiger housenumber queries 2021-11-24 11:10:20 +01:00
Sarah Hoffmann
345637290b take Tiger housenumbers into account when ranking street results
Queries with a housenumber need to rank streets higher that
have the requested housenumber attached. We already do that for
ordinary housenumber objects and for interpolations. This
adds support for Tiger housenumbers as well.

Fixes #2501.
2021-11-24 11:10:20 +01:00
Sarah Hoffmann
9628df3031 Merge pull request #2528 from lonvia/allow-french-extra-housenumbers
Don't penalize French 'bis' housenumbers
2021-11-21 10:53:20 +01:00
Sarah Hoffmann
423f338d04 Merge pull request #2526 from lonvia/docs-moving-database
Add a section about moving the database to another machine
2021-11-19 21:14:53 +01:00
Sarah Hoffmann
3a2597e5c4 don't penalize French 'bis' housenumbers
House numbers of the form '9 bis' are usual in France. So
be a bit more lenient before adding penalties to house numbers
with letters in them.

Fixes #2527.
2021-11-19 21:12:17 +01:00
Sarah Hoffmann
641f261495 Merge pull request #2525 from lonvia/fix-replication-indexer
Fix instantiation of indexer for replication
2021-11-19 16:16:30 +01:00
Sarah Hoffmann
5884a6e7a6 add a section about moving the database to another machine 2021-11-19 16:11:32 +01:00
Sarah Hoffmann
10e979e841 only instantiate indexer once for replication
Also makes sure that indexer object exists everywhere were needed.

See #2518.
2021-11-19 14:48:58 +01:00
Sarah Hoffmann
8dc1441635 Merge pull request #2517 from lonvia/transliteration-special-chars
ICU: avoid non-alphanumerical characters in transliteration
2021-11-11 07:42:42 +01:00
Sarah Hoffmann
c79dcfad9a make sure housenumbers are properly quoted 2021-11-10 20:44:28 +01:00
Sarah Hoffmann
1886952666 avoid special characters in word tokens
Transliteration should only consist of ASCII letters
and numbers. Avoid any other characters.
2021-11-10 17:14:13 +01:00
Sarah Hoffmann
7326b246b7 Merge pull request #2516 from lonvia/test-for-website-dir
Better error reporting when API script does not exist
2021-11-10 13:27:09 +01:00
Sarah Hoffmann
345c812e43 better error reporting when API script does not exist
Check if the API script exists on the expected location before
running php-cli. This way we can add a useful hint about the
project directory.

Fixes #2513.
2021-11-10 11:58:20 +01:00
Sarah Hoffmann
fd4ba3989e Merge pull request #2511 from lonvia/fix-combination-error-needs-address
Fix boolean combination of NeedsAddress flag
2021-11-06 12:11:55 +01:00
Sarah Hoffmann
e2d2571ad0 fix combination of NeedsAddress flag
When dealing with multiple partial terms, only keep the
flag, when all partial terms are so frequent as to need
an address.

Fixes #2510.
2021-11-05 22:18:37 +01:00
Sarah Hoffmann
d479a0585d prepare release 4.0.0 2021-11-02 20:27:55 +01:00
Sarah Hoffmann
addfae31b6 fix typo 2021-11-02 11:09:17 +01:00
Sarah Hoffmann
ccf61db726 Merge pull request #2502 from lonvia/improve-development-documentation
Extend developer's documentation
2021-11-01 16:12:23 +01:00
Sarah Hoffmann
5b86b2078a docs: add overview over indexing 2021-11-01 11:04:03 +01:00
Sarah Hoffmann
a069479340 docs: section about database layout
Replaces the import description which basically was
table layout only now.
2021-10-29 12:03:22 +02:00
Sarah Hoffmann
d11bf9288e Merge pull request #2498 from lonvia/ordering-for-unlisted-place-results
Include unlisted places in ordering by housenumber
2021-10-28 15:28:47 +02:00
Sarah Hoffmann
86eeb4d2ed Merge pull request #2497 from lonvia/docs-maintenance
docs: add new maintenance section
2021-10-28 11:33:34 +02:00
Sarah Hoffmann
2275fe59ab include unlisted places in ordering by housenumber
When ordering results by the fact that they have a housenumber,
also take cases into account where the housenumber is on the
place itself. This may happen when the search includes the name
of the place and the housenumber or for addr:place addresses
where the place is unlisted.
2021-10-28 11:27:31 +02:00
Sarah Hoffmann
48be8c33ba docs: add new maintenance section
currently used for postcode updates, word count updates and
deleted relations.
2021-10-28 09:22:37 +02:00
Sarah Hoffmann
d3d07128b2 Merge pull request #2495 from lonvia/fix-normalization-in-php
ICU: use correct normalization during search
2021-10-27 14:40:42 +02:00
Sarah Hoffmann
37eeccbf4c ICU: use normalization from config in PHP
The TERM_NORMALIZATION config option is no longer applicable.
That was already documented but not yet implemented.
2021-10-27 11:32:44 +02:00
Sarah Hoffmann
1722fc537f bdd: add tests for non-latin scripts 2021-10-26 17:29:03 +02:00
Sarah Hoffmann
b240b182cb Merge pull request #2493 from lonvia/handle-frequent-partials
Tune search queries with frequent partial words
2021-10-26 17:00:43 +02:00
Sarah Hoffmann
c0f347fc8c adapt BDD tests to stricter partial search 2021-10-26 15:52:57 +02:00
Sarah Hoffmann
53dbe58ada do not count words when in reverse-only mode 2021-10-26 12:00:13 +02:00
Sarah Hoffmann
2c4b798f9b further refactor setup to keep function small 2021-10-26 12:00:13 +02:00
Sarah Hoffmann
1cf14a8e94 searches for house numbers must have an address 2021-10-26 12:00:13 +02:00
Sarah Hoffmann
4864bf1509 disallow search for partials without address
Very frequent partial terms take too long to look up and
do not return any valuable results unless the search is
further narrowed down by an address.
2021-10-26 12:00:13 +02:00
Sarah Hoffmann
9934421442 make word count computation part of the import
Accurate word counts are now essential when using
the ICU tokenizer and don't hurt for the legacy one.

Adds about an hour import time.
2021-10-26 12:00:13 +02:00
Sarah Hoffmann
d7267c1603 actions: move ICU tests into its own run 2021-10-26 11:59:13 +02:00
Sarah Hoffmann
5c778c6d32 Merge pull request #2486 from lonvia/fix-special-phrases
Fix parsing of operator in special phrases
2021-10-25 21:45:08 +02:00
Sarah Hoffmann
85797acf1e ICU: add an index over word_ids
Needed for keyword lookup in the details response.
2021-10-25 21:33:27 +02:00
Sarah Hoffmann
c4f5c11a4e be case-insensitve about special phrase operator 2021-10-25 19:51:20 +02:00
Sarah Hoffmann
5a1c3dbea3 fix parsing of operator in special phrases
Because of unstripped input, the operators wouldn't match.
2021-10-25 19:46:30 +02:00
Sarah Hoffmann
8e439d3dd9 Merge pull request #2484 from lonvia/fix-index-use
Reverse: add index hints
2021-10-25 17:20:42 +02:00
Sarah Hoffmann
9ebf921c53 Merge pull request #2483 from lonvia/fix-warming
Fix warming for ICU tokenizer
2021-10-25 16:21:36 +02:00
Sarah Hoffmann
7bd9094aaa reverse: add index hints
The fairly complex where condition of idx_placex_geometry_placenode
won't always be matched by the query planner if the condition
part doesn't appear verbatim in the query.

Fixes #2480.
2021-10-25 15:01:03 +02:00
Sarah Hoffmann
16cc395f78 fix warming for ICU tokenizer
Running the warm-up search requests requires querying
the most frequent words. This must be done via the tokenizer
to honor the different formats of the word table.
2021-10-25 13:08:16 +02:00
Sarah Hoffmann
13e7398566 allow relative paths for log files 2021-10-25 10:26:05 +02:00
Sarah Hoffmann
8b90ee4364 Merge pull request #2476 from lonvia/harmonize-configuration-file-settings
Standardize handling of file names in configuration values
2021-10-24 10:57:48 +02:00
Sarah Hoffmann
1098ab732f allow relative paths for flatnode file 2021-10-22 17:32:51 +02:00
Sarah Hoffmann
507fdd4f40 switch IMPORT_STYLE to use generic file search
Allows relative paths wrt project directory.
2021-10-22 16:49:57 +02:00
Sarah Hoffmann
0ae8d7ac08 have ADDRESS_LEVEL_CONFIG use load_sub_configuration
This means that relative paths now are looked up in the
project directory.
2021-10-22 16:36:52 +02:00
Sarah Hoffmann
c77df2d1eb replace NOMINATIM_PHRASE_CONFIG with command line option 2021-10-22 14:41:14 +02:00
Sarah Hoffmann
cefae021db doc: clarify relative paths for tokenizer config 2021-10-21 16:38:06 +02:00
Sarah Hoffmann
771aee8cd8 Merge pull request #2475 from lonvia/catchup-mode
Add catch-up mode to replication and extend documentation for updating
2021-10-21 16:21:58 +02:00
Sarah Hoffmann
2d13d8b3b6 extend documentation for updating database
Explains the different modes and adds hints for
setting up a systemd job.
2021-10-21 12:14:47 +02:00
Sarah Hoffmann
c1fa70639b add new replication mode catch-up
This mode gets updates until the server reports no new diffs
anymore.

Also adds additional indexing, when the main indexing step left
a couple of objects to process. This happens only when the
next update is expected to be more than 40min away.
2021-10-20 22:05:15 +02:00
Sarah Hoffmann
12643c5986 run Tiger import with parallel threads per default 2021-10-19 15:00:26 +02:00
Sarah Hoffmann
a0f5613a23 Merge pull request #2472 from lonvia/word-count-computation
Fix word count computation for ICU tokenizer
2021-10-19 14:58:57 +02:00
Sarah Hoffmann
824562357b adapt tests for new word count mechanism 2021-10-19 12:03:48 +02:00
Sarah Hoffmann
ec7184c533 icu: no longer precompute terms
The ICU analyzer no longer drops frequent partials, so it is no
longer necessary to know the frequencies in advance.
2021-10-19 11:52:28 +02:00
Sarah Hoffmann
e8e2502e2f make word recount a tokenizer-specific function 2021-10-19 11:21:16 +02:00
Sarah Hoffmann
c86cfefc48 Merge pull request #2471 from lonvia/update-install-rules
Reorganise, update and extend documentation
2021-10-19 09:11:16 +02:00
Sarah Hoffmann
2635fe8b4c docs: fix more links 2021-10-18 17:26:14 +02:00
Sarah Hoffmann
632436d54d docs: refer to our new Settings chapter in the import instruchtions 2021-10-18 17:02:52 +02:00
Sarah Hoffmann
74be6828dd check and fix all liks in documentation 2021-10-18 16:53:24 +02:00
Sarah Hoffmann
f4acfed48f add extended documentation of settings 2021-10-18 16:30:52 +02:00
Sarah Hoffmann
91e1c1bea8 docs: update overview pages 2021-10-18 09:04:06 +02:00
Sarah Hoffmann
bbb9a41ea4 docs: move place ranking into customization part 2021-10-18 09:04:06 +02:00
Sarah Hoffmann
f6418887b2 docs: nominatim-ui has a new place for custom config 2021-10-18 09:04:06 +02:00
Sarah Hoffmann
a3f8a097a1 docs: move import style description to customize section 2021-10-18 09:04:06 +02:00
Sarah Hoffmann
751563644f docs: make customization chapter a separate section 2021-10-18 09:04:01 +02:00
Sarah Hoffmann
e52b801cd0 fix typo 2021-10-18 09:03:07 +02:00
Sarah Hoffmann
445a6428a6 docs: remove the development warning for ICU tokenizer 2021-10-18 09:03:07 +02:00
Sarah Hoffmann
d59b26dad7 docs: add a warning about using --no-updates with TIGER data 2021-10-18 09:03:07 +02:00
Sarah Hoffmann
47417d1871 update and extend man page
Provide extended descriptions for most subcommands.
2021-10-18 09:03:07 +02:00
Sarah Hoffmann
381aecb952 rename manual directory to man
Avoids confusion between 'docs' and 'manual'.
2021-10-18 09:03:07 +02:00
Sarah Hoffmann
45344575c6 add munin scipts and ICU subrules to installation 2021-10-18 09:03:07 +02:00
Sarah Hoffmann
83381625bd Merge pull request #2469 from lonvia/fix-tablespace-assignment
Fix template expressions for tablespaces
2021-10-15 18:20:43 +02:00
Sarah Hoffmann
552fb16cb2 fix template expressions for tablespaces 2021-10-15 15:11:09 +02:00
Sarah Hoffmann
75c631f080 Merge pull request #2450 from mtmail/tiger-data-2021
US TIGER data 2021 released
2021-10-11 19:22:15 +02:00
Sarah Hoffmann
e2464fdf62 Merge pull request #2465 from lonvia/use-spgist-index
Use SP-GIST for building index
2021-10-11 10:48:44 +02:00
Sarah Hoffmann
9ff98073db remove outdated country_languages.php 2021-10-10 21:58:43 +02:00
Sarah Hoffmann
98ee5def37 add recommendation for Postgis 3+ 2021-10-10 21:55:38 +02:00
Sarah Hoffmann
3649487f5e use SP-GIST index for building index where available
Point-in-polygon queries are much faster with a SP-GIST geometry
index, so use that for the index used to check if a housenumber
is inside a building.

Only available with Postgis 3. There is an automatic fallback to
GIST for Postgis 2.
2021-10-10 21:55:38 +02:00
Sarah Hoffmann
4b007ae740 Merge pull request #2460 from lonvia/multiple-analyzers
Add support for multiple token analyzers
2021-10-09 14:41:09 +02:00
Sarah Hoffmann
6c79a60e19 add documentation for new configuration of ICU tokenizer 2021-10-07 11:55:53 +02:00
Sarah Hoffmann
2a94bfc703 fix argument description for check_database 2021-10-07 09:49:13 +02:00
Sarah Hoffmann
299934fd2a reorganize and complete tests around generic token analysis 2021-10-06 17:03:37 +02:00
Sarah Hoffmann
b18d042832 add tests for sanitizer tagging language 2021-10-06 12:29:25 +02:00
Sarah Hoffmann
97a10ec218 apply variants by languages
Adds a tagger for names by language so that the analyzer of that
language is used. Thus variants are now only applied to names
in the specific language and only tag name tags, no longer to
reference-like tags.
2021-10-06 11:09:54 +02:00
Sarah Hoffmann
d35400a7d7 use analyser provided in the 'analyzer' property
Implements per-name choice of analyzer. If a non-default
analyzer is choosen, then the 'word' identifier is extended
with the name of the ana;yzer, so that we still have unique
items.
2021-10-05 14:10:32 +02:00
Sarah Hoffmann
92f6ec2328 remove support for properties on variants
Those are not going to be used in the near future, so no need to
carry that code around just now.
2021-10-05 10:29:36 +02:00
Sarah Hoffmann
9ba2019470 precompute replacements while loading configuration 2021-10-05 10:20:08 +02:00
Sarah Hoffmann
c171d88194 move parsing of token analysis config to analyzer
Adds a second callback for the analyzer which is responsible
for parsing the configuration rules and converting it to
whatever format necessary. This way, each analyzer implementation
can define its own configuration rules.
2021-10-04 18:31:58 +02:00
Sarah Hoffmann
7cfcbacfc7 make token analyzers configurable modules
Adds a mandatory section 'analyzer' to the token-analysis entries
which define, which analyser to use. Currently there is exactly
one, generic, which implements the former ICUNameProcessor.
2021-10-04 17:37:34 +02:00
Sarah Hoffmann
52847b61a3 extend ICU config to accomodate multiple analysers
Adds parsing of multiple variant lists from the configuration.
Every entry except one must have a unique 'id' paramter to
distinguish the entries. The entry without id is considered
the default. Currently only the list without an id is used
for analysis.
2021-10-04 16:40:28 +02:00
Sarah Hoffmann
5a36559834 move flatten_config_list into config module
For general usage by other modules.
2021-10-04 11:56:54 +02:00
Sarah Hoffmann
19d4e047f6 Merge pull request #2458 from lonvia/add-tokenizer-preprocessing
Add a "sanitation" step for name and address tags before token processing
2021-10-01 21:53:34 +02:00
Sarah Hoffmann
6b348d43c6 replace test variable for PG env tests
'tty' was removed in PG14 and causes an error.
2021-10-01 12:27:24 +02:00
Sarah Hoffmann
732cd27d2e add unit tests for new sanatizer functions 2021-10-01 12:27:24 +02:00
Sarah Hoffmann
8171fe4571 introduce sanitizer step before token analysis
Sanatizer functions allow to transform name and address tags before
they are handed to the tokenizer. Theses transformations are visible
only for the tokenizer and thus only have an influence on the
search terms and address match terms for a place.

Currently two sanitizers are implemented which are responsible for
splitting names with multiple values and removing bracket additions.
Both was previously hard-coded in the tokenizer.
2021-10-01 12:27:24 +02:00
Sarah Hoffmann
16daa57e47 unify ICUNameProcessorRules and ICURuleLoader
There is no need for the additional layer of indirection that
the ICUNameProcessorRules class adds. The ICURuleLoader can
fill the database properties directly.
2021-10-01 12:27:24 +02:00
Sarah Hoffmann
5e5addcdbf fix typo 2021-09-29 14:16:09 +02:00
Sarah Hoffmann
be65c8303f export more data for the tokenizer name preparation
Adds class, type, country and rank to the exported information
and removes the rather odd hack for countries. Whether a place
represents a country boundary can now be computed by the tokenizer.
2021-09-29 11:54:14 +02:00
Sarah Hoffmann
231250f2eb add wrapper class for place data passed to tokenizer
This is mostly for convenience and documentation purposes.
2021-09-29 11:54:07 +02:00
Sarah Hoffmann
d44a428b74 Merge pull request #2455 from lonvia/adjust-address-levels-slovakia
Adjust address levels for boundaries in Slovakia
2021-09-28 11:21:08 +02:00
Sarah Hoffmann
40f9d52ad8 Merge pull request #2454 from lonvia/sort-out-token-assignment-in-sql
ICU tokenizer: switch match method to using partial terms
2021-09-28 09:45:15 +02:00
Sarah Hoffmann
7f3b05c179 adjust address levels for boundaries in Slovakia
Levels choosen according to OSM wiki. Mainly moves admin_level 6
to county level and admin_level 8 to city/town level. Higher
levels are adjusted accordingly.

Fixes #2453.
2021-09-27 23:32:11 +02:00
Sarah Hoffmann
09c9fad6c3 adapt tests to new ICU address token handling 2021-09-27 17:36:23 +02:00
Sarah Hoffmann
bb18479d5b remove unused parameter 2021-09-27 14:58:43 +02:00
Sarah Hoffmann
779ea8ac62 Merge pull request #2452 from lonvia/update-houses-on-street-name-change
Force update of surrounding houses when street or place name changes
2021-09-27 14:55:50 +02:00
Sarah Hoffmann
bd7c7ddad0 icu tokenizer: switch to matching against partial names
When matching address parts from addr:* tags against place names,
the address names where so far converted to full names and compared
those to the place names. This can become problematic with the new
ICU tokenizer once we introduce creation of different variants
depending on the place name context. It wouldn't be clear which
variant to produce to get a match, so we would have to create all of
them. To work around this issue, switch to using the partial terms
for matching. This introduces a larger fuzziness between matches but
that shouldn't be a problem because matching is always geographically
restricted.

The search terms created for address parts have a different problem:
they are already created before we even know if they are going to be
used. This can lead to spurious entries in the word table, which slows
down searching. This problem can also be circumvented by using only
partial terms for the search terms. In terms of searching that means
that the address terms would not get the full-word boost, but given
that the case where an address part does not exist as an OSM object
should be the exception, this is likely acceptable.
2021-09-27 11:36:19 +02:00
Sarah Hoffmann
c6fdcf9b0d adapt documentation for SQL tokenizer interface 2021-09-27 11:36:19 +02:00
Sarah Hoffmann
59fe74ddf6 move name matching into tokenizer module
Instead of requesting the match tokens from the tokenizer
when looking for parent streets/places and address parts,
hand in the saved tokens and ask if they match. This gives
the tokenizer more freedom to decide how name matching
should be done.
2021-09-27 11:36:19 +02:00
Sarah Hoffmann
6d7c067461 force update on rank30 children when place name changes
Name changes may have an effect on parenting. Don't update
surrounding rank30 objects with addr:place tags as this is
potentially too expensive.
2021-09-27 11:04:17 +02:00
Sarah Hoffmann
316205e455 force update of surrounding houses when street name changes
When the street changes its name then this may cause changes
in the parenting of rank-30 objects with an addr:street
tag.

Fixes #2242.
2021-09-27 10:22:41 +02:00
marc tobias
834ae0a93f US TIGER data 2021 released 2021-09-25 00:05:17 +02:00
Sarah Hoffmann
d562f11298 slightly increase radius to look for postcodes 2021-09-24 23:56:42 +02:00
Sarah Hoffmann
972628c751 Merge pull request #2449 from lonvia/address-ranking-spain
Adjust address ranks for Spain
2021-09-24 22:48:21 +02:00
Sarah Hoffmann
09b1db63f4 adjust address ranks for Spain
Adjusts levels for boundaries according to the list on
https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative

* no admin_level 5, so drop that from addresses
* admin_level 6 has the province
* admin_level 7 has the county when it exists

Also reranks place=province so that it matches up with
admin_level 6 and introduces place=civil_parish which
is used as a place node for some admin_level=9 boundaries
in Galicia.
2021-09-24 18:39:44 +02:00
Sarah Hoffmann
e9d54f752c Merge pull request #2447 from lonvia/fix-dynamic-address-assignment
Fix dynamic assignment of address parts
2021-09-19 15:57:28 +02:00
Sarah Hoffmann
c335025167 CI: install locale for CentOS 2021-09-19 13:49:11 +02:00
Sarah Hoffmann
2b2109c89a Remove the installation warning
Installation has become a lot easier.
2021-09-19 13:01:32 +02:00
Sarah Hoffmann
56124546a6 fix dynamic assignment of address parts
A boolean check for dynamic changes of address parts is not
sufficient. The order of choice should be:

 1. an addr:* part matches the name
 2. the address part surrounds the object
 3. the address part was declared as isaddress

The implementation uses a slightly different ordering
to avoid geometry checks unless strictly necessary (isaddress
is false and no matching address).

See #2446.
2021-09-19 12:34:39 +02:00
Sarah Hoffmann
336258ecf8 Merge pull request #2440 from lonvia/generic-config-loader
Add generic loader for YAML configuration files
2021-09-04 17:41:15 +02:00
Sarah Hoffmann
b894d2c04a fix indent 2021-09-04 10:30:35 +02:00
Sarah Hoffmann
8e1d4818ac use yaml config loader for country info 2021-09-04 00:22:55 +02:00
Sarah Hoffmann
28c98584c1 add tests for generic YAML config reader 2021-09-03 22:31:30 +02:00
Sarah Hoffmann
1c42780bb5 introduce generic YAML config loader
Adds a function to the Configuration class to load a YAML
file. This means that searching for the file is generalised
and works the same now for all configuration files. Changes
the search logic, so that it is always possible to have a
custom version of the configuration file in the project
directory.

Move ICU tokenizer to use new load function.
2021-09-03 18:20:07 +02:00
Sarah Hoffmann
18554dfed7 Merge pull request #2437 from lonvia/tweak-ranking-searches
Some more tweaks for search interpretation
2021-09-03 14:16:23 +02:00
Sarah Hoffmann
2e493fec46 Merge pull request #2436 from lonvia/country-configuration
Move configuration of default languages into a configuration file
2021-09-03 08:55:36 +02:00
Sarah Hoffmann
98c2e08add reduce penalty for special searches by name
Additional penalty for special terms with operator None
should only go to near searches. To reduce the number
of produced searches, restrict the none operator to
appear only in conjunction with the name.
2021-09-03 08:50:38 +02:00
Sarah Hoffmann
94d3dee369 further increase penalty on housenumbers without numbers
Make the penality dependent on the length of the token:
no penalty for one letter house numbers and increasing one
for more letters.
2021-09-02 18:11:49 +02:00
Sarah Hoffmann
7e7dd769fd remove language and partition from name import 2021-09-02 14:41:11 +02:00
Sarah Hoffmann
79da96b369 read partition and languages from config file 2021-09-02 14:41:11 +02:00
Sarah Hoffmann
78fcabade8 move country name generation to country_info module 2021-09-02 14:41:11 +02:00
Sarah Hoffmann
284645f505 move generation of country tables in own module 2021-09-02 14:41:11 +02:00
Sarah Hoffmann
0b349761a8 add country configuration
The new configuration saves the default language(s) originally
maintained in the OSM wiki as well as the partition information.
2021-09-02 14:41:11 +02:00
Sarah Hoffmann
d18794931a Merge pull request #2435 from lonvia/simplified-to-traditional-chinese
icu: normalise simplified to traditional chinese
2021-08-31 15:29:26 +02:00
Sarah Hoffmann
b7d4ff3201 icu: normalise simplified to traditional chinese
The conversion is unambigious in most cases, so that the
information loss is minimal.
2021-08-31 11:18:34 +02:00
Sarah Hoffmann
4c6d674e03 Merge pull request #2434 from lonvia/vagrant-scripts-in-actions
Test installation instructions via CI
2021-08-29 10:11:59 +02:00
Sarah Hoffmann
2c97af8021 CI: use packaged source also for test runs 2021-08-24 10:10:01 +02:00
Sarah Hoffmann
832f75a55e CI: unify jobs for different vagrant scripts 2021-08-24 10:10:01 +02:00
Sarah Hoffmann
4e77969545 add workflow for centos 8 2021-08-24 10:10:01 +02:00
Sarah Hoffmann
6ebbbfee61 CI: use vagrant scripts for import tests
Use vanilla docker images of Ubuntu and leave the setup
to the vagrant scripts. Then do the usual import tests.

Also fixes a couple of issues found with the scripts
2021-08-24 10:10:01 +02:00
Sarah Hoffmann
0fabeefc3e Merge pull request #2432 from Mastercuber/patch-1
Added postcode
2021-08-22 09:32:31 +02:00
Mastercuber
c70d72f06b Added postcode
Added postcode to the list of addressdetails
2021-08-22 02:52:41 +02:00
Sarah Hoffmann
cc141bf1a5 Add link to fixthemap to issue template 2021-08-21 20:36:16 +02:00
Sarah Hoffmann
199532c802 Merge pull request #2429 from lonvia/place-name-to-admin-boundary
Indexing: move linking of places to the preparation stage
2021-08-21 10:21:39 +02:00
Sarah Hoffmann
28ee3d0949 move linking of places to the preparation stage
Linked places may bring in extra names. These names need to be
processed by the tokenizer. That means that the linking needs
to be done before the data is handed to the tokenizer. Move finding
the linked place into the preparation stage and update the name
fields. Everything else is still done in the indexing stage.
2021-08-20 22:44:17 +02:00
Sarah Hoffmann
925195725d Merge pull request #2428 from lonvia/rename-icu-tokenizer
Rename legacy_icu tokenizer to icu tokenizer
2021-08-18 15:02:19 +02:00
Sarah Hoffmann
f6d22df76e adapt CI workflow to new tokenizer name 2021-08-18 09:08:20 +02:00
Sarah Hoffmann
118858a55e rename legacy_icu tokenizer to icu tokenizer
The new icu tokenizer is now no longer compatible with the old
legacy tokenizer in terms of data structures. Therefore there
is also no longer a need to refer to the legacy tokenizer in the
name.
2021-08-17 23:11:47 +02:00
Sarah Hoffmann
656c1291b1 Merge pull request #2427 from lonvia/remove-us-states-special-casing
Move US state hack into legacy tokenizer
2021-08-17 21:55:32 +02:00
Sarah Hoffmann
f00b8dd1c3 move special hack for US states to legacy tokenizer
The hack for IL, AL and LA is only needed because these abbreviations
are removed by the legacy tokenizer as a stop word. There is no need
to keep the hack for future tokenizers. Move it therefore to the
token extraction function.
2021-08-17 14:28:55 +02:00
Sarah Hoffmann
5f2b9e317a add tests for US state hacks
IL, AS and LA are replaced with the US state in Geocode because
the old tokenizer would simply remove the abbreviations otherwise.
2021-08-17 10:49:07 +02:00
Sarah Hoffmann
4ae5ba7fc4 Merge pull request #2425 from lonvia/tokenizer-documentation
Introduce official Tokenizer API
2021-08-17 09:38:03 +02:00
Sarah Hoffmann
3656eed9ad add mkdocstrings requirement for building docs
mkdocstrings also needs access to the Python sources, so set
a PYTHONPATH accordingly. This makes running mkdocs directly
a bit awkward, therefore add a `make serve-doc` target.
2021-08-16 11:51:49 +02:00
Sarah Hoffmann
2e82a6ce03 docs: extend explanation of query phrase 2021-08-16 11:51:49 +02:00
Sarah Hoffmann
c4b8a3b768 add documentation for PHP part of tokenizer 2021-08-16 11:51:49 +02:00
Sarah Hoffmann
1147b83b22 php: make word list a first-class object
This separates the logic of creating word sets from the Phrase
class. A tokenizer may now derived the word sets any way they
like. The SimpleWordList class provides a standard implementation
for splitting phrases on spaces.
2021-08-16 11:51:49 +02:00
Sarah Hoffmann
0fb8eade13 remove country restriction from tokenizer
Restricting tokens due to the search context is better done in
the generic search part instead of repeating the same test in
every tokenizer implementation.
2021-08-16 11:41:54 +02:00
Sarah Hoffmann
78d11fe628 document tokenizer SQL interface 2021-08-16 11:41:54 +02:00
Sarah Hoffmann
90b40fc3e6 define formal public Python interface for tokenizer
This introduces an abstract class for the Tokenizer/Analyzer
for documentation purposes.
2021-08-16 11:41:54 +02:00
Sarah Hoffmann
e25e268e2e docs: querying and tokenizers 2021-08-16 08:59:44 +02:00
Sarah Hoffmann
68bff31cc9 docs: add developer doc page for Tokenizer 2021-08-16 08:58:56 +02:00
Sarah Hoffmann
31d9545702 Merge pull request #2424 from lonvia/multi-country-import
Update instructions for importing multiple regions
2021-08-16 08:48:28 +02:00
Sarah Hoffmann
e449071a35 Merge pull request #2423 from hummeltech/patch-1
Fix old paths for `phpcs` when using `make test`
2021-08-15 22:00:50 +02:00
Sarah Hoffmann
23e3724abb ignore words without id for status 2021-08-15 21:59:36 +02:00
Sarah Hoffmann
75a5c7013f split up large setup function 2021-08-15 12:24:13 +02:00
Sarah Hoffmann
56d24085f9 port multi-region update scripts to nominatim tool
Also updates the documentation. For the simple case of just
importing multiple regions, provide simplified instructions
that use the new multi-file import feature.

Fixes #2365.
2021-08-14 23:55:48 +02:00
Sarah Hoffmann
95b82af42a update osm2pgsql to 1.5.1 2021-08-14 22:46:35 +02:00
Sarah Hoffmann
87dedde5d6 allow multiple files for the import command
The files are forwarded to osm2pgsql which is now able to merge
them correctly.
2021-08-14 21:42:21 +02:00
David Hummel
8b6489c60e Fix old paths for phpcs when using make test
These paths no longer exist since db3ced17bb, they are now all located under `lib-php`
2021-08-12 13:34:18 -07:00
Sarah Hoffmann
bf4f05fff3 Merge pull request #2413 from osm-search/helm-chart
Installation docs - link to Kubernetes install project
2021-08-08 11:09:36 +02:00
mtmail
b0aaa25f0d Installation docs - link to Kubernetes install project
As reported by @robjuz in https://github.com/osm-search/Nominatim/discussions/2412
2021-08-03 12:02:35 +02:00
Sarah Hoffmann
c3ddc7579a Merge pull request #2408 from lonvia/icu-change-word-table-layout
Change table layout of word table for ICU tokenizer
2021-07-28 14:28:49 +02:00
Sarah Hoffmann
fdff579188 php: force use of global Exception class 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
d48793c22c fix Python linitin errors 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
001b2aa9f9 fix linitin issues in PHP 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
1db098c05d reinstate word column in icu word table
Postgresql is very bad at creating statistics for jsonb
columns. The result is that the query planer tends to
use JIT for queries with a where over 'info' even when
there is an index.
2021-07-28 11:31:47 +02:00
Sarah Hoffmann
324b1b5575 bdd tests: do not query word table directly
The BDD tests cannot make assumptions about the structure of the
word table anymore because it depends on the tokenizer. Use more
abstract descriptions instead that ask for specific kinds of
tokens.
2021-07-28 11:31:47 +02:00
Sarah Hoffmann
e42878eeda adapt unit test for new word table
Requires a second wrapper class for the word table with the new
layout. This class is interface-compatible, so that later when
the ICU tokenizer becomes the default, all tests that depend on
behaviour of the default tokenizer can be switched to the other
wrapper.
2021-07-28 11:31:47 +02:00
Sarah Hoffmann
eb6814d74e convert word info column to json before copying 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
6ad35aca4a adapt special terms lookup to new word table 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
70f154be8b switch word tokens to new word table layout 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
4342b28882 switch special phrases to new word table format 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
5394b1fa1b switch postcode tokens to new word table layout 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
5ab0a63fd6 switch housenumber tokens to new word table layout 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
1618aba5f2 switch country name tokens to new word table layout 2021-07-28 11:31:47 +02:00
Sarah Hoffmann
8377528952 new word table layout for icu tokenizer
The table now directly reflects the different token types.
Extra information is saved in a json structure that may be
dynamically extended in the future without affecting the
table layout.
2021-07-28 11:31:47 +02:00
Sarah Hoffmann
34dcf02dee fix typos in tokenizer docs 2021-07-28 11:28:49 +02:00
Sarah Hoffmann
5d7d7f15d9 Merge pull request #2401 from lonvia/port-add-data-to-python
Port add-data functions from PHP to Python
2021-07-26 12:38:56 +02:00
Sarah Hoffmann
0c023fb4d2 adapt cli tests to Python port for add-data 2021-07-26 10:41:37 +02:00
Sarah Hoffmann
1bd068d42d remove unused update script 2021-07-26 10:41:37 +02:00
Sarah Hoffmann
e42349c963 replace add-data function with native Python code 2021-07-26 10:41:37 +02:00
Sarah Hoffmann
878835e4bd move add-data subcommand into a separate file 2021-07-25 18:14:12 +02:00
Sarah Hoffmann
8096a1d67f fix parameters for TokenWord creation 2021-07-20 10:21:40 +02:00
Sarah Hoffmann
e16c5d5f70 Merge pull request #2397 from lonvia/increase-minimum-required-versions
Increase minimum required PostgreSQL version to 9.5
2021-07-19 14:28:02 +02:00
Sarah Hoffmann
2c8242c8df remove special code for pre9.5 postgresql
9.5 is now the minimum requirement.
2021-07-19 10:24:57 +02:00
Sarah Hoffmann
e7d6f89aca increase minimum version for PostgreSQL to 9.5
This is the minimum version we can test with the CI.
With 9.5 there is also complete support for jsonb available.
2021-07-19 10:21:19 +02:00
Sarah Hoffmann
379f5db516 require Python 3.6 also in CMakeFile
This had been forgotten when increasing the minimum Python version.
2021-07-19 10:14:14 +02:00
Sarah Hoffmann
ee32315378 Merge pull request #2396 from lonvia/partial-word-token
Reorganise code that build the SearchDescription
2021-07-19 09:42:37 +02:00
Sarah Hoffmann
cca912af4e make all Token menbers private 2021-07-18 22:54:55 +02:00
Sarah Hoffmann
86ea077092 merge marking rare name with adding name token
Only name tokens can be rare, so this should be the same
function.
2021-07-18 16:52:37 +02:00
Sarah Hoffmann
5d6aabc457 add documentation for public interface of SearchDescription 2021-07-18 16:10:42 +02:00
Sarah Hoffmann
b14ce959d9 factor out check if a token fits current search
Saves allocating an empty array.
2021-07-17 22:01:35 +02:00
Sarah Hoffmann
a48ebd9b47 move SearchDescription building into tokens
Moving the logic for extending the SearchDescription into the
token classes splits up the code and makes it more readable.
More importantly: it allows tokenizer to define custom token
classes in the future.
2021-07-17 20:24:33 +02:00
Sarah Hoffmann
3cd85eaaf1 remove Token from explicit input for SearchDescription extension
The token string is only required by the PartialToken type, so
it can simply save the token string internally. No need to pass
it to every type.

Also moves the check for multi-word partials to the token loader
code in the tokenizer. Multi-word partials can only happen with
the legacy tokenizer and when the database was loaded with an
older version of Nominatim. No need to keep the check for
everybody.
2021-07-17 18:18:31 +02:00
Sarah Hoffmann
ec3f6c9c42 factor out query position
Moves token and phrase position and phrase type into a separate
class that is handed in when assembling the search description.
This drastically reduces the number of parameters for the function
to extend the search descriptions and gives us more flexibility
in the future for more complex positional analysis.
2021-07-15 14:12:59 +02:00
Sarah Hoffmann
143ff14466 remove special status of partial tokens
Full-word tokens are no longer marked by a space at the
beginning of the token. Use the new Partial token category
instead. This removes a couple of special casing, we don't
really need.

The word table still has the space for compatibility reasons,
so the tokenizer code needs to get rid of it when loading the
tokens.
2021-07-14 22:17:17 +02:00
Sarah Hoffmann
6070c3d1d5 introduce a separate token type for partials
This means that the leading space can be removed as a partial
word indicator.
2021-07-13 16:57:12 +02:00
Sarah Hoffmann
bc8b2d4ae0 Merge pull request #2393 from lonvia/fix-flake8-issues
Fix flake8 issues
2021-07-13 16:46:12 +02:00
Sarah Hoffmann
14f777da18 use psycopg's SQL quoting where possible
Use the SQL formatting supplied with psycopg whenever the
query needs to be put together from snippets.
2021-07-12 22:05:22 +02:00
Sarah Hoffmann
6f6681ce67 add helper function for execute_values
Make psycopg2's convenience function accessible through
the cursor.
2021-07-12 21:08:20 +02:00
Sarah Hoffmann
06602b4ec0 provide wrapper function for DROP TABLE
Use psycopg2 formatting to ensure correct quoting.
2021-07-12 20:32:46 +02:00
Sarah Hoffmann
cf98cff2a1 more formatting fixes
Found by flake8.
2021-07-12 17:45:42 +02:00
Sarah Hoffmann
b4fec57b6d Merge pull request #2391 from lonvia/fix-sonar-issues
Fix bugs and code smells found by Sonarqube
2021-07-12 17:14:59 +02:00
Sarah Hoffmann
f8b5a63de3 factor out connection reset code 2021-07-12 14:58:44 +02:00
Sarah Hoffmann
568316f07c simplify analyse function 2021-07-12 14:47:50 +02:00
Sarah Hoffmann
daa597b300 split up variant computation for better readability 2021-07-12 14:43:50 +02:00
Sarah Hoffmann
47adb2a3fc reorganise process_place function
Move address processing into its own function as it is
rather extensive.
2021-07-12 11:57:55 +02:00
Sarah Hoffmann
fff0012249 simplify website setup code
Use formaat strings and move variable quoting code into extra
function.
2021-07-12 11:41:05 +02:00
Sarah Hoffmann
d5a1883b62 avoid repeated patterns for table name 2021-07-12 11:33:09 +02:00
Sarah Hoffmann
a08ef43e40 simplify if statements 2021-07-12 11:28:47 +02:00
Sarah Hoffmann
bc5e15996a convert single case switch to if statement 2021-07-12 11:28:47 +02:00
Sarah Hoffmann
128ca800cd avoid local variable assignment 2021-07-11 23:22:53 +02:00
Sarah Hoffmann
000d133af6 fix more missing braces on one-liners 2021-07-11 23:22:53 +02:00
Sarah Hoffmann
1e40d65aa9 remove dead code 2021-07-11 23:22:53 +02:00
Sarah Hoffmann
bffbe68ec3 do not intermix params with and without default 2021-07-11 23:22:53 +02:00
Sarah Hoffmann
58b10074ad directly return data in function
The temporary variable is not necessary.
2021-07-11 19:24:04 +02:00
Sarah Hoffmann
d933ead2b5 remove unnecessayly nested ifs
Found by Sonarqube.
2021-07-11 19:11:37 +02:00
Sarah Hoffmann
1cdc30c5e8 remove unused functions
The functions were necessary for the transitory code
to Python and are no longer used.
2021-07-11 19:10:04 +02:00
Sarah Hoffmann
3661f7a321 avoid multiple returns of same value
Found by Sonarqube.
2021-07-11 18:23:42 +02:00
Sarah Hoffmann
27af9b102c always use brackets on if statements
This adds bracket around all one-line if statements that did
not have them yet.
2021-07-10 17:04:46 +02:00
Sarah Hoffmann
500c61685b remove unused variables
As reported by sonarqube.
2021-07-09 16:36:42 +02:00
Sarah Hoffmann
106d960f84 fix bad use of echo in PHP output 2021-07-09 12:50:35 +02:00
Sarah Hoffmann
322fa19ceb Merge pull request #2390 from lonvia/responsible-disclosure
Add security issue disclosure policy
2021-07-09 12:32:37 +02:00
Sarah Hoffmann
5bea0b6086 add security issue disclosure policy 2021-07-09 11:36:59 +02:00
Sarah Hoffmann
a5970d7548 Merge pull request #2384 from lonvia/actions-add-icu-tokenizer
CI: run tests on Ubuntu 18
2021-07-07 14:39:53 +02:00
Sarah Hoffmann
c216144dd1 add missing pyyaml requirement 2021-07-07 11:29:33 +02:00
Sarah Hoffmann
42e08da7ca enable PHP 7.2 for Ubuntu 18 CI 2021-07-07 11:29:33 +02:00
Sarah Hoffmann
a2edbbf78a cannot use capture_output in subprocess.run
Only available since Python 3.7.
2021-07-06 22:57:42 +02:00
Sarah Hoffmann
1e86dc1d93 remove default parameter for namedtuple
This is only available in Python 3.7.
2021-07-06 22:57:42 +02:00
Sarah Hoffmann
54f295be52 CI: run tests on older Ubuntu version as well 2021-07-06 22:57:42 +02:00
Sarah Hoffmann
8bc3c0a07c Merge pull request #2382 from lonvia/remove-json-config
Remove outdated ICU tokenizer JSON config
2021-07-05 12:34:34 +02:00
Sarah Hoffmann
d75bc20174 Merge pull request #2383 from lonvia/remove-more-names
Exclude name:etymology and name:signed
2021-07-05 12:34:16 +02:00
Sarah Hoffmann
fd8751658f exclude name:etymology and name:signed
name:etymology contains a description of the name origin and is
thus more informative than search-worthy.

name:signed basically indicates that the feature does not have
a name.
2021-07-05 11:04:16 +02:00
Sarah Hoffmann
4db5a1a0b8 remove outdated ICU tokenizer JSON config 2021-07-05 11:01:35 +02:00
Sarah Hoffmann
4c52777ef0 Merge pull request #2371 from lonvia/increase-python-version
Increase minimum required Python version to 3.6
2021-07-05 10:32:38 +02:00
Sarah Hoffmann
d4c7bf20a2 Merge pull request #2381 from lonvia/reorganise-abbreviations
Reorganise abbreviation handling
2021-07-05 10:32:16 +02:00
Sarah Hoffmann
affe1300d9 add warning about experimental nature of ICU tokenizer 2021-07-04 10:44:58 +02:00
Sarah Hoffmann
62d5984b1b limit the number of variants that can be produced 2021-07-04 10:28:28 +02:00
Sarah Hoffmann
c32551b4e0 restrict partial word counting to names of reasoanble length
The partial word count does not split names to save a bit of time.
The result is that it might enounter unreasonably long names
which in truth consist of multiple words. No accurate statistics
are needed so simply restrict the count to words shorter than
75 characters.
2021-07-04 10:28:28 +02:00
Sarah Hoffmann
e85f7e7aa9 fix subsequent replacements
Two replacement words directly following each other did not
work as expected because each expects a space at the
beginning/end while there was only one space available.

Also forbit composing a word after a space was added in the
end by a previous replacement.
2021-07-04 10:28:28 +02:00
Sarah Hoffmann
7b0f6b7905 leave ICU variant properties empty for now
Saving unused properties causes unnecessary duplicates.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
0894ce9dc3 import abbreviations from OSM Wiki
Replaces the variant rules with a slightly cleaned-up
version of the abbreviation lists at
https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
4fd2e961b6 improve normalization
Make sure all special symbols are removed during normalization already.
Those won't be interpreted in any way because they are unlikely to be
searched for.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
b9fbfeff67 only consider partials in multi-words for initial count
This ensures that it is less likely that we exclude meaningful
words like 'hauptstrasse' just because they are frequent.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
5dd24b3ef0 add documentation for ICU tokenizer configuration 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
62828fc5c1 switch to a more flexible variant description format
The new format combines compound splitting and abbreviation.
It also allows to restrict rules to additional conditions
(like language or region). This latter ability is not used
yet.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
a6aa6360e0 use yaml tag syntax to mark include files 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
c4f6c06f44 add dependency on datrie 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
0d80a9b897 tests for composing decomposed suffixes 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
f70930b1a0 make compund decomposition pure import feature
Compound decomposition now creates a full name variant on
import just like abbreviations. This simplifies query time
normalization and opens a path for changing abbreviation
and compund decomposition lists for an existing database.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
9ff4f66f55 complete tests for icu tokenizer 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
32ca631b74 fix full term token in special phrases 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
2e81084f35 complete tests for rule loader 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
a0a7b05c9f correctly quote strings when copying in data
Encapsulate the copy string in a class that ensures that
copy lines are written with correct quoting.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
2f6e4edcdb update unit tests for adapted abbreviation code 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
1bd9f455fc add abbreviations from legacy tokenizer
These abbreviations are not a perfect fit anymore because
abbreviation replacement is now applied before transliteration.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
2e3c5d4c5b adapt tests for ICU tokenizer 2021-07-04 10:28:20 +02:00
Sarah Hoffmann
8413075249 move abbreviation computation into import phase
This adds precomputation of abbreviated terms for names and removes
abbreviation of terms in the query. Basic import works but still
needs some thorough testing as well as speed improvements during
import.

New dependency for python library datrie.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
6ba00e6aee icu tokenizer: move transliteration rules in separate file
The tokenizer configuration has become difficult to handle
due to the additional manual transliteration rules. Allow
to have a separate rule file that is given to the ICU library
as is.
2021-07-04 10:28:20 +02:00
Sarah Hoffmann
de4fac33dc docs: nominatim-ui should be installed from the release
The development version does not provide the pre-packaged
dist directory anymore.
2021-07-03 21:16:52 +02:00
Sarah Hoffmann
c9984669a7 Merge pull request #2373 from lonvia/tweak-search-cost
Further tweaking of search cost
2021-06-26 16:21:08 +02:00
Sarah Hoffmann
63755c31ff remove penalty for full words in address
Now that mutli-word partials no longer exist, multi-word full
words need to be used to search in addresses and therefore no
longer should have a penalty.

Also changes the condition when a full word is included into
the address. It is no longer relevant if an equivalent partial
exists but only if the term consists of more than one word.
2021-06-26 11:37:15 +02:00
Sarah Hoffmann
161f5f5cee adjust penalty for housenumber-in-name searches
When searching for house numbers in the name (for place-only
terms) then the same penalties need to apply as for the
regular house number search.

Change the code to first compute the penalties and then create
the new search variants.
2021-06-26 11:37:15 +02:00
Sarah Hoffmann
c7073a1fc0 increase minimum Python to 3.6
Python 3.6 introduces formatted string literals and
flag enums as well as a much faster dict implementation.
These changes make the code so much simpler as to warrant
dropping Python 3.5 support.

Affected distributions are Ubuntu 16.04 and Debian Stretch.
2021-06-21 18:37:37 +02:00
Sarah Hoffmann
e7b4fc70e7 make sure old data gets deleted on place type change
When changing from some other place type to place=postcode
make sure that the old place type entry in the place table
is deleted.
2021-06-18 10:58:41 +02:00
Sarah Hoffmann
457982e1d2 update postcode in place if it already exists 2021-06-18 00:28:52 +02:00
Sarah Hoffmann
aa558e6080 Merge pull request #2369 from lonvia/exclude-poi-from-housenumber-search
Do not return POIs when dropping house number in query
2021-06-17 15:30:05 +02:00
Sarah Hoffmann
fe11d3cbbd do not return POIs when dropping house number in query
We've previously added searching through rank 30 in a house
number search to enable searches for house number+name.
This had the unintended side effect that rank 30 objects
are also returned in s search that dropped the house number
from the query. This is wrong because POIs cannot function
as a parent to a house number.

This fix drops all rank 30 objects from the results for a
house number search if they do not match the requested house
number.
2021-06-17 14:21:20 +02:00
Sarah Hoffmann
1ce223a83b Merge pull request #2360 from AntoJvlt/postcodes-place-table
Use place instead of placex to compute postcodes
2021-06-16 11:45:07 +02:00
AntoJvlt
3676310efe Improved performance of the postcodes query and some code cleaning 2021-06-12 15:46:08 +02:00
AntoJvlt
ddf866c4c7 Always delete old placex entry for type=postcode when inserting a new one into the place table 2021-06-12 15:35:51 +02:00
AntoJvlt
9e07a197e9 Handle postcode type change in place insert trigger 2021-06-09 09:31:32 +02:00
AntoJvlt
1c175e3a67 Clean and update tests for postcodes 2021-06-09 09:31:32 +02:00
AntoJvlt
47fb7cd3a8 Use place_exists() into can_compute() for postcodes 2021-06-09 09:31:32 +02:00
AntoJvlt
e879814e43 Update tests for postcodes 2021-06-09 09:31:32 +02:00
AntoJvlt
a4733eed90 Use place instead of placex to compute postcodes 2021-06-09 09:31:32 +02:00
Sarah Hoffmann
38fbc4fcbb do not fail CI on codecov errors
The CodeCove upload depends on unreliable external code.
2021-06-08 10:42:14 +02:00
Sarah Hoffmann
c6fe91bfa5 Merge pull request #2359 from lonvia/switch-bdd-tests-to-api-search
Remove deprecated commandline query function
2021-06-06 18:29:51 +02:00
Sarah Hoffmann
7383f05e45 remove deprecated query interface
Searches can now be done via the thin API wrapper.
2021-06-06 15:28:21 +02:00
Sarah Hoffmann
3aac51c81f switch BDD tests to always use search API 2021-06-06 15:27:52 +02:00
Sarah Hoffmann
f0a7850edf Merge pull request #2358 from AntoJvlt/documentation-update
Update documentation
2021-06-04 23:54:37 +02:00
AntoJvlt
4336ca69c7 Update documentation 2021-06-03 18:39:40 +02:00
Sarah Hoffmann
4bca5e838b Merge pull request #2357 from lonvia/legacy-tokenizer-fix-word-entries
Fix insertion of special terms and countries into word table
2021-06-02 20:58:14 +02:00
Sarah Hoffmann
bc981d0261 fix insertion of special terms and countries into word table
Special terms need to be prefixed by a space because they are
full terms.

For countries avoid duplicate entries of word tokens.

Adds tests for adding country terms.
2021-06-02 20:22:39 +02:00
Sarah Hoffmann
b1d33e6b49 Merge pull request #2356 from lonvia/freeze-after-import
Call freeze after running and non-updateable import
2021-06-02 16:25:26 +02:00
Sarah Hoffmann
38d442edf6 docs: reload SQL when migrating to 3.6
SQL functions must always be reloaded when updating the software.
All other updates included the instruction as part of some other
migration. From 3.7 on it will happen as part of the migration
command.

Fixes #2335.
2021-06-02 16:11:29 +02:00
Sarah Hoffmann
72625dc72a call freeze after running and non-updateable import
Some of the tables will have already been removed but
the tables for indexing are still there and should be
dropped.
2021-06-02 11:08:48 +02:00
Sarah Hoffmann
cc2f152d70 commit changes to replication log table
Fixes #2350.
2021-05-26 11:47:08 +02:00
Sarah Hoffmann
f74dc38766 always compute guessed postcode for POIs from centroid
When guessing postcodes from the area, only postcodes within
that area are accepted. For POIs that is usually not what we
want as the postcode would have to be within a house for
example.

Fixes #2301.
2021-05-26 11:15:13 +02:00
Sarah Hoffmann
7d9665d8d2 Merge pull request #2349 from lonvia/fix-website-refresh
Only initialise tokenizer for refresh functions where needed
2021-05-25 20:43:44 +02:00
Sarah Hoffmann
a0e85cc17c only initialise tokenizer for refresh functions where needed
Fixes #2347.
2021-05-25 19:16:22 +02:00
Sarah Hoffmann
29b02f9e56 Merge pull request #2346 from lonvia/words-vs-tokens
Cleanup use of partial words in legacy tokenizers
2021-05-24 17:41:38 +02:00
Sarah Hoffmann
24c986c842 add tests for new full name computation with ICU 2021-05-24 10:41:42 +02:00
Sarah Hoffmann
4f4d15c28a reorganize keyword creation for legacy tokenizer
- only save partial words without internal spaces
- consider comma and semicolon a separator of full words
- consider parts before an opening bracket a full word
  (but not the part after the bracket)

Fixes #244.
2021-05-24 10:41:42 +02:00
Sarah Hoffmann
fa3e48c59f use make_keywords for place search terms also
Ensures that place indeed uses the same search names as other
names.
2021-05-23 23:08:11 +02:00
Sarah Hoffmann
02f6afa51b always ignore multi term partials in search
Partial terms should only ever consist of one word. Ignore
any other, they are a leftover from inefficient word index
builts.
2021-05-23 22:13:03 +02:00
Sarah Hoffmann
10143e0ac7 Merge pull request #2342 from lonvia/icu-tokenizer-ci
Add BDD tests with icu tokenizer to CI runs
2021-05-22 10:36:35 +02:00
Sarah Hoffmann
8f3429939f CI: run BDD tests with legacy_icu tokenizer 2021-05-21 23:18:45 +02:00
Sarah Hoffmann
00094c43d1 enable Tiger BDD API test for legacy_icu 2021-05-21 22:39:56 +02:00
Sarah Hoffmann
8bf15fa691 Merge pull request #2341 from lonvia/cleanup-python-tests
Cleanup and linting of python tests
2021-05-20 17:30:30 +02:00
Sarah Hoffmann
63dc503b8d Merge pull request #2337 from mogita/fix/invalid-query-string
fix: add the missing question mark
2021-05-20 10:26:23 +02:00
Sarah Hoffmann
430c316e45 test: fix linting errors 2021-05-19 23:07:39 +02:00
Sarah Hoffmann
01f5a9ff84 test: more use of table_factory 2021-05-19 17:37:03 +02:00
Sarah Hoffmann
af52eed0dd test: avoid use of tempfile module
Use the tmp_path fixture instead which provides automatic
cleanup.
2021-05-19 16:43:26 +02:00
Sarah Hoffmann
f93d0fa957 test: use src_dir fixture instead of self-computed paths 2021-05-19 16:03:54 +02:00
Sarah Hoffmann
c06a1d007a test: replace raw execute() with fixture code where possible 2021-05-19 12:11:04 +02:00
Sarah Hoffmann
65bd749918 test: use table_rows() and execute_values() where possible
Some uses of scalar() could also be replaced with convenience
functions from the word table mock.
2021-05-19 10:51:10 +02:00
Sarah Hoffmann
510eb53f53 test: move Testingcursor into separate class
Also adds more convenience functions: counting with a where
statement and a wrapper to execute_values().
2021-05-19 10:30:36 +02:00
mogita
507543a482 fix: add the missing question mark 2021-05-19 13:35:15 +08:00
Sarah Hoffmann
16bb007135 Merge pull request #2336 from lonvia/do-not-mask-error-when-loading-tokenizer
Do not hide errors when importing tokenizer
2021-05-18 23:00:10 +02:00
Sarah Hoffmann
1ffb6bd5d0 Merge pull request #2321 from AntoJvlt/csv-import-special-phrases
CSV import for special phrases and loader refactoring
2021-05-18 22:58:25 +02:00
AntoJvlt
799a4c9ab6 Documentation update and small code fixes 2021-05-18 22:35:21 +02:00
Sarah Hoffmann
b2722650d4 do not hide errors when importing tokenizer
Explicitly check for the tokenizer source file to check that
the name is correct. We can't use the import error for that
because it hides other import errors like a missing
library.

Fixes #2327.
2021-05-18 16:28:21 +02:00
Sarah Hoffmann
54b06d7abc Merge pull request #2332 from lonvia/fix-keyword-details
Always use object type for details keywords
2021-05-18 11:30:58 +02:00
Sarah Hoffmann
fef1bbb1a7 always use object type for details keywords
When name and address is empty, the keywords field in the response
of the details API would be an array because that is what PHP's
json_encode defaults to with empty array(). This default can only
be changed globally per json_encode call and that might cause
unintended colleteral damage. Work around the issue by making
name and address an empty array instead of keywords.

Fixes #2329.
2021-05-17 16:36:32 +02:00
AntoJvlt
3206bf59df Resolve conflicts 2021-05-17 13:52:35 +02:00
AntoJvlt
a33f2c0f5b Special phrases documentation updated 2021-05-17 13:25:16 +02:00
AntoJvlt
8b8dfc46eb Added --no-replace command for special phrases importation and added corresponding tests 2021-05-17 13:25:06 +02:00
AntoJvlt
06aab389ed Code cleaning and SPLoader deleted 2021-05-16 16:59:12 +02:00
AntoJvlt
fb0ebb5bf0 Add tests for the new SPWikiLoader and SPCsvLoader 2021-05-16 16:10:06 +02:00
Sarah Hoffmann
925726222f Merge pull request #2323 from darkshredder/disable-search-reverse-only
Feat: Disabled search API for --reverse-only imports
2021-05-14 10:40:22 +02:00
Sarah Hoffmann
550e7edb64 Merge pull request #2328 from lonvia/convert-tiger-to-csv
Switch external Tiger data to CSV format
2021-05-14 09:58:50 +02:00
Sarah Hoffmann
2992dea5c8 install default settings for legacy_icu tokenizer 2021-05-14 09:44:10 +02:00
Sarah Hoffmann
e76e4bd964 adapt documentation to use Tiger CSV dump 2021-05-14 00:02:50 +02:00
Sarah Hoffmann
7d621389ee adapt tests to new TIGER CSV format 2021-05-14 00:02:50 +02:00
Sarah Hoffmann
35efe3b41c use tokenizer during Tiger data import
This also changes the required import format to CSV.
2021-05-14 00:02:50 +02:00
Darkshredder
e5ffc59cd5 feat: Added reverse-only-search validation 2021-05-14 02:36:21 +05:30
Sarah Hoffmann
d7f9d2bde9 Merge pull request #2326 from lonvia/wokerpool-for-tiger-data
Use WorkerPool when importing Tiger data
2021-05-13 22:09:56 +02:00
Sarah Hoffmann
5feece64c1 use WorkerPool for Tiger data import
Requires adding an option that SQL errors are ignored.
2021-05-13 20:36:50 +02:00
Sarah Hoffmann
b9a09129fa move WorkerPool into db module
The pool is independent of the indexer and may also be used
by other parts of the software.
2021-05-13 17:11:17 +02:00
Sarah Hoffmann
96e6bbe3a1 Merge pull request #2325 from lonvia/do-not-precompute-postcodes
Do not preload postcodes in the legacy tokenizer
2021-05-13 17:00:29 +02:00
Frederik Ramm
fe39185894 Add array_key_last function for PHP <7.3
This patch adds an array_key_last function if it doesn't yet exist, fixes #2316. It is tested on PHP 7.2.24 but not PHP 7.3.
2021-05-13 16:42:22 +02:00
Sarah Hoffmann
fc860787dd do not preload postcodes
This is too expensive for updates.
2021-05-13 16:14:12 +02:00
Sarah Hoffmann
63e35574d4 Merge pull request #2324 from lonvia/generic-external-postcodes
Rework postcode handling and generalised external postcode support
2021-05-13 14:52:19 +02:00
Sarah Hoffmann
db2dbf15f7 fix token_info migration
A bad indent meant that only one table received the new column.
2021-05-13 14:31:41 +02:00
Sarah Hoffmann
f5977dac75 ignore invalid coordinates in external postcodes 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
8f2746fe24 ignore entries without country code 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
41b9bc9984 add documentation for external postcode feature 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
1ccd4360b4 correctly handle removing all postcodes for country 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
bf864b2c54 index postcodes after refreshing 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
4abaf71234 add and extend tests for new postcode handling 2021-05-13 14:15:42 +02:00
Sarah Hoffmann
a4aba23a83 move filling of postcode table to python
The Python code now takes care of reading postcodes from placex,
enhancing them with potentially existing external postcodes and
updating location_postcodes accordingly. The initial setup and
updates use exactly the same function.

External postcode handling has been generalized. External postcodes
for any country are now accepted. The format of the external postcode
file has changed. We now expect CSV, potentially gzipped. The
postcodes are no longer saved in the database.
2021-05-13 14:15:42 +02:00
Sarah Hoffmann
cae0cf3546 Merge pull request #2322 from mtmail/type-label-already-lowercased
typelabel value is already lowercased
2021-05-12 20:25:22 +02:00
marc tobias
38f9e18afb typelabel value is already lowercased 2021-05-12 19:16:51 +02:00
AntoJvlt
9d83da830f Introduction of SPCsvLoader to load special phrases from a csv file 2021-05-10 23:26:39 +02:00
AntoJvlt
00959fac57 Refactoring loading of external special phrases and importation process by introducing SPLoader and SPWikiLoader 2021-05-10 21:49:31 +02:00
Sarah Hoffmann
40cb17d299 Merge pull request #2314 from lonvia/fix-status-no-import-date
Correctly catch the exception when import date is missing
2021-05-06 17:41:53 +02:00
Sarah Hoffmann
2ae293aeb6 Merge pull request #2312 from lonvia/icu-tokenizer
Add new tokenizer based on libICU
2021-05-06 17:22:04 +02:00
Sarah Hoffmann
d8ead78e03 correctly catch the exception when import date is missing 2021-05-06 16:27:42 +02:00
Sarah Hoffmann
b2c6eca2c8 add missing transliterations
The ICU library only offers transliterations for a limited set of
script. Add transliterations for missing scripts from the PostgreSQL
module. These means that the same selection of scripts is supported
as with the old module.
2021-05-05 21:16:55 +02:00
Sarah Hoffmann
872ab91421 fix name of transliterator
Should be different from the normalisation rules.
2021-05-05 17:09:38 +02:00
Sarah Hoffmann
a263e54b94 enable BDD tests for different tokenizers
The tokenizer to be used can be choosen with -DTOKENIZER.

Adapt all tests, so that they work with legacy_icu tokenizer.
Move lookup in word table to a function in the tokenizer.
Special phrases are temporarily imported from the wiki until
we have an implementation that can import from file. TIGER
tests do not work yet.
2021-05-05 10:31:51 +02:00
Sarah Hoffmann
18c99a5c5f add unit tests for legacy ICU tokenizer 2021-05-05 10:15:27 +02:00
Sarah Hoffmann
d55fc39275 cache translieration results 2021-05-05 10:15:27 +02:00
Sarah Hoffmann
ba8ed7967d add PHP part for new ICU-base tokenizer 2021-05-05 10:15:27 +02:00
Sarah Hoffmann
f44af49df9 add Python part for new ICU-based tokenizer 2021-05-05 10:15:27 +02:00
Sarah Hoffmann
3c67bae868 Merge pull request #2310 from RhinoDevel/master
2nd try: Add hint about replication update & recheck intervals being in seconds.
2021-05-04 12:45:26 +02:00
Marc
3dade534fd Add hint about replication update & recheck intervals being in seconds. 2021-05-04 11:47:15 +02:00
Sarah Hoffmann
8b1a509442 Merge pull request #2305 from lonvia/tokenizer
Factor out normalization into a separate module
2021-05-03 09:15:34 +02:00
Sarah Hoffmann
8bdb9aa607 mock tokenizer factory for replication tests 2021-05-01 10:50:39 +02:00
Sarah Hoffmann
36c624ec71 commit between migrations
Later migrations may require tables set up by older ones.
2021-05-01 10:47:35 +02:00
Sarah Hoffmann
7fd871a74d increase database version for tokenizer migration 2021-05-01 10:47:35 +02:00
Sarah Hoffmann
ced8f0f4a2 fix liniting issues 2021-04-30 17:59:50 +02:00
Sarah Hoffmann
388ebcbae2 move index creation for word table to tokenizer
This introduces a finalization routing for the tokenizer
where it can post-process the import if necessary.
2021-04-30 17:41:08 +02:00
Sarah Hoffmann
20891abe1c indexer: fetch extra place data asynchronously
The indexer now fetches any extra data besides the place_id
asynchronously while processing the places from the last batch.
This also means that more places are now fetched at once.
2021-04-30 17:41:08 +02:00
Sarah Hoffmann
6ce6f62b8e fetch place info asynchronously 2021-04-30 17:41:08 +02:00
Sarah Hoffmann
602728895e indexer: fetch ids in batches 2021-04-30 17:41:08 +02:00
Sarah Hoffmann
fc995ea6b9 move database check for module to tokenizer 2021-04-30 17:41:08 +02:00
Sarah Hoffmann
be6262c6ce move status test to tokenizer
The availability of the module is now tested by the tokenizer.
2021-04-30 17:41:08 +02:00
Sarah Hoffmann
893490f94e add more tests for legacy tokenizer 2021-04-30 17:41:08 +02:00
Sarah Hoffmann
044bb6afa5 move tokenization in query into tokenizer 2021-04-30 17:41:08 +02:00
Sarah Hoffmann
3eb4d88057 boilerplate for PHP code of tokenizer
This adds an installation step for PHP code for the tokenizer. The
PHP code is split in two parts. The updateable code is found in
lib-php. The tokenizer installs an additional script in the
project directory which then includes the code from lib-php and
defines all settings that are static to the database. The website
code then always includes the PHP from the project directory.
2021-04-30 11:31:52 +02:00
Sarah Hoffmann
23fd1d032a tests for legacy tokenizer 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
7cb7cf848d move amenity creation to tokenizer
The BDD tests still use the old-style amenity creation scripts
because we don't have simple means to import a hand-crafted
test file of special phrases right now.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
bef300305e move default country name creation to tokenizer
The new function is also used, when a country us updated. All SQL
function related to country names have been removed.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
dc700c25b6 cache all postcodes 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
0ba93e5ba9 reorganise address iteration in tokenizer 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
0da481f207 remove debug code 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
d75a235c1f use address tokens in SQL 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
9e92759ac7 extract address tokens in tokenizer 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
ffc2d82b0e move postcode normalization into tokenizer 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
d8ed1bfc60 move houseunumber handling to tokenizer
Normalization and token computation are now done in the tokenizer.
The tokenizer keeps a cache to the hundred most used house numbers
to keep the numbers of calls to the database low.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
d711f5a81e move name token creation into tokenizer
Name tokens are now handed in via token_info and used from there.

Also moves the generic search name insertion function back to
placex_triggers.sql.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
fa2bc60468 introduce name analyzer
The name analyzer is the actual work horse of the tokenizer. It
is instantiated on a thread-base and provides all functions for
analysing names and queries.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
e1c5673ac3 require tokeinzer for indexer 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
1b1ed820c3 introduce index for finding surrounding buildings 2021-04-30 11:30:51 +02:00
Sarah Hoffmann
a73711f3cd add extra column for tokenizer
Add a jsonb column to the placex and location_property_osmline tables
which can be used by the installed tokenizer as required. No other
part of the software will use or otherwise rely on this column.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
9397bf54b8 introduce external processing in indexer
Indexing is now split into three parts: first a preparation step
that collects the necessary information from the database and
returns it to Python. In a second step the data is transformed
within Python as necessary and then returned to the database
through the usual UPDATE which now not only sets the indexed_status
but also other fields. The third step comprises the address
computation which is still done inside the update trigger in
the database.

The second processing step doesn't do anything useful yet.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
fbbdd31399 move word table and normalisation SQL into tokenizer
Creating and populating the word table is now the responsibility
of the tokenizer.

The get_maxwordfreq() function has been replaced with a
simple template parameter to the SQL during function installation.
The number is taken from the parameter list in the database to
ensure that it is not changed after installation.
2021-04-30 11:30:51 +02:00
Sarah Hoffmann
b5540dc35c add migration for configurable tokenizer
Adds a migration that initialises a legacy tokenizer for
an existing database. The migration is not active yet as
it will need completion when more functionality is added
to the legacy tokenizer.
2021-04-30 11:29:57 +02:00
Sarah Hoffmann
296a66558f move module installation to legacy tokenizer 2021-04-30 11:29:57 +02:00
Sarah Hoffmann
af968d4903 introduce tokenizer modules
This adds the boilerplate for selecting configurable tokenizers.
A tokenizer can be chosen at import time and will then install
itself such that it is fixed for the given database import even
when the software itself is updated.

The legacy tokenizer implements Nominatim's traditional algorithms.
2021-04-30 11:29:57 +02:00
Sarah Hoffmann
5c7b9ef909 Merge pull request #2303 from lonvia/remove-aux-support
Remove support for AUX housenumber tables
2021-04-30 11:19:35 +02:00
Sarah Hoffmann
185d369404 remove support for AUX housenumber tables
These tables have never been actively maintained and the code is
completely untested. With the upcomming changes, it is unlikely
that the code remains usable.

This removes the aux tables and all code that references them.
2021-04-30 10:08:29 +02:00
Sarah Hoffmann
51d20b19b6 Merge pull request #2299 from lonvia/update-actions
Fix database check for reverse-only
2021-04-27 12:18:45 +02:00
Sarah Hoffmann
46e8c6b112 Merge pull request #2291 from AntoJvlt/special-phrases-statistics
Special phrases statistics
2021-04-27 11:57:05 +02:00
Sarah Hoffmann
c8fb25201a do not check for extra housenumber index for reverse-only
Also adds a database check for reverse only import to the CI.
2021-04-27 10:14:26 +02:00
Sarah Hoffmann
1fd483643b add tests for different scripts 2021-04-26 23:01:06 +02:00
Sarah Hoffmann
a21a0864f1 Merge pull request #2298 from lonvia/add-warming-to-ci
Add warming to CI import tests and fix more Python 3.5 compatibility issues
2021-04-26 11:21:44 +02:00
Sarah Hoffmann
4457bf7528 avoid Path in subprocess parameters
Not supported by Python 3.5.
2021-04-26 10:55:23 +02:00
Sarah Hoffmann
5ed6f18d83 add warming to CI import test 2021-04-26 09:54:09 +02:00
AntoJvlt
abb3d56b20 Switching to log info and only send warning for invalid phrases 2021-04-25 17:57:43 +02:00
AntoJvlt
c5ecb9bae0 Implemented statistics for the import of special phrases through the SpecialPhrasesImporterStatistics class 2021-04-25 17:57:43 +02:00
AntoJvlt
1b68152fb2 reorganization of folder/file for the special phrases importer 2021-04-25 17:57:42 +02:00
Sarah Hoffmann
6812f397af Merge pull request #2297 from lonvia/update-deployment-docs
docs: update deployment to use project directory
2021-04-24 15:35:00 +02:00
Sarah Hoffmann
68bd9c6091 Merge pull request #2296 from lonvia/disable-too-few-public-methods-check
pylint: disable too-few-public-methods check
2021-04-24 15:03:28 +02:00
Sarah Hoffmann
754f9e3a20 docs: update deployment to use project directory
Fixes #2295.
2021-04-24 15:00:46 +02:00
Sarah Hoffmann
b951b11336 fix pylint complaints 2021-04-24 11:59:32 +02:00
Sarah Hoffmann
89c90bedb9 pylint: disable check too-few-public-methods 2021-04-24 11:39:44 +02:00
Sarah Hoffmann
b4fe7d7c7d Merge pull request #2293 from darkshredder/update-manpage
Updated manual page
2021-04-24 09:20:28 +02:00
Sarah Hoffmann
5071710db7 Merge pull request #2294 from lonvia/update-actions
CI: add import test against Python 3.5 and fix discovered issues
2021-04-23 23:33:15 +02:00
Sarah Hoffmann
9faaf3fc88 actions: add import on ubuntu 18.04
This uses oldest possible dependencies where possible.
2021-04-23 22:50:08 +02:00
Sarah Hoffmann
9c51c133f7 indexes with includes are not available for postgresql < 11 2021-04-23 22:50:08 +02:00
Sarah Hoffmann
91d2fb6b1c use group() for regex matches
Needed for compatibility with Python 3.5.
2021-04-23 22:50:08 +02:00
Sarah Hoffmann
280406c0d7 use pathlib version of open 2021-04-23 22:50:08 +02:00
Sarah Hoffmann
d5fc3b5e99 subprocess needs string argument
Compatibility change for Python 3.5.
2021-04-23 22:50:08 +02:00
Sarah Hoffmann
f8f8c7e534 check for existance of custom .env before opening 2021-04-23 22:50:08 +02:00
Sarah Hoffmann
3a642d50a4 use more generic ImportError to check for module
ModuleNotFoundError was only introduced in Python 3.6.
2021-04-23 22:50:08 +02:00
Sarah Hoffmann
9685c68e30 replace usages of fromisoformat() with strptime()
fromisoformat was only introduced with Python 3.7 while we
still support Python 3.5.

Fixes #2292.
2021-04-23 22:50:08 +02:00
Sarah Hoffmann
95e6ec091b remove argparse dependency for vagrant scripts
Users don't need to recreate the manpage.
2021-04-23 22:50:08 +02:00
Darkshredder
34f5e4a199 Updated manual page 2021-04-24 01:42:38 +05:30
Sarah Hoffmann
788baafa26 bdd tests: fix place dependen ranking tests
The ranks of places may differ for some countries. Force the
place nodes in the test on null island which always uses the
default ranking.
2021-04-22 17:31:00 +02:00
Sarah Hoffmann
4c31813398 Merge pull request #2288 from RhinoDevel/patch-1
Replace "nominatim-update" with "nominatim".
2021-04-22 17:12:25 +02:00
RhinoDevel
b7bae80616 Replace "nominatim-update" with "nominatim".
If I am not mistaken, the correct command to index imported data via commandline is "nominatim index".
2021-04-22 15:40:22 +02:00
Sarah Hoffmann
f7e4aa51d3 indexer: reset query counter
Reset the counter for queries after the asynchronous connections
have been reopened.
2021-04-21 10:33:45 +02:00
Sarah Hoffmann
696c50459f Merge pull request #2285 from lonvia/split-indexer-code
Rework indexer code
2021-04-20 15:34:14 +02:00
Sarah Hoffmann
50b6d7298c factor out async connection handling into separate class
Also adds a test for reconnecting regularly while indexing.
2021-04-20 14:08:37 +02:00
Sarah Hoffmann
26a81654a8 indexer: make self.conn function-local
Also switches to our internal connect function which gives us
a cursor with a sclar() function.
2021-04-20 14:08:37 +02:00
Sarah Hoffmann
6430371d7d make index() function private 2021-04-20 14:08:37 +02:00
Sarah Hoffmann
18705b3f18 move analyse function into indexinf function 2021-04-20 14:08:37 +02:00
Sarah Hoffmann
c6bd2bb7fb indexer: move runner into separate file 2021-04-20 14:08:37 +02:00
Sarah Hoffmann
c4fd94bd1a Merge pull request #2284 from lonvia/cleanup-word-frequency-computation
Rename and simplify function for word pre-computation
2021-04-19 18:28:04 +02:00
Sarah Hoffmann
b88b952f56 simplify token precomputation
Rename function to reflect that it is only used for precomputation.
The token IDs are not really needed, so don't bother to compute
the array of tokens.
2021-04-19 17:24:19 +02:00
Sarah Hoffmann
d68b02d36a remove unused word recomputation script
Has been replaced by a script recomputing counts from search_name.
2021-04-19 16:40:57 +02:00
Sarah Hoffmann
b9b85eb208 Merge pull request #2283 from darkshredder/tiger-data-test-fix
Fix: tiger-data tarfile test
2021-04-19 13:56:36 +02:00
Darkshredder
1f898405a6 Fix: tiger-data tarfile test 2021-04-19 16:02:52 +05:30
Sarah Hoffmann
6f6910101e Merge pull request #2282 from lonvia/add-paths-to-config
Include software paths in Python config object
2021-04-19 12:14:25 +02:00
Sarah Hoffmann
79d55357e8 simplify sql and website creation functions 2021-04-19 10:53:30 +02:00
Sarah Hoffmann
4fa6c0ad53 simplify constructor for SQL preprocessor
Use sql path from config.
2021-04-19 10:26:25 +02:00
Sarah Hoffmann
8f63f9516b simplify interface for adding tiger data
Also simplifies tests using existing fixtures.
2021-04-19 10:26:25 +02:00
Sarah Hoffmann
995ba2c7c2 add library directories to config
Allows to reduce the number of parameters in functions that take
the config anyway.
2021-04-19 10:26:25 +02:00
Sarah Hoffmann
830e3be1e6 Merge pull request #2281 from changpingc/changping/fix-tiger-index
fix index on location_property_tiger (parent_place_id)
2021-04-19 08:42:59 +02:00
Channgping Chen
29a314a092 fix index on location_property_tiger (parent_place_id)
Looks like 2af82975cd
accidentally renamed an index. Because of the added "if not
exists" clause, the index doesn't get created. This
significantly slows down reverse queries because they now
require full scans on location_property_tiger.

Without this fix, reverse queries can take 8s on a full
planet install on an r5.8xlarge instance in EC2.
2021-04-19 00:33:15 +00:00
Sarah Hoffmann
abdba5fdc7 Merge pull request #2280 from AntoJvlt/Fix-special-phrases-import-and-tests-cleaning
Fix regex and sanity check for the import of special phrases and tests cleaning.
2021-04-18 11:57:19 +02:00
AntoJvlt
b2ae715699 Only log a warning if a wrong input is detected on the wiki while importing special phrases 2021-04-17 20:19:39 +02:00
AntoJvlt
a95c748363 Fix occurence regex 2021-04-17 19:24:13 +02:00
AntoJvlt
ec859e41c6 Cleaned tests and add database cleaning tests on test_import_from_wiki 2021-04-17 19:23:33 +02:00
Sarah Hoffmann
7aeae9da81 Merge pull request #2279 from lonvia/add-index-for-continued-indexing
Add index for continued indexing
2021-04-17 11:51:21 +02:00
Sarah Hoffmann
2ca11ccc6b add tests for continuing import 2021-04-17 11:10:36 +02:00
Sarah Hoffmann
d74ae669e3 add support index when continuing import at index phase
Indexing scans the placex table sequentially during indexing
on the initial import. That is okay because we know that all
rows need to be processed anywhere. When continuing the import,
however, a large part might already be indexed, so that the
process spends a lot of time going through rows that are no
longer of interest. Create a supporting index for all unindexed
rows to speed up the scan. This is the same index as used later
for updates.
2021-04-17 11:07:04 +02:00
Sarah Hoffmann
9fabc5572d Merge pull request #2278 from lonvia/remove-transistion-functions
Remove transition functions
2021-04-17 10:13:33 +02:00
Sarah Hoffmann
da98a2102a remove transition functions from Python 2021-04-16 18:41:14 +02:00
Sarah Hoffmann
fb3353b854 Merge pull request #2277 from lonvia/update-osm2pgsql
Update osm2pgsql to current master
2021-04-16 17:40:43 +02:00
Sarah Hoffmann
b7e5c54593 remove PHP code for transition functions 2021-04-16 17:28:51 +02:00
Sarah Hoffmann
68beec5590 remove installation of PHP util scripts 2021-04-16 17:09:40 +02:00
Sarah Hoffmann
6ba06d1eb4 Merge pull request #2276 from lonvia/port-country-code-creation-to-python
Port country code creation to python
2021-04-16 16:57:04 +02:00
Sarah Hoffmann
0f11e311c4 add test for new postcode import function 2021-04-16 16:11:20 +02:00
Sarah Hoffmann
886a01c796 port function to compute initial postcodes to Python 2021-04-16 16:11:20 +02:00
Sarah Hoffmann
a632b9f86a Merge pull request #2275 from lonvia/switch-to-absolute-imports
Use absolute imports in Python code
2021-04-16 15:04:10 +02:00
Sarah Hoffmann
76b1885595 use absolute imports in Python code
Relative imports are no longer officially recommended.
2021-04-16 14:20:09 +02:00
Sarah Hoffmann
c55b409cf6 update osm2pgsql to current master (fixes version output) 2021-04-15 10:24:01 +02:00
Sarah Hoffmann
c64193f839 Merge pull request #2263 from AntoJvlt/special-phrases-autoupdate
Implemented auto update of special phrases while importing them
2021-04-15 10:13:25 +02:00
Sarah Hoffmann
28a2a795ba Merge pull request #2270 from lonvia/simplify-place-boundary-merge
Simplify matching between place and boundary names
2021-04-15 10:12:53 +02:00
Sarah Hoffmann
e90adfc7c3 adapt database check to new index layout 2021-04-14 17:52:59 +02:00
Sarah Hoffmann
16267dc021 add migration for new placenode geometry index 2021-04-14 17:52:59 +02:00
Sarah Hoffmann
e7266b52ae simplify name matching between boundary and place node
Instead of normalising the names simply compare them in lower
case. This removes the dependency on the tokenizer for
linking boundaries and nodes. When looking up the linked places
by place type also allow that one name is simply contained in the
other. This catches the frequent case where one of the names has
an addendum (e.g. Newport vs. City of Newport).

Drops the special index for the name lookup and insted relies
on a slightly extended version of the geometry index used for
reverse lookup. Saves around 100MB on a planet.
2021-04-14 17:52:59 +02:00
Sarah Hoffmann
dc02610408 Merge pull request #2269 from lonvia/fix-actions
github actions: reintroduce postgresql repo
2021-04-14 17:50:02 +02:00
Sarah Hoffmann
dc1bfe4a93 github actions: reintroduce postgresql repo 2021-04-14 17:25:44 +02:00
Sarah Hoffmann
cf69daaafb Merge pull request #2264 from darkshredder/tiger-data-tests
Fix:  Error if last statements is wrong and improved tests in tiger data import
2021-04-14 10:56:12 +02:00
Darkshredder
49ee7505ed Fix: Removed error if endstatement is wrong and improved tests 2021-04-13 15:44:12 +05:30
AntoJvlt
ae2b2cb9a5 Tests added for the auto update of special phrases during import 2021-04-12 14:35:29 +02:00
AntoJvlt
8c2f287ce4 Implemented auto update of special phrases while importing them 2021-04-12 14:30:48 +02:00
Sarah Hoffmann
2351f36315 Merge pull request #2260 from AntoJvlt/fix-load-languages-special-phrases
Fix default languages loading for special phrases import
2021-04-11 23:09:45 +02:00
AntoJvlt
5ecae10713 Fix default languages loading 2021-04-11 22:26:31 +02:00
Sarah Hoffmann
2e3d657794 Merge pull request #2258 from darkshredder/code-coverage
Disabled Code coverage status checks
2021-04-10 21:19:55 +02:00
Darkshredder
90f990b806 CodeCov comment only when codecoverage changes 2021-04-10 22:28:29 +05:30
Darkshredder
7666d48409 Disabled Coverage status checks 2021-04-10 20:44:52 +05:30
Sarah Hoffmann
be4cb190e8 add badge for codecov 2021-04-10 16:57:39 +02:00
Sarah Hoffmann
2f4eca8c46 Merge pull request #2252 from darkshredder/code-coverage
Added Code coverage support using Codecov
2021-04-10 16:37:12 +02:00
Sarah Hoffmann
71564fa1de split LANGUAGES parameter before use
The user supplies the languages as a comma-separated list.
2021-04-09 17:48:28 +02:00
Sarah Hoffmann
ce08cb6cd7 add migration information for new configuration format 2021-04-08 11:01:46 +02:00
Sarah Hoffmann
1f0cf6311a Merge pull request #2256 from lonvia/remove-reverseinplan-option
Remove ReverseInPlan option
2021-04-08 10:54:16 +02:00
Sarah Hoffmann
1db468b6c3 remove special handling for reversed queries in getGroupedSearches
getGroupedSearches is guaranteed not to be called with reversed
structured queries, so there is no need to have special exclusion
code.
2021-04-08 10:35:14 +02:00
Sarah Hoffmann
534de5ba81 remove reverseInPlan option from Geocode
Disabling query reversal is no longer possible in the configuration,
so there is no need to keep this as an option. Reversal is
automatically disabled for structured search only.
2021-04-08 10:19:27 +02:00
Sarah Hoffmann
492186716f prepare 3.7.0 release 2021-04-06 21:23:29 +02:00
Sarah Hoffmann
07fda48cee docs: minor spelling corrections 2021-04-06 16:09:53 +02:00
Sarah Hoffmann
4b31be5203 docs: unpacking tiger data is no longer necessary 2021-04-06 15:56:08 +02:00
Sarah Hoffmann
5d69c7ade1 Merge pull request #2250 from lonvia/save-transliterated-housenumbers
Switch to saving transliterated housenumbers in placex
2021-04-05 15:48:22 +02:00
Darkshredder
2bfea15fdc Fixed BDD tests coverage reports 2021-04-05 06:30:31 +05:30
Sarah Hoffmann
96b0699621 add migration for transliterated housenumbers 2021-04-04 15:26:47 +02:00
Sarah Hoffmann
6cbef84cad use new transliteration in initial housenumber word computation
The new create_housenumber_id() function splits housenumber
lists correctly. Otherwise there is no difference.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
55fcc44c8c correctly handle housenumber lists
Lists are now standardised to use a semicolon separator.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
16a66b5326 move transliteration of housenumbers into indexing
Housenumbers are now saved in transliterated form in the housenumber
column. This saves the transliteration step during lookup.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
3590e76a1c tests for finding non-ascii housenumbers 2021-04-04 15:26:47 +02:00
Sarah Hoffmann
0ec3fdd3ba return housenumbers always from address field
This means that we can use normalized versions of the
housenumber in the housenumber field as it is no longer
a user visible field.
2021-04-04 15:26:47 +02:00
Sarah Hoffmann
c0f0b66509 Merge pull request #2248 from darkshredder/special-term-test
Added Test for TokenSpecialTerm
2021-04-03 18:31:01 +02:00
Darkshredder
0f9df32d11 Added Test for TokenSpecialTerm 2021-04-02 04:49:05 +05:30
Sarah Hoffmann
a370c8be4b Merge pull request #2247 from lonvia/index-for-housenumber-lookup
Index for housenumber lookup
2021-04-01 18:35:00 +02:00
Sarah Hoffmann
d6e0bc698e add recommendation for Postgresql 11+ 2021-04-01 17:10:44 +02:00
Sarah Hoffmann
8d8b1d4307 use non-key index to speed up housenumber search
On Postgresql versions 11+ add an index to speed up the lookup
of housenumbers for terms found in search_name. This is really
just a band-aid around the query planer's interpretation of the
query.
2021-04-01 17:10:44 +02:00
Darkshredder
771b3377c0 Added code-cov Support for Code Coverage 2021-03-31 05:00:03 +05:30
Sarah Hoffmann
8dbfdd59b0 Merge pull request #2243 from darkshredder/XML-format-fix
Fixed: XML format: more_url points to localhost, not base URL
2021-03-30 09:19:01 +02:00
Sarah Hoffmann
cd03882536 Merge pull request #2244 from AntoJvlt/import-special-phrases-tests-cleaning
Cleaned tests for special phrases.
2021-03-30 09:17:27 +02:00
Darkshredder
0b154a2a1a Added HTTP_HOST to if statement 2021-03-30 03:02:55 +05:30
AntoJvlt
e82de99e5a Cleaned tests of exceptions and fix phrase_settings.json test file name. 2021-03-29 22:07:29 +02:00
Darkshredder
27b379c1e3 fixed: XML format: more_url points to localhost, not base URL 2021-03-30 01:02:43 +05:30
Sarah Hoffmann
f9517e9143 Merge pull request #2234 from darkshredder/add-man-page
Added Manual page for Nominatim tool
2021-03-29 14:25:10 +02:00
Sarah Hoffmann
e05dee6df5 allow sorting by housenumbers for rare street names
Usually we don't narrow down search results by house number when
only a street name is given because there may be a lot of rows
to cross check when the street name is very frequent. However,
when it is known to be rare, the housenumber check may be done
anyway.

Fixes #2238.
2021-03-29 12:06:51 +02:00
Darkshredder
3fad492c6f Update manpage after rebase 2021-03-29 14:27:06 +05:30
Darkshredder
b7d6ae93e3 Nominatim/cli.py rebase fixes 2021-03-29 14:16:41 +05:30
Darkshredder
21b1b75b08 Rebase with master 2021-03-29 14:00:45 +05:30
Darkshredder
bbe0353b23 fixed indentation and used sed to remove AUTHORS section 2021-03-29 13:57:13 +05:30
Darkshredder
51e2654cd2 Added Manual page and fixed documentation 2021-03-29 13:57:13 +05:30
Sarah Hoffmann
09b2510219 Merge pull request #2228 from AntoJvlt/import-special-phrases-porting-python
Import special phrases porting python
2021-03-29 09:49:35 +02:00
AntoJvlt
57ce75eb67 Change command 'import-special-phrases --from-wiki' to 'special-phrases --import-from-wiki'. 2021-03-26 02:22:38 +01:00
AntoJvlt
cde9389e75 Errors fixes, Cleaning code, Improvement and addition of tests 2021-03-26 01:53:33 +01:00
AntoJvlt
2c19bd5ea3 Encapsulation of tools/special_phrases.py into SpecialPhrasesImporter class and add new tests. 2021-03-25 21:13:57 +01:00
AntoJvlt
ff34198569 Code cleaning, tests simplification and use of python3-icu package 2021-03-23 23:56:39 +01:00
AntoJvlt
919469c8fe Updated documentation for PyICU support 2021-03-23 23:34:19 +01:00
AntoJvlt
1ce8b530cd Introduction of PyICU for transliteration in python. Reversed changes in normalization.sql. 2021-03-23 23:34:16 +01:00
AntoJvlt
2fb6018078 Added wrapper in specialphrases.php to call corresponding nominatim command. 2021-03-23 23:30:42 +01:00
AntoJvlt
6d56cbb3e8 Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. 2021-03-23 23:30:39 +01:00
AntoJvlt
1a93319093 Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. 2021-03-23 23:27:56 +01:00
Sarah Hoffmann
28b4fb12b6 Merge pull request #2233 from lonvia/index-for-postcode-ids
Create postcode id index earlier
2021-03-23 09:18:10 +01:00
Sarah Hoffmann
5dabc0aca8 create postcode id index earlier
Now that the indexer takes care of indexing the postcode tables,
the id index is needed to find the rows to index.
2021-03-22 22:24:56 +01:00
Sarah Hoffmann
4f1bdde32e Merge pull request #2231 from mtmail/correct-cli-help-page
nominatim -h was printing wrong text for lookup and details
2021-03-21 16:52:20 +01:00
Sarah Hoffmann
a08ca5b1b5 avoid division by zero in progress meter
On Windows systems the timer may not be accurate enough to measure
the time between init() and done(). Avoid computing statistics with
a diff time of 0 in such cases.

Fixes #2230.
2021-03-21 16:47:22 +01:00
marc tobias
87d5883ddb nominatim -h was priting wrong text for lookup and details 2021-03-21 16:06:41 +01:00
AntoJvlt
d5acade4db Deleted specialphrases.php and phrase_settings.php 2021-03-20 19:48:05 +01:00
AntoJvlt
9d1c23e4f5 Updated specialphrases_testdb.sql 2021-03-20 19:17:03 +01:00
AntoJvlt
17cb59efbd Ported functions for the import of special phrases from php to python.
- the command is now --import-special-phrases
- the output is not an sql file anymore, data are directly imported to the database.
- the little part on the documentation (section data import) has been modified.
2021-03-20 19:11:50 +01:00
Sarah Hoffmann
118befd7d7 bdd tests: make indexing less verbose
Do not print progress info for indexing when there is an error
in the BDD tests.
2021-03-20 10:39:29 +01:00
Sarah Hoffmann
0d9fe6e49c Merge pull request #2219 from lonvia/bdd-test-remove-php
BDD tests: run all setup via nominatim Python library
2021-03-17 11:40:34 +01:00
Sarah Hoffmann
ebae3553e0 bdd: run all setup via nominatim Python library
Drops all calls to PHP utility functions. nominatim cli functions
are used where possible, to stay as close to the final code as
possible with the tests.

By removing the PHP calls, the test code now only uses osm2pgsql and
the database module from the build directory.
2021-03-16 22:20:41 +01:00
Sarah Hoffmann
d3ff831b8a Merge pull request #2216 from lonvia/fix-reverse-interpolation
Reverse: do not prefer interpolations over closer housenumbers
2021-03-15 14:08:54 +01:00
Sarah Hoffmann
4d7c5ec089 reverse: do not prefer interpolations over closer housenumbers
Always look up the closest housenumber before looking up
interpolations. This ensures that closer housenumbers are
preferred over interpolations.

Fixes #2214.
2021-03-15 10:50:04 +01:00
Sarah Hoffmann
81a6b746b8 Merge pull request #2212 from darkshredder/country-name
Ported createCountryNames() to python and Added tests
2021-03-15 09:36:06 +01:00
Darkshredder
f356a75a24 Add setup.php 2021-03-14 15:02:30 +05:30
Sarah Hoffmann
7212fa8630 fix template variable name 2021-03-13 12:05:53 +01:00
Sarah Hoffmann
6cabc44841 Merge pull request #2213 from lonvia/tweak-search-weights
Some more tweaking of the ranking of search interpretations
2021-03-12 15:47:36 +01:00
Darkshredder
b108bd1c1e Linting fix 2021-03-12 18:28:47 +05:30
Darkshredder
077a8c1f95 refactored tests and made changes to code for easy readibility 2021-03-12 18:23:20 +05:30
Darkshredder
7a874d5b97 Ported createCountryNames() to python and added tests 2021-03-12 10:28:41 +05:30
Sarah Hoffmann
9086a794a1 Merge pull request #2204 from darkshredder/tiger-data
Ported tiger-data-import to Python and Added Tarball Support
2021-03-11 22:48:38 +01:00
Sarah Hoffmann
6dd2b9c2ec do not mix partial names with other words
As soon as a housenumber, postcode, etc. appear, the name term
must obviously be closed and no further partial terms can be
appended.
2021-03-11 22:44:49 +01:00
Sarah Hoffmann
3fbe4511f9 make linter happy 2021-03-11 21:14:23 +01:00
Sarah Hoffmann
3933fc3ad3 avoid multi-term partials in names
Names are either full words or single-word partial names.
Searching for multi-word partials yields exactly the same
result as with full words.
2021-03-11 20:42:37 +01:00
Sarah Hoffmann
00b05e2394 higher penalty for special searches
Adds a general higher penalty for special search term and an
additional one if the term is anywhere but the beginning or the
end. Also housenumbers and special searches together are less
likely.
2021-03-11 20:37:51 +01:00
Sarah Hoffmann
d5e8c5e975 do not mix partial and full name terms
If NameNonSearch already contains a partial term, then a
full term must not be added to the Name list anymore.
2021-03-11 20:22:54 +01:00
Sarah Hoffmann
478dfb0639 add one-rank penalty for using partial search
Ensures that full matches are preferred over partial ones even when
the full word consists of only one term.
2021-03-11 17:52:44 +01:00
Sarah Hoffmann
f498e40208 fix result splitting for last search group
When we are in the final iteration of the search groups, it is not
possible to further delay the results. Unconditionally use the
results with the best rank instead.
2021-03-11 17:14:46 +01:00
Sarah Hoffmann
182f5f5d7b give preference to full words in address, too
Full word terms are already preferred for the name part. Adding
only one-word partials to the address, makes it impossible to
give a similar preference for the address part. Each term adds
a rank penalty. The problem here is that we interpret the query
forwards and backwards. Having different penalty systems for
name and address means that the same term ends up with different
penalties and that often leads to interpretations of the wrong
direction being in the way.
2021-03-11 15:03:36 +01:00
Darkshredder
e5719de657 Added fixture for sql_preprocessor and fixed some issues 2021-03-11 15:39:17 +05:30
Darkshredder
8486a83cf5 Added test for tarfile 2021-03-10 18:14:17 +05:30
Darkshredder
ccfad57fca Added test and removed runlegacyscript 2021-03-10 17:18:12 +05:30
Darkshredder
64128b699a fixed linting, refactored threaded sql handling and removed importTigerData() function 2021-03-10 13:28:29 +05:30
Darkshredder
4080fbb95c Test fixes 2021-03-09 01:00:56 +05:30
Darkshredder
14ec83c886 Linting fixes 2021-03-08 23:10:49 +05:30
Darkshredder
122c4618b9 Linting fixes 2021-03-08 22:59:51 +05:30
Darkshredder
2af82975cd Ported tiger-data-import to python and Added Tarball Support 2021-03-08 21:57:56 +05:30
Sarah Hoffmann
35f4695b67 Merge pull request #2200 from lonvia/migrations-for-current-version
Introduce a command for database migration
2021-03-08 10:14:03 +01:00
Sarah Hoffmann
3c9e09545e documentation for new migration command 2021-03-06 16:38:37 +01:00
Sarah Hoffmann
764a41b973 automatic migration from 3.6 release
Adds a 'admin --migrate' command that checks for the current
database version and runs any necessary migrations. Also
has migrations going back to 3.6.
2021-03-06 16:36:57 +01:00
Sarah Hoffmann
9d103503f7 Merge pull request #2197 from lonvia/use-jinja-for-sql-preprocessing
Use jinja2 for SQL preprocessing
2021-03-04 16:36:18 +01:00
Sarah Hoffmann
09f4d767e4 port index creation to python
Also switches to jinja-based preprocessing, which allows to
simplify the SQL files. Use 'if not exists' where possible
so that the step can be rerun to fix missing indexes.
2021-03-04 11:11:47 +01:00
Sarah Hoffmann
dd301cf5ac indexer: ANALYSE must be run outside transactions 2021-03-04 11:06:33 +01:00
Sarah Hoffmann
eacabb0e96 move table creation to jinja-based preprocessing 2021-03-03 22:07:51 +01:00
Sarah Hoffmann
6cda021d9b add new jinja2 requirement 2021-03-03 17:51:08 +01:00
Sarah Hoffmann
d2bd6aa78d introduce jinja2 for preprocessing SQL
Replaces various hand-crafted replacements of varying format with
a single Jinja2 templating mechanism. Allows full access to
configuration if necessary.
2021-03-03 17:51:08 +01:00
Sarah Hoffmann
6b306f30b6 Merge pull request #2194 from grischard/patch-1
Fix typo in .github/actions/build-nominatim/action.yml
2021-03-03 11:34:12 +01:00
Guillaume Rischard
c48fd18344 Update action.yml 2021-03-03 11:20:21 +01:00
Sarah Hoffmann
8ea7e04363 Merge pull request #2192 from lonvia/database-versioning
Introduce database versioning
2021-03-02 15:57:46 +01:00
Sarah Hoffmann
32c2d2b248 document new status fields 2021-03-01 22:21:37 +01:00
Sarah Hoffmann
111cca8c9a return database version with status API 2021-03-01 22:17:16 +01:00
Sarah Hoffmann
7ae9c3a9f0 add database_version setting to tests 2021-03-01 21:49:33 +01:00
Sarah Hoffmann
bf4320a7d6 do not depend on cmdline parameter for creating partition tables
The partition numbers in use only depend on the entries in search_name.
2021-03-01 21:28:39 +01:00
Sarah Hoffmann
3a0a4b9175 save software version in the database
The version represents the software version that was used to
import the data.
2021-03-01 20:35:15 +01:00
Sarah Hoffmann
4faefe156c report software version of status call 2021-03-01 16:47:19 +01:00
Sarah Hoffmann
86273f5e2a introduce database patch level for version
This will be needed later for automatic migrations.
2021-03-01 16:46:19 +01:00
Sarah Hoffmann
b4f64aa770 make sure that calls to PHP legacy scripts are fatal on error 2021-03-01 16:10:45 +01:00
Sarah Hoffmann
976c5e9121 introduce table for in-database properties
Adds a simple table where settings for the database can be
saved. This is useful for state that must not change after
import.
2021-03-01 16:09:17 +01:00
Sarah Hoffmann
db663dd92f remove unused import 2021-03-01 09:26:08 +01:00
Sarah Hoffmann
90a5d23016 use tmp_path fixture in config tests 2021-03-01 09:24:04 +01:00
Sarah Hoffmann
99e35d256a fix typo 2021-03-01 09:07:49 +01:00
Sarah Hoffmann
e14e7c6235 Merge pull request #2186 from lonvia/port-import-to-python
Move setup procedure to Python
2021-02-27 12:09:23 +01:00
Sarah Hoffmann
b46adbad22 make sure psql always finishes
If an execption is raised by other means, we still have to close
the stdin pipe to psql to make sure that it exits and releases its
connection to the database.
2021-02-27 10:24:40 +01:00
Sarah Hoffmann
afabbeb546 older versions of Postgresql need explicit return type 2021-02-27 09:46:42 +01:00
Sarah Hoffmann
d14a3df10f do not truncate search_name in reverse-only mode 2021-02-27 09:46:42 +01:00
Sarah Hoffmann
9feb84e426 actions: add psutil dependency 2021-02-26 16:50:09 +01:00
Sarah Hoffmann
c7f40e3cee fix verbose flag for PHP wrapper scripts
The flag must come after the command.
2021-02-26 16:49:32 +01:00
Sarah Hoffmann
dd03aeb966 bdd: use python library where possible
Replace calls to PHP scripts with direct calls into the
nominatim Python library where possible. This speed up
tests quite a bit.
2021-02-26 16:14:29 +01:00
Sarah Hoffmann
15b5906790 move setup function to python
There are still back-calls to PHP for some of the sub-steps.
These needs some larger refactoring to be moved to Python.
2021-02-26 15:02:39 +01:00
Sarah Hoffmann
3ee8d9fa75 properly close connections of indexer after use 2021-02-26 12:10:54 +01:00
Sarah Hoffmann
57db5819ef prot load-data function to python 2021-02-25 21:32:40 +01:00
Sarah Hoffmann
3c186f8030 add a function for the intial indexing run
Also moves postcodes to fully parallel indexing.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
db5e78c879 remove unused partitionfunction function 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
c7fd0a7af4 port wikipedia importance functions to python 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
32683f73c7 move import-data option to native python
This adds a new dependecy to the Python psutil package.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
7222235579 introduce custom object for cmdline arguments
Allows to define special functions over the arguments.

Also splits CLI tests in two files as they have become too many.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
f6e894a53a port database setup function to python
Hide the former PHP functions in a transition command until
they are removed.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
b93ec2522e use psql for executing sql files
This allows to run larger files without needing to keep
them in memory.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
af7226393a add function to set up libpq environment
Instead of parsing the DSN for each external libpq program we
are going to execute, provide a function that feeds them all
necessary parameters through the environment.

osm2pgsql is the first user.
2021-02-25 18:42:54 +01:00
Sarah Hoffmann
e520613362 convert connect() into a context manager 2021-02-25 18:42:54 +01:00
Sarah Hoffmann
204fe20b4b Merge pull request #2185 from lonvia/fix-deadlock-handling-for-psycopg27
Improve deadlock detection for various versions of psycopg2
2021-02-25 18:39:40 +01:00
Sarah Hoffmann
a1f0fc1a10 improve deadlock detection for various versions of psycopg2
Psycopg2 has changed the kind of exception that is emitted on
deadlocks between versions 2.7 and 2.8. The code was already
trying to catch both kind of errors but because the
psycopg2.errors package is unknown in 2.7 and below, the
code would throw an exception on anything but a deadlock error.

This commit wraps the deadlock handling into a context manager
to avoid code duplication and uses module imports to detect if
the new error codes are available.

Also sets the required psycopg2 version to 2.7 or bigger as
versions below are difficult to test.
2021-02-25 18:11:16 +01:00
Sarah Hoffmann
68c3862270 Merge pull request #2182 from lonvia/change-error-for-details
Return 404 for details when no object is found in database
2021-02-23 09:09:35 +01:00
Sarah Hoffmann
5b7483ada5 return 404 for details when no bject is found in database
Fixes #2157.
2021-02-22 16:28:29 +01:00
Sarah Hoffmann
72b01148d2 Merge pull request #2181 from lonvia/port-more-tool-functions-to-python
Port more tool functions to python
2021-02-22 16:11:21 +01:00
Sarah Hoffmann
971df231b0 avoid os.environ as default valie 2021-02-19 19:29:57 +01:00
Sarah Hoffmann
4b32cbe518 fix return code for check database run with 'not applicable' 2021-02-19 18:32:00 +01:00
Sarah Hoffmann
f08078ccca bdd tests: directly call python code for setup-website 2021-02-19 18:20:55 +01:00
Sarah Hoffmann
389138abfe port setup-website to python 2021-02-19 17:51:06 +01:00
Sarah Hoffmann
a0ae4945cd add unit tests for new check_database code 2021-02-18 20:36:11 +01:00
Sarah Hoffmann
b169e4c88c port check-database function to python
This change also adapts the hints to use the nominatim tool.
Slightly changed checks, so that they are just as effective on
a frozen database.
2021-02-18 17:32:30 +01:00
Sarah Hoffmann
a60c34bded use a frozen DB for API tests
This way we also test that dropping does the right thing.
2021-02-17 22:35:27 +01:00
Sarah Hoffmann
153dbb71b8 remove unused code 2021-02-17 22:25:23 +01:00
Sarah Hoffmann
101a1f895d port freeze function to python 2021-02-17 21:43:15 +01:00
Sarah Hoffmann
bd27310c68 Merge pull request #2173 from lonvia/penality-for-housenumberless-places
Increase penalty for places without housenumber
2021-02-17 17:52:59 +01:00
Sarah Hoffmann
42ecd535b3 Merge pull request #2174 from lonvia/disable-jit-for-osm2pgsql-again
Disable JIT and parallel execution for osm2pgsql updates again
2021-02-16 21:32:57 +01:00
Sarah Hoffmann
c9838a02ce disable JIT and parallel execution for osm2pgsql updates again
The gazetteer output doesn't disable these functions when
writing to the place table but the triggers may contain
operations that cause misplanning for the query planner.
2021-02-16 18:23:47 +01:00
Sarah Hoffmann
7ebcf602ac add simple test for result splitting with multiple ranks 2021-02-16 17:59:12 +01:00
Sarah Hoffmann
8eb85f1340 increase penalty for places without housenumber
Results where the housenumber was dropped are an unlikely result
when they refer to something other than a street. Therefore
increase their result rank so that other matches are tried first
before choosing them as a result.

Improves #2167.
2021-02-16 17:47:06 +01:00
Sarah Hoffmann
2a8e3741fa Merge pull request #2166 from mtmail/tiger-2020
documentation: 2020 TIGER data got released
2021-02-16 14:45:14 +01:00
Sarah Hoffmann
684378722c Merge pull request #2171 from lonvia/update-vagrant-scripts-for-make-install
Update vagrant scripts for make install
2021-02-16 14:42:38 +01:00
Sarah Hoffmann
286a686f88 switch vagrant scripts to make install 2021-02-16 12:04:34 +01:00
Sarah Hoffmann
7360e6c5df use file copy on older cmake to install osm2pgsql
Fixes #2170.
2021-02-16 11:06:14 +01:00
Sarah Hoffmann
fbe7be760b ignore failure to get replication date 2021-02-14 12:17:30 +01:00
marc tobias
a3ce89aeff documentation: 2020 TIGER data got released 2021-02-12 23:57:12 +01:00
Sarah Hoffmann
6a7e0d652b Merge pull request #2164 from lonvia/add-make-install
Make Nominatim installable
2021-02-12 10:32:26 +01:00
Sarah Hoffmann
7cc4c53adb always return 0 for updates unless there is an error
This is more in line with previous behavioru than returning
a status code when no updates are available.
2021-02-11 10:33:49 +01:00
Sarah Hoffmann
24b13a7a87 docs: adapt check-database command 2021-02-10 21:55:04 +01:00
Sarah Hoffmann
b6c2dbf69c actions: remove install directories before import
This ensures that any dangling references to the build
or source directory are caught by the CI.
2021-02-10 17:59:52 +01:00
Sarah Hoffmann
0e0e9a6809 need test database for analysing cli test 2021-02-10 16:19:51 +01:00
Sarah Hoffmann
ed60154552 actions: test import with installed version of Nominatim 2021-02-10 16:17:52 +01:00
Sarah Hoffmann
85589cf7dc add 'make install' to installation instructions 2021-02-10 11:15:21 +01:00
Sarah Hoffmann
99dcd10d3f test for existance of country grid in cmake already
Given that the file potentially gets installed, it needs to be
present during build time already. Checking during the import
is too late.
2021-02-10 10:40:36 +01:00
Sarah Hoffmann
745ae02f47 make installation targets conditional to what is built 2021-02-10 10:04:07 +01:00
Sarah Hoffmann
b6bd11f292 add make install target
Installation includes PHP andPython libraries, settings, the basic
country data, the postgresql module and our custom version of
osm2pgsql. The latter is installed in our private library directory
so that it does not get in the way of a potentially installed
osm2pgsql from the distribution.
2021-02-09 21:04:42 +01:00
Sarah Hoffmann
c60a0784ea adapt unit tests to new directory structure 2021-02-09 20:13:00 +01:00
Sarah Hoffmann
3cb6f3e460 use DataDir constant for data only
So far the data directory constant has pointed to the source
directory to be usable with different subdirectories. Now only
the data subdirectory itself is being used with the constant,
so point to the directory directly.
2021-02-09 20:04:08 +01:00
Sarah Hoffmann
de37dc9300 forgot to replace one occurence of sql_dir 2021-02-09 19:32:05 +01:00
Sarah Hoffmann
8ffd7d9243 remove unused BINDIR constant 2021-02-09 19:30:31 +01:00
Sarah Hoffmann
298ed11261 introduce constant for configuration directory
This replaces {data_dir}/settings throughout the code, so that
the configuration may be placed somewhere else in the directory
structure (e.g. in /etc).
2021-02-09 18:45:45 +01:00
Sarah Hoffmann
b9517c99ae rename sql directory to lib-sql
Also introduces a separate constant for the sql directory, so that
it can be put separately from the rest of the data if required.
2021-02-09 15:26:56 +01:00
Sarah Hoffmann
db3ced17bb rename lib to lib-php 2021-02-09 11:52:07 +01:00
Sarah Hoffmann
248b4cddab update osm2pgsql (disable install rule) 2021-02-09 09:48:50 +01:00
Sarah Hoffmann
d81e152804 integrate analyse of indexing into nominatim tool 2021-02-08 22:22:49 +01:00
Sarah Hoffmann
0cbf98c020 consolidate warm and db-check into single admin command 2021-02-08 21:05:06 +01:00
Sarah Hoffmann
195f9f5ef3 split cli.py by subcommands
Reduces file size below 1000 lines.
2021-02-08 17:23:05 +01:00
Sarah Hoffmann
a759c5b75b move website into php library directory 2021-02-08 12:00:34 +01:00
Sarah Hoffmann
7dfe645b5f move postcode table setup to sql/
Also moves the call to the setup from the setup-db
step to the calculate-postcodes step. The tables also need
no longer be accessible by the webservice.
2021-02-08 11:53:01 +01:00
Sarah Hoffmann
ca3283cbaa remove unused SQL script 2021-02-08 11:28:24 +01:00
Sarah Hoffmann
861e67dfe8 fix off-by-one error in replication download 2021-02-04 17:04:04 +01:00
Sarah Hoffmann
82ef02cd1a Merge pull request #2161 from lonvia/timeout-for-replication
Reintroduce timeout for replication file download
2021-02-04 16:52:24 +01:00
Sarah Hoffmann
948217d5e9 reintroduce timeout for replication file download
This ports the --socket-timeout parameter from
pyosmium-get-changes which ensures that the update
process eventually times out on hanging network connections.
2021-02-04 11:47:11 +01:00
Sarah Hoffmann
6cc06828db Merge pull request #2160 from lonvia/introduce-project-dir
Officially introduce and recommend use of a project directory
2021-02-04 09:52:59 +01:00
Sarah Hoffmann
0b2abfb115 replace make serve with nominatim serve command
With the website directory now tied to the project directory instead
of the build directory, it is no longer possible to use make for
running the web server.
2021-02-03 16:34:31 +01:00
Sarah Hoffmann
b2f8fb6201 add migration info for status table 2021-02-03 14:13:09 +01:00
Sarah Hoffmann
e2329c03fe Revert "increase splitting for large geometries"
This reverts commit 559fe513fa.

Increasing the splitting results in geometries where with rounding
issues at the split points, so that contain operations do not
work as expected anymore.

Fixes #2137.
2021-02-03 10:23:38 +01:00
Sarah Hoffmann
9bca670b4e adapt quick start instructions in README to project dir 2021-02-03 10:17:22 +01:00
Sarah Hoffmann
cb06d1f4ca do not overwrite custom set module paths
Given that the module is now copied to the project directory
when no module path is set, we need the information that the
module path is empty. Therefore hand in the default module path
in a separate variable.
2021-02-02 18:31:25 +01:00
Sarah Hoffmann
36447c488a print project directory before running any command 2021-02-02 11:19:31 +01:00
Sarah Hoffmann
69092030cd make phpcs happy 2021-02-02 11:15:56 +01:00
Sarah Hoffmann
109aa9c428 actions: switch to using separate project dir
Also fixes reverse-only import which not run at all.
2021-02-02 11:03:09 +01:00
Sarah Hoffmann
1d97816c53 docs: add hint about putting the nominatim tool into the PATH 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
7591c4fb42 copy database module on install
When no explicity database module is configured, then the
module is now copied into the project directory and used from
there. This means that Nominatim can be updated to a new
version of the module while existing installation keep their
version of normalisation.
2021-02-02 10:56:40 +01:00
Sarah Hoffmann
60cbeb165e hand in absolute path to nominatim tool to php scripts 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
bddfc109f8 refer to new nominatim tool in configuration comments 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
b05c379b39 change the default location for external data to project dir 2021-02-02 10:56:40 +01:00
Sarah Hoffmann
7ba5283fe8 actions: revert to reletive paths for caching 2021-02-02 10:37:18 +01:00
Sarah Hoffmann
98fe5af07d actions: remove setting custom .env
It only set the pyosmium-get-changes binary which is no longer
needed.
2021-02-02 10:35:30 +01:00
Sarah Hoffmann
59cb1d6c27 remove pyosmium-get-changes detection from cmake
pyosmium-get-changes is not longer used.
2021-02-02 10:33:15 +01:00
Sarah Hoffmann
0ad1b28497 Merge pull request #2155 from lonvia/port-regresh-to-python
Port replication and part of the refrsh function to native Python
2021-02-01 11:50:05 +01:00
Sarah Hoffmann
5f63d4ca1f print nice summary after updates 2021-02-01 10:34:31 +01:00
Sarah Hoffmann
90aaab77fc fix linting issues 2021-01-30 16:42:25 +01:00
Sarah Hoffmann
7158433cd3 disable warning about non-toplevel import
They are needed here so nominatim can be run when osmium
is not installed. Everything except replication will work fine.
2021-01-30 16:29:28 +01:00
Sarah Hoffmann
e629a175ed introduce custom UsageError
This is a exception to be thrown when the error occures because
of bad user data. We don't want to print a full stack trace in
these cases but just tell the user what went wrong.
2021-01-30 16:20:10 +01:00
Sarah Hoffmann
45ea73913f remove setting for PYOSMIUM_BINARY
pyosmium is now called as a library from the python code,
so that pyosmium-get-changes is no longer needed.
2021-01-30 15:55:04 +01:00
Sarah Hoffmann
01e0fd7e13 whitelist pyosmium for pylint 2021-01-30 15:52:49 +01:00
Sarah Hoffmann
4cb6dc01f3 port replication update function to python 2021-01-30 15:50:34 +01:00
Sarah Hoffmann
8f0885f6cb port check-for-update function to python 2021-01-28 14:50:14 +01:00
Sarah Hoffmann
beb0fa0727 Merge pull request #2153 from rizkyarlin/patch-1
fix indentation
2021-01-28 09:06:33 +01:00
Muh. Rizky Eka Arlin
436cb9229b fix indentation 2021-01-28 14:21:54 +08:00
Sarah Hoffmann
d78f0ba804 port replication initialisation to Python 2021-01-26 22:50:54 +01:00
Sarah Hoffmann
5b46fcad8e convert functon creation to python
The new functions always creates normal and partitioned functions.
Also adds specialised connection and cursor classes for adding
frequently used helper functions.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
94fa7162be port address level computation to Python
Also adds simple tests for correct table creation.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
e6c2842b66 move update code for postcode and word count to Python
Adds also tests for the new function to execute a SQL script.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
e6d9485c4a cli: import python modules for commands on demand
Given that only one command will be executed in the end, it is
not necessary to import what amounts to the whole library. This
becomes in particular important for update functions that have
a dependency on pyosmium. The dependency can remain optional for
people not using updates.
2021-01-26 22:50:54 +01:00
Sarah Hoffmann
30cd2f2280 remove API comparison util
This is outdated and unmaintained. There are tools out there
which can do this better. Try, for example
https://github.com/radarlabs/api-diff
2021-01-26 22:46:35 +01:00
Sarah Hoffmann
2c909c1f0c Merge pull request #2147 from lonvia/tests-for-python-code
Add basic set of tests for Python code
2021-01-21 10:07:50 +01:00
Sarah Hoffmann
063a4cb403 cli indexer tests need a fake database
The Indexer constructor opens a connection to the given database.
2021-01-20 21:30:27 +01:00
Sarah Hoffmann
42ec67f63c add more tests for CLI parameter parser 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
8c02786820 add tests for indexer 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
c26f323bf5 add simple tests for CLI parsing 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
041ae67fd9 optionally hand in command line arguments to CLI functions
Allows easier testing.
2021-01-20 21:30:27 +01:00
Sarah Hoffmann
bfa6580ad5 use pytest mocking functions for manipulating os.environ 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
52b76d1d01 add tests for Python exec_utils 2021-01-20 21:30:27 +01:00
Sarah Hoffmann
a3767f9142 Merge pull request #2146 from mtmail/two-typos
correct parameter name in query CLI
2021-01-20 21:29:49 +01:00
marc tobias
f62c784102 correct parameter name in query CLI 2021-01-20 21:09:41 +01:00
Sarah Hoffmann
ffc221a87f Merge pull request #2145 from lonvia/cli-query-functions
Add interface to search via command line tool
2021-01-20 09:00:45 +01:00
Sarah Hoffmann
8cf54a1317 add API functions to nominatim tool 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
77e287f669 rename nominatim.admin to nominatim.tools 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
5d95a72758 probe for php_cgi in cmake to be used for querying 2021-01-19 19:38:46 +01:00
Sarah Hoffmann
3475e1dfd6 Merge pull request #2143 from lonvia/integrate-indexer-into-nominatim-tool
Integrate indexer into nominatim tool
2021-01-19 08:42:22 +01:00
Sarah Hoffmann
504922ffbe remove old nominatim.py in favour of 'nominatim index'
The PHP scripts need to know the position of the nominatim
tool in order to call it. This is handed in as environment
variable, so it can be set by the Python script.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
c77877a934 implementaion of 'nominatim index' 2021-01-18 15:43:27 +01:00
Sarah Hoffmann
27977411e9 move indexing function into its own Python module
This makes it mow a standard function of our new Python
library instead of a stand-alone program.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
b79c79fa73 add function to get a DSN for psycopg
Converts the PHP DSN syntax into psycopg syntax when necessary.
2021-01-18 15:43:27 +01:00
Sarah Hoffmann
cd0001b55a Merge pull request #2142 from lonvia/update-bdd-api-tests
Update BDD API tests
2021-01-18 15:40:50 +01:00
Sarah Hoffmann
340e7f7210 bdd: complete coverage for API tests
Also removes some functions that are no longer used and
fixes debug output where the tests found an issue.
2021-01-17 16:12:06 +01:00
Sarah Hoffmann
f9c43137c9 remove unused output formatting functions 2021-01-16 17:39:49 +01:00
Sarah Hoffmann
171ed36e36 bdd: remove duplicated tests 2021-01-16 16:57:28 +01:00
Sarah Hoffmann
c6c907d451 bdd: clean up and extend API tests for details
- remove duplicates created by replacing HTML tests
  with JSON tests
- add tests for newer functions for returning geometries
  and hierarchies
2021-01-16 12:04:13 +01:00
Sarah Hoffmann
19ab038724 collect coverage for /website directory as well 2021-01-15 20:27:14 +01:00
Sarah Hoffmann
1c26fd489d Merge pull request #2139 from lonvia/add-pytest
Introduce unit testing for Python code
2021-01-15 17:37:36 +01:00
Sarah Hoffmann
e8cfba1b10 pytest may also be installed as py-test[-3] 2021-01-15 17:22:31 +01:00
Sarah Hoffmann
496a3d29db enable pytest testing in CI 2021-01-15 15:33:53 +01:00
Sarah Hoffmann
438ed431dd add documentation for new pytest tests 2021-01-15 15:18:45 +01:00
Sarah Hoffmann
f1f0032758 add pytest as a test goal in cmake 2021-01-15 15:09:36 +01:00
Sarah Hoffmann
eb3b789855 add initial pytest test for Configuration 2021-01-15 14:42:03 +01:00
Sarah Hoffmann
c077050855 Merge pull request #2136 from lonvia/introduce-pylint
Introduce pylint for code style checking for Python.
2021-01-15 14:39:26 +01:00
Sarah Hoffmann
d9998bfab3 pylint may be available as pylint3 or pylint 2021-01-15 10:52:25 +01:00
Sarah Hoffmann
7cf9d459d6 use check parameter of subprocess.run
...instead of checking on our own.

Also increase required version of Python to 3.5 because of
subprocess.run().
2021-01-15 10:43:04 +01:00
Sarah Hoffmann
de724aa576 add pylint to list of required linting tools
With pylint being run in the CI, passing it is required now.
2021-01-15 10:43:04 +01:00
Sarah Hoffmann
8e53f63036 fix errors reported by pylint 2021-01-15 08:57:00 +01:00
Sarah Hoffmann
565356613a Merge pull request #2135 from lonvia/python-frontend
Introduce new 'nominatim' all-in-one command-line tool
2021-01-15 08:56:07 +01:00
Sarah Hoffmann
eda0900c8e fix typo 2021-01-14 20:30:27 +01:00
Sarah Hoffmann
3dd67083b2 replace Symfony dotenv dependency with Python dotenv 2021-01-14 18:31:18 +01:00
Sarah Hoffmann
2f73bb3643 bdd: directly call utility scripts in lib
This removes the dependency on php-symfony-dotenv for the tests.
2021-01-14 18:19:22 +01:00
Sarah Hoffmann
9348fc5e15 move dotenv parsing to installed PHP scripts
This means that the php-symfony-dotenv library is now only needed
when using the legacy scripts. This includes the BDD tests which
currently still rely on the PHP utils.
2021-01-14 18:06:22 +01:00
Sarah Hoffmann
97710ee9d1 use cli tool for github CI 2021-01-14 16:35:01 +01:00
Sarah Hoffmann
9619cb3fe5 forward cli tool return value as exit code 2021-01-14 14:36:41 +01:00
Sarah Hoffmann
1c1e951826 adapt documentation to new nominatim cli tool 2021-01-14 12:12:38 +01:00
Sarah Hoffmann
88c57b4dc8 maller command execution fixes 2021-01-14 12:03:49 +01:00
Sarah Hoffmann
ba13cfd9ff make sure that environment variables have highest prio 2021-01-14 11:12:45 +01:00
Sarah Hoffmann
1ff8751caa liniting of new python code 2021-01-14 10:19:21 +01:00
Sarah Hoffmann
98dbc84836 add wrapper calls for all nominatim tool functions 2021-01-14 09:37:47 +01:00
Sarah Hoffmann
0847964a27 avoid accessing constants when getting data from env
When a setting can be read from the environment variable, avoid
accessing the internal defaults. This way the scripts can be
accessed directly in \lib as long as the environment is set up
correctly with full defaults.
2021-01-14 09:37:04 +01:00
Sarah Hoffmann
bc09d7aedb fix access to environment variable 2021-01-14 09:29:43 +01:00
Sarah Hoffmann
04690ad8c4 implement warming in new cli tool
Adds infrastructure for calling the legacy PHP scripts. As the
CONST_* values cannot be set from the python script, hand the values
in via secret environment variables instead. These are all
temporary hacks for the transition phase to python code.
2021-01-13 18:25:15 +01:00
Sarah Hoffmann
ec636111ba warm.php needs constant setup for queries
Warming is done using the query classes and therefore the same
copy-over from dotenv settings to CONST_* parameters is needed
as for query.php.
2021-01-13 18:12:53 +01:00
Sarah Hoffmann
e467b956ff set CONST_LibDir directly from the source scripts
Now that the source scripts have been moved to \lib, they
can determine the position of the PHP library relative to
themselves.
2021-01-13 17:00:38 +01:00
Sarah Hoffmann
ff5a237200 move PHP utilities into the lib directory
These are not called directly as programs but used in a library
fashion by the installed utilities. So the library directory
is a better place.
2021-01-13 14:44:45 +01:00
Sarah Hoffmann
d6bcb7c8b7 consolidate cli interface to single tool 2021-01-13 10:11:58 +01:00
Sarah Hoffmann
57f5e6d898 create skeleton for new CLI tools 2021-01-12 22:21:20 +01:00
Sarah Hoffmann
612fd50612 add skeleton for new Nominatim executables 2021-01-12 10:17:28 +01:00
Sarah Hoffmann
a74e736283 Merge pull request #2132 from lonvia/reduce-api-testdb
Reduce BDD API test database to Liechtenstein
2021-01-11 10:42:22 +01:00
Sarah Hoffmann
86cd5ddd65 also run BDD API tests in CI 2021-01-09 17:58:06 +01:00
Sarah Hoffmann
812de0545d test can be run all in one go with make 2021-01-09 17:57:30 +01:00
Sarah Hoffmann
3bed5516da update documentation for new BDD API tests 2021-01-09 17:54:45 +01:00
Sarah Hoffmann
0495dbe756 bdd: add new API test data
Make all data necessary for API tests directly available in the
repository.
2021-01-09 17:01:33 +01:00
Sarah Hoffmann
5d656891ba bdd: convert API tests to smaller test db
Changes BDD API tests to restrict themselves to
Liechtenstein. One test moved to DB as no appropriate
data is available.
2021-01-09 16:59:46 +01:00
Sarah Hoffmann
74122dc965 bdd: improve assert output for API query checks
Adds wrapper function for checking address parts and
more explanation strings to asserts.
2021-01-09 16:58:37 +01:00
Sarah Hoffmann
ee18a511c6 bdd: import API test DB as part of step setup
In the future, the BDD tests will simply set up the required
test database themselves. Like with the template database, it
is not reimported when it already exists unless that is explicitly
forced.

Makes most of the API tests currently fail because they still
point to old test data.
2021-01-07 11:51:38 +01:00
Sarah Hoffmann
da20881096 Merge pull request #2129 from lonvia/cleanup-bdd-tests
Clean up Python support code for BDD tests
2021-01-07 09:10:40 +01:00
Sarah Hoffmann
aaabb46f20 add symphony dotenv to prerequisites list 2021-01-07 08:56:52 +01:00
Sarah Hoffmann
49142eb6e5 use relative dir for sources for phpunit 2021-01-07 08:55:15 +01:00
Sarah Hoffmann
73cbb6eb9a bdd: clean up DB ops steps
Adds comments and modernizes code.
2021-01-06 16:37:32 +01:00
Sarah Hoffmann
1f29475fa5 bdd: move column comparison in separate file
Introduces a new class DBRow that encapsulates the comparison
functions. This also is responsible for formatting more informative
assert messages. place and placex steps are unified.
2021-01-06 12:28:09 +01:00
Sarah Hoffmann
d586b95ff1 bdd: move nominitim id reader to separate file 2021-01-05 16:00:48 +01:00
Sarah Hoffmann
25557e5f14 bdd: factor out reindexing on updates 2021-01-05 15:17:46 +01:00
Sarah Hoffmann
197870e67a bdd: move place table inserter into separate file
Also simplifies usage by implementing a function that inserts
a complete table row.
2021-01-05 12:12:59 +01:00
Sarah Hoffmann
b8e39d2dde bdd: move scene setup to OSM data steps
The step has nothing to do with the database.
2021-01-05 11:42:28 +01:00
Sarah Hoffmann
5dfa76a610 bdd: switch to auto commit mode
Put the connection to the test database into auto-commit mode
and get rid of the explicit commits. Also use cursors always in
context managers and unify the two implementations that copy
data from the place table.
2021-01-05 11:42:28 +01:00
Sarah Hoffmann
58c471c627 bdd: remove class for lazy formatting
assert in combination with format() does the right thing and calls
the __str__() method only when an assertion hits.
2021-01-05 10:39:44 +01:00
Sarah Hoffmann
213bf7d19d bdd: rename db_ops steps
Now all files implementing steps are called steps_*.py.
2021-01-05 10:20:00 +01:00
Sarah Hoffmann
12ae8a4ed3 bdd: move output format computation into response 2021-01-05 10:17:59 +01:00
Sarah Hoffmann
8a93f8ed94 bdd: move Response classes in own file and simplify
Removes most of the duplicated parse functions, introduces
a common assert_field function with a more expressive error
message.
2021-01-05 10:03:47 +01:00
Sarah Hoffmann
2712c5f90e bdd: rename and clean up osm_data steps
Move common OPL creation code into a function and remove
unused imports.
2021-01-04 20:17:17 +01:00
Sarah Hoffmann
72587b08fa bdd: move external process execution in separate func 2021-01-04 19:58:59 +01:00
Sarah Hoffmann
faa85ded50 bdd: move NominatimEnvironment into separate file
Also cleans up and modernizes the code and adds documentation.
2021-01-04 17:54:51 +01:00
Sarah Hoffmann
14e5bc7a17 bdd: move grid generation code into geometry factory 2021-01-04 17:04:47 +01:00
Sarah Hoffmann
f727620859 bdd: move geoemtry creation into separate file
Also renames the OsmDataFactory in the more appropriate
GeometryFactory and modernizes code for python3.
2021-01-04 16:34:40 +01:00
Sarah Hoffmann
843d3a137c remove stale code for python2 2021-01-04 14:14:34 +01:00
Sarah Hoffmann
e4691005e2 Merge pull request #2125 from lonvia/independent-project-directory
Allow for truely independent project directory
2021-01-04 14:10:24 +01:00
Sarah Hoffmann
4aba70caee create a temporary project dir for tests
The project directory contains the website script as
configured through the test configuration. This means
that tests are now completely independet of any
configuration that may be contained in the build
directory.

Also removes the hack to inject additional settings via
a environment variable.
2021-01-04 11:39:45 +01:00
Sarah Hoffmann
5e989b9296 configure osm2pgsql and module location via cmake
The default location of osm2pgsql and the postgresql module
is decided at compile/installation time and is not necessarily
in the project directory.

With this change it is now possible to have a project directory
that is completely separate from the build directory.
2021-01-04 11:37:56 +01:00
Sarah Hoffmann
cba2d252c8 Merge pull request #2124 from lonvia/remove-nose
Remove nose dependency for tests
2021-01-03 21:04:59 +01:00
Sarah Hoffmann
2ecec19df0 remove nose requirement from documentation 2021-01-03 17:23:44 +01:00
Sarah Hoffmann
4ca7197826 replace nose assertions with simple asserts 2021-01-03 17:21:24 +01:00
Sarah Hoffmann
a8ec250993 Merge pull request #2119 from mtmail/check-import-finished-when-tables-droped
utils/check_import_finished: skip some checks when setup ran with --drop
2020-12-22 15:57:48 +01:00
Sarah Hoffmann
f3e0e401fd Merge pull request #2118 from mtmail/vagrant-ubuntu-dotenv
Vagrant ubuntu: install dotenv package
2020-12-22 15:54:48 +01:00
marc tobias
d60f89867b utils/check_import_finished: skip some checks when setup ran with --drop 2020-12-21 20:12:31 +01:00
marc tobias
b133f2bc4c Vagrant ubuntu: install dotenv package 2020-12-21 20:10:13 +01:00
Sarah Hoffmann
301fd7f7e8 Merge pull request #2115 from lonvia/use-dotenv
Switch configuration to dotenv
2020-12-21 11:33:38 +01:00
Sarah Hoffmann
45148c7078 switch documentation to describing dotenv 2020-12-20 12:09:27 +01:00
Sarah Hoffmann
3c75194448 adapt instructions for creating the test db to dotenv 2020-12-20 11:53:19 +01:00
Sarah Hoffmann
f218e20522 mark CentOS installation instructions as broken
Getting symfony-dotenv installed on CentOS is a major pain,
so just mark it broken instead.

Still sSwitch the config format to dotenv already.
2020-12-20 11:35:29 +01:00
Sarah Hoffmann
33b038ce6f tests: always create the config file
There is also one database test that uses the API functions.
2020-12-19 17:55:46 +01:00
Sarah Hoffmann
f62c65e9d9 adapt php tests to new directory constants 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
867baab3d1 make phpcs happy 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
63ad0cb498 github actions: need dotenv 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
433017b990 move creation of website scripts to setup script
Instead of creating the website wrapper scripts with cmake,
they are now created when --setup-website is called. The
setup of the configuration constants is directly embedded
into the scripts. This means we can get rid of the separate
settings-frontend.php. More importantly however, it means
that it is now possible to set up multiple website directories
from the same build directory.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
d97aed8741 adapt tests to new dotenv environment
DB tests now can simply set the environment to change configuration
variables. API tests still rely on a configuration file.

Also, query.php needs to set up the CONST_* variables to work with
the query scripts. That is a tiny bit messy and duplicates code
but this part will need to be reworked later.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
06d89e1d47 fix various typos 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
8676e45d88 remove old default settings 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
992d3faac8 switch all utils to initialising dotenv 2020-12-19 14:33:04 +01:00
Sarah Hoffmann
0947b61808 switch remaining settings to dotenv format
CONST_Search_AreaPolygons and CONST_Search_ReversePlanForAll have
been removed completely.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
d43f30903c use explicit DSN for website scripts
Website scripts have no access to the dotenv variables, so use
the DSN constant instead when connecting to the database.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
15a1666f8a replace database settings with dotenv variant
As we can't refer to the project root dir in the module path, the
module path may now also be a relative directory which is then
taken as being relative to the project root path.

Moves the checkModulePresence() function into the Setup class, so
that it can work on the computed absolute module path.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
25bdd7c6d9 introduce dotenv parsing for setup.php
This adds the notion of a project directory. This is the directory
that holds all necessary files for one specific installation of
Nominatim. Dotenv looks for an .env file in this directory and
adds it to the global environment together with the defaults from
Nominatim's data directory.

Add's symfony's dotenv library as a new dependency.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
ac116980ac make HTTP proxy setup explicit
The setup relies on the project configuration which we want to
explicitly set up in later steps. Therefore proxy setup needs to
be done explicitly as well. There is the added bonus that the
setup is done only for the utils which try to call outside.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
b5480f6e36 reorganise path settings in config
CONST_BasePath is split into separate configuration variables
for binaries, libraries and data. These variables as well as
the installation path are now set in the executable directly and
no longer configurable via project settings.

This is the first step towards an installable software. The
executables should know per installation where to find their
necessary data to execute. Project configuration needs to be
restricted to settings that really concern the specific Nominatim
installation.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
17a8cc5e29 use /usr/bin/env for python script
Makes it easier to use the script with a virtualenv setup.
2020-12-19 14:33:04 +01:00
Sarah Hoffmann
aeeee0d5da Merge pull request #2112 from lonvia/fix-tests-for-php-8
work around failing CI tests
2020-12-18 14:25:50 +01:00
Sarah Hoffmann
de03a0f924 work around failing CI tests
Force use of phpunit7 to avoid an issue with different sort order.
2020-12-18 10:58:09 +01:00
Sarah Hoffmann
5528918d5d Issue templates: require postgres config modifications 2020-12-18 10:37:01 +01:00
Sarah Hoffmann
9e0d5cb669 Issue templates: more commenting of instructions 2020-12-16 08:41:20 +01:00
Sarah Hoffmann
11622b2863 Issue templates: put instructions into comments 2020-12-16 08:38:04 +01:00
821 changed files with 102991 additions and 15720 deletions

2
.github/FUNDING.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
github: lonvia
custom: "https://nominatim.org/funding/"

View File

@@ -7,16 +7,16 @@ assignees: ''
---
Before opening a new feature request, please search through the open issue to check that your request hasn't been reported already.
<!-- Before opening a new feature request, please search through the open issue to check that your request hasn't been reported already. -->
**Is your feature request related to a problem? Please describe.**
A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
**Describe the solution you'd like**
A clear and concise description of what you want to happen.
<!-- A clear and concise description of what you want to happen. -->
**Describe alternatives you've considered**
A clear and concise description of any alternative solutions or features you've considered.
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
**Additional context**
Add any other context or screenshots about the feature request here.
<!-- Add any other context or screenshots about the feature request here. -->

View File

@@ -7,21 +7,23 @@ assignees: ''
---
<!-- Note: this template is for reporting problems with searching. If you have found an issue with the data, you need to report/fix the issue directly in OpenStreetMap. See https://www.openstreetmap.org/fixthemap for details. -->
## What did you search for?
Please try to provide a link to your search. You can go to https://nominatim.openstreetmap.org and repeat your search there. If you originally found the issue somewhere else, please tell us what software/website you were using.
<!-- Please try to provide a link to your search. You can go to https://nominatim.openstreetmap.org and repeat your search there. If you originally found the issue somewhere else, please tell us what software/website you were using. -->
## What result did you get?
## What result did you expect?
**Is the result in the right place and just named wrongly?**
**When the result in the right place and just named wrongly:**
Please tell us the display name you expected.
<!-- Please tell us the display name you expected. -->
**Is the result missing completely?**
**When the result missing completely:**
Make sure that the data you are looking for is in OpenStreetMap. Provide a link to the OpenStreetMap object or if you cannot get it, a link to the map on https://openstreetmap.org where you expect the result to be.
<!-- Make sure that the data you are looking for is in OpenStreetMap. Provide a link to the OpenStreetMap object or if you cannot get it, a link to the map on https://openstreetmap.org where you expect the result to be.
To get the link to the OSM object, you can try the following:
@@ -30,7 +32,8 @@ To get the link to the OSM object, you can try the following:
* Click on the question mark on the right side of the map. You get a question cursor. Use it to click on the map where your object is located.
* Find the object of interest in the list that appears on the left side.
* Click on the object and report back the URL that the browser shows.
-->
## Further details
Anything else we should know about the search. Particularities with addresses in the area etc.
<!-- Anything else we should know about the search. Particularities with addresses in the area etc. -->

View File

@@ -7,13 +7,16 @@ assignees: ''
---
___Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.___
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.
Do not send screen shots! Copy any console output directly into the issue.
-->
**Describe the bug**
A clear and concise description of what the bug is.
<!-- A clear and concise description of what the bug is.-->
**To Reproduce**
Please describe what you did to get to the issue.
<!-- Please describe what you did to get to the issue. -->
**Software Environment (please complete the following information):**
- Nominatim version:
@@ -25,7 +28,15 @@ Please describe what you did to get to the issue.
- RAM:
- number of CPUs:
- type and size of disks:
- bare metal/AWS/other cloud service:
**Postgresql Configuration:**
<!-- List any configuration items you changed in your postgresql configuration. -->
**Nominatim Configuration:**
<!-- List the contents of your customized `.env` file. -->
**Additional context**
Add any other context about the problem here.
<!-- Add any other context about the problem here. -->

View File

@@ -1,28 +1,52 @@
name: 'Build Nominatim'
inputs:
flavour:
description: 'Version of Ubuntu to install on'
required: false
default: 'ubuntu-20'
cmake-args:
description: 'Additional options to hand to cmake'
required: false
default: ''
lua:
description: 'Version of Lua to use'
required: false
default: '5.3'
runs:
using: "composite"
steps:
- name: Install prerequisits
run: sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium
- name: Clean out the disk
run: |
sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
df -h
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
pip3 install sqlalchemy psycopg
fi
shell: bash
env:
FLAVOUR: ${{ inputs.flavour }}
CMAKE_ARGS: ${{ inputs.cmake-args }}
LUA_VERSION: ${{ inputs.lua }}
- name: Configure
run: mkdir build && cd build && cmake ..
run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim
shell: bash
env:
CMAKE_ARGS: ${{ inputs.cmake-args }}
- name: Build
run: |
make -j2 all
./utils/setup.php --setup-website
sudo make install
shell: bash
working-directory: build
- name: Download dependencies
run: |
if [ ! -f data/country_osm_grid.sql.gz ]; then
wget --no-verbose -O data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
fi
shell: bash

View File

@@ -14,13 +14,17 @@ runs:
steps:
- name: Remove existing PostgreSQL
run: |
sudo apt-get update -qq
sudo apt-get purge -yq postgresql*
sudo apt install curl ca-certificates gnupg
curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt-get update -qq
shell: bash
- name: Install PostgreSQL
run: |
sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER} postgresql-server-dev-${PGVER}
sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER}
shell: bash
env:
PGVER: ${{ inputs.postgresql-version }}

View File

@@ -3,118 +3,346 @@ name: CI Tests
on: [ push, pull_request ]
jobs:
tests:
runs-on: ubuntu-20.04
strategy:
matrix:
postgresql: [9.5, 13]
include:
- postgresql: 9.5
postgis: 2.5
- postgresql: 13
postgis: 3
create-archive:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
submodules: true
submodules: true
- name: Get Date
id: get-date
run: |
echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
shell: bash
- uses: actions/cache@v2
- uses: actions/cache@v3
with:
path: |
data/country_osm_grid.sql.gz
monaco-latest.osm.pbf
key: nominatim-data-${{ steps.get-date.outputs.date }}
data/country_osm_grid.sql.gz
key: nominatim-country-data-1
- uses: ./.github/actions/setup-postgresql
- name: Package tarball
run: |
if [ ! -f data/country_osm_grid.sql.gz ]; then
wget --no-verbose -O data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
fi
cd ..
tar czf nominatim-src.tar.bz2 Nominatim
mv nominatim-src.tar.bz2 Nominatim
- name: 'Upload Artifact'
uses: actions/upload-artifact@v3
with:
name: full-source
path: nominatim-src.tar.bz2
retention-days: 1
tests:
needs: create-archive
strategy:
matrix:
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
include:
- flavour: oldstuff
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
php: '7.3'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
postgresql: 13
postgis: 3
php: '7.4'
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
postgresql: 15
postgis: 3
php: '8.1'
lua: '5.3'
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/download-artifact@v3
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
tools: phpunit:9, phpcs, composer
ini-values: opcache.jit=disable
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/setup-python@v4
with:
python-version: 3.7
if: matrix.flavour == 'oldstuff'
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
- uses: ./.github/actions/build-nominatim
- name: Install test prerequsites
run: |
sudo apt-get install -y -qq php-codesniffer python3-tidylib
sudo pip3 install behave nose
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ${{ matrix.flavour }}
lua: ${{ matrix.lua }}
- name: Install test prerequsites (behave from apt)
run: sudo apt-get install -y -qq python3-behave
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequsites (behave from pip)
run: pip3 install behave==1.2.6
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
- name: Install test prerequsites (from apt for Ununtu 2x)
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'
- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequsites (from pip for Ubuntu 18)
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
run: pip3 install falcon starlette
- name: Install latest pylint
run: pip3 install -U pylint asgi_lifespan
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: Python linting
run: python3 -m pylint nominatim
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: test/php
working-directory: Nominatim/test/php
- name: Python unit tests
run: python3 -m pytest test/python
working-directory: Nominatim
- name: BDD tests
run: behave -DREMOVE_TEMPLATE=1 --format=progress3 db osm2pgsql
working-directory: test/bdd
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
import:
- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
if: matrix.flavour != 'oldstuff'
- name: Python static typechecking
run: python3 -m mypy --strict nominatim
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'
legacy-test:
needs: create-archive
runs-on: ubuntu-20.04
steps:
- uses: actions/checkout@v2
- uses: actions/download-artifact@v3
with:
submodules: true
name: full-source
- name: Get Date
id: get-date
run: |
echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
shell: bash
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: actions/cache@v2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
path: |
data/country_osm_grid.sql.gz
monaco-latest.osm.pbf
key: nominatim-data-${{ steps.get-date.outputs.date }}
php-version: '7.4'
- uses: ./.github/actions/setup-postgresql
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 13
postgis-version: 3
- uses: ./.github/actions/build-nominatim
- name: Create configuration
run: |
echo '<?php' > settings/local.php
echo " @define('CONST_Pyosmium_Binary', '/usr/lib/python3-pyosmium/pyosmium-get-changes');" >> settings/local.php
working-directory: build
- name: Install Postgresql server dev
run: sudo apt-get install postgresql-server-dev-13
- name: Download import data
- uses: ./Nominatim/.github/actions/build-nominatim
with:
cmake-args: -DBUILD_MODULE=on
- name: Install test prerequsites
run: sudo apt-get install -y -qq python3-behave
- name: BDD tests (legacy tokenizer)
run: |
if [ ! -f monaco-latest.osm.pbf ]; then
wget --no-verbose https://download.geofabrik.de/europe/monaco-latest.osm.pbf
fi
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
python-api-test:
needs: create-archive
runs-on: ubuntu-22.04
steps:
- uses: actions/download-artifact@v3
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 15
postgis-version: 3
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: 'ubuntu-22'
- name: Install test prerequsites
run: sudo apt-get install -y -qq python3-behave
- name: Install Python webservers
run: pip3 install starlette asgi_lifespan httpx
- name: BDD tests (starlette)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=starlette --format=progress3
working-directory: Nominatim/test/bdd
install:
runs-on: ubuntu-latest
needs: create-archive
strategy:
matrix:
name: [Ubuntu-20, Ubuntu-22]
include:
- name: Ubuntu-20
image: "ubuntu:20.04"
ubuntu: 20
install_mode: install-apache
- name: Ubuntu-22
image: "ubuntu:22.04"
ubuntu: 22
install_mode: install-apache
container:
image: ${{ matrix.image }}
env:
LANG: en_US.UTF-8
defaults:
run:
shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0}
steps:
- name: Prepare container (Ubuntu)
run: |
export APT_LISTCHANGES_FRONTEND=none
export DEBIAN_FRONTEND=noninteractive
apt-get update -qq
apt-get install -y git sudo wget
ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
shell: bash
- name: Setup import user
run: |
useradd -m nominatim
echo 'nominatim ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominiatim
echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh
shell: bash
env:
OS: ${{ matrix.name }}
INSTALL_MODE: ${{ matrix.install_mode }}
- uses: actions/download-artifact@v3
with:
name: full-source
path: /home/nominatim
- name: Install Nominatim
run: |
export USERNAME=nominatim
export USERHOME=/home/nominatim
export NOSYSTEMD=yes
export HAVE_SELINUX=no
tar xf nominatim-src.tar.bz2
. vagrant.sh
working-directory: /home/nominatim
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
mv Nominatim/settings/flex-base.lua flex-base.lua
mv Nominatim/settings/import-extratags.lua import-extratags.lua
mv Nominatim/settings/taginfo.lua taginfo.lua
rm -rf Nominatim
mkdir data-env-reverse
working-directory: /home/nominatim
- name: Print version
run: nominatim --version
working-directory: /home/nominatim/nominatim-project
- name: Print taginfo
run: lua taginfo.lua
working-directory: /home/nominatim
- name: Collect host OS information
run: nominatim admin --collect-os-info
working-directory: /home/nominatim/nominatim-project
- name: Import
run: php ./utils/setup.php --osm-file ../monaco-latest.osm.pbf --osm2pgsql-cache 500 --all
working-directory: build
run: nominatim import --osm-file ../test.pbf
working-directory: /home/nominatim/nominatim-project
- name: Import special phrases
run: php ./utils/specialphrases.php --wiki-import | psql -d nominatim
working-directory: build
run: nominatim special-phrases --import-from-wiki
working-directory: /home/nominatim/nominatim-project
- name: Check import
run: php ./utils/check_import_finished.php
working-directory: build
- name: Check full import
run: nominatim admin --check-database
working-directory: /home/nominatim/nominatim-project
- name: Warm up database
run: nominatim admin --warm
working-directory: /home/nominatim/nominatim-project
- name: Prepare update (Ubuntu)
run: apt-get install -y python3-pip
shell: bash
- name: Run update
run: |
php ./utils/update.php --init-updates
php ./utils/update.php --import-osmosis
working-directory: build
pip3 install --user osmium
nominatim replication --init
NOMINATIM_REPLICATION_MAX_DIFF=1 nominatim replication --once
working-directory: /home/nominatim/nominatim-project
- name: Clean up database
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
- name: Run reverse-only import
run : |
dropdb nominatim
php ./utils/setup.php --osm-file ../monaco-latest.osm.pbf --reverse-only --all
working-directory: build
echo 'NOMINATIM_DATABASE_DSN="pgsql:dbname=reverse"' >> .env
nominatim import --osm-file ../test.pbf --reverse-only --no-updates
working-directory: /home/nominatim/data-env-reverse
- name: Check reverse-only import
run: nominatim admin --check-database
working-directory: /home/nominatim/data-env-reverse
- name: Clean up database (reverse-only import)
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project

8
.gitignore vendored
View File

@@ -1,11 +1,9 @@
*.log
*.pyc
build
settings/local.php
docs/develop/*.png
data/wiki_import.sql
data/wiki_specialphrases.sql
data/osmosischange.osc
build
.vagrant
data/country_osm_grid.sql.gz

23
.mypy.ini Normal file
View File

@@ -0,0 +1,23 @@
[mypy]
plugins = sqlalchemy.ext.mypy.plugin
[mypy-sanic_cors.*]
ignore_missing_imports = True
[mypy-icu.*]
ignore_missing_imports = True
[mypy-asyncpg.*]
ignore_missing_imports = True
[mypy-datrie.*]
ignore_missing_imports = True
[mypy-dotenv.*]
ignore_missing_imports = True
[mypy-falcon.*]
ignore_missing_imports = True
[mypy-geoalchemy2.*]
ignore_missing_imports = True

18
.pylintrc Normal file
View File

@@ -0,0 +1,18 @@
[MASTER]
extension-pkg-whitelist=osmium,falcon
ignored-modules=icu,datrie
[MESSAGES CONTROL]
[TYPECHECK]
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict
# 'not-context-manager' disabled because it causes false positives once
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init
good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr

16
AUTHORS
View File

@@ -1,15 +1,15 @@
Nominatim was written by:
Brian Quinion
Sarah Hoffmann
Marc Tobias Metten
* Brian Quinion
* Sarah Hoffmann
* Marc Tobias Metten
markigail
gemo1011
IrlJidel
Frederik Ramm
* markigail
* AntoJvlt
* gemo1011
* darkshredder
and many more.
For a full list of contributors see
For a full list of contributors see the Git logs or visit
https://github.com/openstreetmap/Nominatim/graphs/contributors

View File

@@ -6,7 +6,7 @@
#
#-----------------------------------------------------------------------------
cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
@@ -18,14 +18,25 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 3)
set(NOMINATIM_VERSION_MINOR 6)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_MAJOR 4)
set(NOMINATIM_VERSION_MINOR 3)
set(NOMINATIM_VERSION_PATCH 1)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
# Setting GIT_HASH
find_package(Git)
if (GIT_FOUND)
execute_process(
COMMAND "${GIT_EXECUTABLE}" log -1 --format=%h
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
OUTPUT_VARIABLE GIT_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
endif()
#-----------------------------------------------------------------------------
# Configuration
@@ -33,10 +44,12 @@ add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
set(BUILD_API on CACHE BOOL "Build everything for the API server")
set(BUILD_MODULE on CACHE BOOL "Build PostgreSQL module")
set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
set(BUILD_TESTS on CACHE BOOL "Build test suite")
set(BUILD_DOCS on CACHE BOOL "Build documentation")
set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
set(INSTALL_MUNIN_PLUGINS on CACHE BOOL "Install Munin plugins for supervising Nominatim")
#-----------------------------------------------------------------------------
# osm2pgsql (imports/updates only)
@@ -50,27 +63,17 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
endif()
set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
set(BUILD_TESTS off)
set(WITH_LUA off CACHE BOOL "")
add_subdirectory(osm2pgsql)
set(BUILD_TESTS ${BUILD_TESTS_SAVED})
endif()
#-----------------------------------------------------------------------------
# python and pyosmium (imports/updates only)
# python (imports/updates only)
#-----------------------------------------------------------------------------
if (BUILD_IMPORTER)
find_package(PythonInterp 3)
find_program(PYOSMIUM pyosmium-get-changes)
if (NOT EXISTS "${PYOSMIUM}")
set(PYOSMIUM_PATH "")
message(WARNING "pyosmium-get-changes not found (required for updates)")
else()
set(PYOSMIUM_PATH "${PYOSMIUM}")
message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
endif()
find_package(PythonInterp 3.7 REQUIRED)
endif()
#-----------------------------------------------------------------------------
@@ -86,8 +89,9 @@ if (BUILD_API OR BUILD_IMPORTER)
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
#-----------------------------------------------------------------------------
@@ -95,70 +99,21 @@ endif()
#-----------------------------------------------------------------------------
if (BUILD_IMPORTER)
set(CUSTOMSCRIPTS
utils/check_import_finished.php
utils/country_languages.php
utils/importWikipedia.php
utils/export.php
utils/query.php
utils/setup.php
utils/specialphrases.php
utils/update.php
utils/warm.php
)
find_file(COUNTRY_GRID_FILE country_osm_grid.sql.gz
PATHS ${PROJECT_SOURCE_DIR}/data
NO_DEFAULT_PATH
DOC "Location of the country grid file."
)
foreach (script_source ${CUSTOMSCRIPTS})
configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
${PROJECT_BINARY_DIR}/${script_source})
endforeach()
if (NOT COUNTRY_GRID_FILE)
message(FATAL_ERROR "\nYou need to download the country_osm_grid first:\n"
" wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
endif()
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
${PROJECT_BINARY_DIR}/nominatim)
endif()
#-----------------------------------------------------------------------------
# webserver scripts (API only)
#-----------------------------------------------------------------------------
if (BUILD_API)
set(WEBSITESCRIPTS
website/deletable.php
website/details.php
website/lookup.php
website/polygons.php
website/reverse.php
website/search.php
website/status.php
)
foreach (script_source ${WEBSITESCRIPTS})
configure_file(${PROJECT_SOURCE_DIR}/cmake/website.tmpl
${PROJECT_BINARY_DIR}/${script_source})
endforeach()
set(WEBPATHS css images js)
foreach (wp ${WEBPATHS})
execute_process(
COMMAND ln -sf ${PROJECT_SOURCE_DIR}/website/${wp} ${PROJECT_BINARY_DIR}/website/
)
endforeach()
add_custom_target(serve
php -S 127.0.0.1:8088
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/website
)
add_custom_target(serve-global
php -S 0.0.0.0:8088
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}/website
)
endif()
#-----------------------------------------------------------------------------
# default settings
#-----------------------------------------------------------------------------
configure_file(${PROJECT_SOURCE_DIR}/settings/defaults.php
${PROJECT_BINARY_DIR}/settings/settings.php)
#-----------------------------------------------------------------------------
# Tests
#-----------------------------------------------------------------------------
@@ -168,21 +123,60 @@ if (BUILD_TESTS)
set(TEST_BDD db osm2pgsql api)
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND behave ${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
find_program(PYTHON_BEHAVE behave)
find_program(PYLINT NAMES pylint3 pylint)
find_program(PYTEST NAMES pytest py.test-3 py.test)
find_program(PHPCS phpcs)
find_program(PHPUNIT phpunit)
add_test(NAME php
COMMAND phpunit ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
if (PYTHON_BEHAVE)
message(STATUS "Using Python behave binary ${PYTHON_BEHAVE}")
foreach (test ${TEST_BDD})
add_test(NAME bdd_${test}
COMMAND ${PYTHON_BEHAVE} ${test}
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
set_tests_properties(bdd_${test}
PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
endforeach()
else()
message(WARNING "behave not found. BDD tests disabled." )
endif()
add_test(NAME phpcs
COMMAND phpcs --report-width=120 --colors lib website utils
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
if (PHPUNIT)
message(STATUS "Using phpunit binary ${PHPUNIT}")
add_test(NAME php
COMMAND ${PHPUNIT} ./
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
else()
message(WARNING "phpunit not found. PHP unit tests disabled." )
endif()
if (PHPCS)
message(STATUS "Using phpcs binary ${PHPCS}")
add_test(NAME phpcs
COMMAND ${PHPCS} --report-width=120 --colors lib-php
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "phpcs not found. PHP linting tests disabled." )
endif()
if (PYLINT)
message(STATUS "Using pylint binary ${PYLINT}")
add_test(NAME pylint
COMMAND ${PYLINT} nominatim
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pylint not found. Python linting tests disabled.")
endif()
if (PYTEST)
message(STATUS "Using pytest binary ${PYTEST}")
add_test(NAME pytest
COMMAND ${PYTEST} test/python
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
else()
message(WARNING "pytest not found. Python tests disabled." )
endif()
endif()
#-----------------------------------------------------------------------------
@@ -200,3 +194,91 @@ endif()
if (BUILD_DOCS)
add_subdirectory(docs)
endif()
#-----------------------------------------------------------------------------
# Manual page
#-----------------------------------------------------------------------------
if (BUILD_MANPAGE)
add_subdirectory(man)
endif()
#-----------------------------------------------------------------------------
# Installation
#-----------------------------------------------------------------------------
include(GNUInstallDirs)
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
set(NOMINATIM_MUNINDIR ${CMAKE_INSTALL_FULL_DATADIR}/munin/plugins)
if (BUILD_IMPORTER)
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
install(PROGRAMS ${PROJECT_BINARY_DIR}/installed.bin
DESTINATION ${CMAKE_INSTALL_BINDIR}
RENAME nominatim)
install(DIRECTORY nominatim
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
FILES_MATCHING PATTERN "*.py"
PATTERN "paths.py" EXCLUDE
PATTERN __pycache__ EXCLUDE)
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
RENAME paths.py)
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
install(FILES ${COUNTRY_GRID_FILE}
data/words.sql
DESTINATION ${NOMINATIM_DATADIR})
endif()
if (BUILD_OSM2PGSQL)
if (${CMAKE_VERSION} VERSION_LESS 3.13)
# Installation of subdirectory targets was only introduced in 3.13.
# So just copy the osm2pgsql file for older versions.
install(PROGRAMS ${PROJECT_BINARY_DIR}/osm2pgsql/osm2pgsql
DESTINATION ${NOMINATIM_LIBDIR})
else()
install(TARGETS osm2pgsql RUNTIME DESTINATION ${NOMINATIM_LIBDIR})
endif()
endif()
if (BUILD_MODULE)
install(PROGRAMS ${PROJECT_BINARY_DIR}/module/nominatim.so
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
if (BUILD_API)
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase-settings.json
settings/import-admin.lua
settings/import-street.lua
settings/import-address.lua
settings/import-full.lua
settings/import-extratags.lua
settings/flex-base.lua
settings/icu_tokenizer.yaml
settings/country_settings.yaml
DESTINATION ${NOMINATIM_CONFIGDIR})
install(DIRECTORY settings/icu-rules
DESTINATION ${NOMINATIM_CONFIGDIR})
install(DIRECTORY settings/country-names
DESTINATION ${NOMINATIM_CONFIGDIR})
if (INSTALL_MUNIN_PLUGINS)
install(FILES munin/nominatim_importlag
munin/nominatim_query_speed
munin/nominatim_requests
DESTINATION ${NOMINATIM_MUNINDIR})
endif()

View File

@@ -36,7 +36,7 @@ Nominatim historically hasn't followed a particular coding style but we
are in process of consolidating the style. The following rules apply:
* Python code uses the official Python style
* indention
* indentation
* SQL use 2 spaces
* all other file types use 4 spaces
* [BSD style](https://en.wikipedia.org/wiki/Indent_style#Allman_style) for braces
@@ -49,22 +49,54 @@ are in process of consolidating the style. The following rules apply:
* for PHP variables use CamelCase with a prefixing letter indicating the type
(i - integer, f - float, a - array, s - string, o - object)
The coding style is enforced with PHPCS and can be tested with:
The coding style is enforced with PHPCS and pylint. It can be tested with:
```
phpcs --report-width=120 --colors .
phpcs --report-width=120 --colors .
pylint3 --extension-pkg-whitelist=osmium nominatim
```
## Testing
Before submitting a pull request make sure that the following tests pass:
Before submitting a pull request make sure that the tests pass:
```
cd test/bdd
behave -DBUILDDIR=<builddir> db osm2pgsql
cd build
make test
```
```
cd test/php
phpunit ./
```
## Releases
Nominatim follows semantic versioning. Major releases are done for large changes
that require (or at least strongly recommend) a reimport of the databases.
Minor releases can usually be applied to exisiting databases. Patch releases
contain bug fixes only and are released from a separate branch where the
relevant changes are cherry-picked from the master branch.
Checklist for releases:
* [ ] increase version in `nominatim/version.py` and CMakeLists.txt
* [ ] update `ChangeLog` (copy information from patch releases from release branch)
* [ ] complete `docs/admin/Migration.md`
* [ ] update EOL dates in `SECURITY.md`
* [ ] commit and make sure CI tests pass
* [ ] test migration
* download, build and import previous version
* migrate using master version
* run updates using master version
* [ ] prepare tarball:
* `git clone --recursive https://github.com/osm-search/Nominatim` (switch to right branch!)
* `rm -r .git* osm2pgsql/.git*`
* copy country data into `data/`
* add version to base directory and package
* [ ] upload tarball to https://nominatim.org
* [ ] prepare documentation
* check out new docs branch
* change git checkout instructions to tarball download instructions or adapt version on existing ones
* build documentation and copy to https://github.com/osm-search/nominatim-org-site
* add new version to history
* [ ] check release tarball
* download tarball as per new documentation instructions
* compile and import Nominatim
* run `nominatim --version` to confirm correct version
* [ ] tag new release and add a release on github.com

211
ChangeLog
View File

@@ -1,3 +1,214 @@
4.3.1
* reintroduce result rematching
* improve search of multi-part names
* fix accidentally switched meaning of --reverse-only and --search-only in
warm command
4.3.0
* fix failing importance recalculation command
* fix merging of linked names into unnamed boundaries
* fix a number of corner cases with interpolation splitting resulting in
invalid geometries
* fix failure in website generation when password contains curly brackets
* fix broken use of ST_Project in PostGIS 3.4
* new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
to known countries (thanks @alfmarcua)
* allow negative OSM IDs (thanks @alfmarcua)
* disallow import of Tiger data in a frozen DB
* avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
* update bundled osm2pgsql to 1.9.2
* reorganise osm2pgsql flex style and make it the default
* exclude names ending in :wikipedia from indexing
* no longer accept comma as a list separator in name tags
* process forward dependencies on update to catch updates in geometries
of ways and relations
* fix handling of isolated silent letters during transliteration
* no longer assign postcodes to large linear features like rivers
* introduce nominatim.paths module for finding data and libraries
* documentation layout changed to material theme
* new documentation section for library
* various smaller fixes to existing documentation
(thanks @woodpeck, @bloom256, @biswajit-k)
* updates to vagrant install scripts, drop support for Ubunut 18
(thanks @n-timofeev)
* removed obsolete configuration variables from env.defaults
* add script for generating a taginfo description (thanks @biswajit-k)
* modernize Python code around BDD test and add testing of Python frontend
* lots of new BDD tests for API output
4.2.3
* fix deletion handling for 'nominatim add-data'
* adapt place_force_delete() to new deletion handling
* flex style: avoid dropping of postcode areas
* fix update errors on address interpolation handling
4.2.2
* extend flex-style library to fully support all default styles
* fix handling of Hebrew aleph
* do not assign postcodes to rivers
* fix string matching in PHP code
* update osm2pgsql (various updates to flex)
* fix slow query when deleting places on update
* fix CLI details query
* fix recalculation of importance values
* fix polygon simplification in reverse results
* add class/type information to reverse geocodejson result
* minor improvements to default tokenizer configuration
* various smaller fixes to documentation
4.2.1
* fix XSS vulnerability in debug view
4.2.0
* add experimental support for osm2pgsql flex style
* introduce secondary importance value to be retrieved from a raster data file
(currently still unused, to replace address importance, thanks to @tareqpi)
* add new report tool `nominatim admin --collect-os-info`
(thanks @micahcochran, @tareqpi)
* reorganise index to improve lookup performance and size
* run index creation after import in parallel
* run ANALYZE more selectively to speed up continuation of indexing
* fix crash on update when addr:interpolation receives an illegal value
* fix minimum number of retrieved results to be at least 10
* fix search for combinations of special term + name (e.g Hotel Bellevue)
* do not return interpolations without a parent street on reverse search
* improve invalidation of linked places on updates
* fix address parsing for interpolation lines
* make sure socket timeouts are respected during replication
(working around a bug in some versions of pyosmium)
* update bundled osm2pgsql to 1.7.1
* add support for PostgreSQL 15
* typing fixes to work with latest type annotations from typeshed
* smaller improvements to documentation (thanks to @mausch)
4.1.1
* fix XSS vulnerability in debug view
4.1.0
* switch to ICU tokenizer as default
* add housenumber normalization and support optional spaces during search
* add postcode format checking and support optional spaces during search
* add function for cleaning housenumbers in word table
* add updates/deletion of country names imported from OSM
* linked places no longer overwrite names from a place permanently
* move default country name configuration into yaml file (thanks @tareqpi)
* more compact layout for interpolation and TIGER tables
* introduce mutations to ICU tokenizer (used for German umlauts)
* support reinitializing a full project directory with refresh --website
* fix various issues with linked places on updates
* add support for external sanitizers and token analyzers
* add CLI commands for forced indexing
* add CLI command for version report
* add offline import mode
* change geocodejson to return a feature class in the 'type' field
* add ISO3166-2 to address output (thanks @I70l0teN4ik)
* improve parsing and matching of addr: tags
* support relations as street members of associatedStreet
* better ranking for address results from TIGER data
* adapt rank classification to changed tag usage in OSM
* update bundled osm2pgsql to 1.6.0
* add typing information to Python code
* improve unit test coverage
* reorganise and speed up code for BDD tests, drop support for scenes
* move PHP unit tests to PHP 9.5
* extensive typo fixes in documentation (thanks @woodpeck,@StephanGeorg,
@amandasaurus, @nslxndr, @stefkiourk, @Luflosi, @kianmeng)
* drop official support for installation on CentOS
* add installation instructions for Ubuntu 22.04
* add support for PHP8
* add setup instructions for updates and systemd
* drop support for PostgreSQL 9.5
4.0.2
* fix XSS vulnerability in debug view
4.0.1
* fix initialisation error in replication script
* ICU tokenizer: avoid any special characters in word tokens
* better error message when API php script does not exist
* fix quoting of house numbers in SQL queries
* small fixes and improvements in search query parsing
* add documentation for moving the database to a different machine
4.0.0
* refactor name token computation and introduce ICU tokenizer
* name processing now happens in the indexer outside the DB
* reorganizes abbreviation handling and moves it to the indexing phases
* adds preprocessing of names
* add country-specific ranking for Spain, Slovakia
* partially switch to using SP-GIST indexes
* better updating of dependent addresses for name changes in streets
* remove unused/broken tables for external housenumbers
* move external postcodes to CSV format and no longer save them in tables
(adds support for postcodes for arbitrary countries)
* remove postcode helper entries from placex (thanks @AntoJvlt)
* change required format for TIGER data to CSV
* move configuration of default languages from wiki into config file
* expect customized configuration files in project directory by default
* disable search API for reverse-only import (thanks @darkshredder)
* port most of maintenance/import code to Python and remove PHP utils
* add catch-up mode for replication
* add updating of special phrases (thanks @AntoJvlt)
* add support for special phrases in CSV files (thanks @AntoJvlt)
* switch to case-independent matching between place and boundary names
* remove disabling of reverse query parsing
* minor tweaks to search algorithm to avoid more false positives
* major overhaul of the administrator and developer documentation
* add security disclosure policy
* add testing of installation scripts via CI
* drop support for Python < 3.6 and Postgresql < 9.5
3.7.3
* fix XSS vulnerability in debug view
3.7.2
* fix database check for reverse-only imports
* do not error out in status API result when import date is missing
* add array_key_last function for PHP < 7.3 (thanks to @woodpeck)
* fix more url when server name is unknown (thanks to @mogita)
* commit changes to replication log table
3.7.1
* fix smaller issues with special phrases import (thanks @AntoJvlt)
* add index to speed up continued indexing during import
* fix index on location_property_tiger(parent_place_id) (thanks @changpingc)
* make sure Python code is backward-compatible with Python 3.5
* various documentation fixes
3.7.0
* switch to dotenv for configuration file
* introduce 'make install' (reorganising most of the code)
* introduce nominatim tool as replacement for various php scripts
* introduce project directories and allow multiple installations from same build
* clean up BDD tests: drop nose, reorganise step code
* simplify test database for API BDD tests and autoinstall database
* port most of the code for command-line tools to Python
(thanks to @darkshredder and @AntoJvlt)
* add tests for all tooling
* replace pyosmium-get-changes with custom internal implementation using
pyosmium
* improve search for queries with housenumber and partial terms
* add database versioning
* use jinja2 for preprocessing SQL files
* introduce automatic migrations
* reverse fix preference of interpolations over housenumbers
* parallelize indexing of postcodes
* add non-key indexes to speed up housenumber + street searches
* switch housenumber field in placex to save transliterated names
3.6.0
* add full support for searching by and displaying of addr:* tags

View File

@@ -1,4 +1,5 @@
[![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim)
Nominatim
=========
@@ -19,20 +20,11 @@ https://nominatim.org/release-docs/develop/ .
Installation
============
**Nominatim is a complex piece of software and runs in a complex environment.
Installing and running Nominatim is something for experienced system
administrators only who can do some trouble-shooting themselves. We are sorry,
but we can not provide installation support. We are all doing this in our free
time and there is just so much of that time to go around. Do not open issues in
our bug tracker if you need help. You can ask questions on the mailing list
(see below) or on [help.openstreetmap.org](https://help.openstreetmap.org/).**
The latest stable release can be downloaded from https://nominatim.org.
There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/).
Detailed installation instructions for the development version can be
found at [nominatim.org](https://nominatim.org/release-docs/develop/admin/Installation)
as well.
[Detailed installation instructions for current master](https://nominatim.org/release-docs/develop/admin/Installation)
can be found at nominatim.org as well.
A quick summary of the necessary steps:
@@ -42,12 +34,15 @@ A quick summary of the necessary steps:
cd build
cmake ..
make
sudo make install
2. Get OSM data and import:
2. Create a project directory, get OSM data and import:
./build/utils/setup.php --osm-file <your planet file> --all
mkdir nominatim-project
cd nominatim-project
nominatim import --osm-file <your planet file>
3. Point your webserver to the ./build/website directory.
3. Point your webserver to the nominatim-project/website directory.
License
@@ -59,13 +54,14 @@ The source code is available under a GPLv2 license.
Contributing
============
Contributions are welcome. For details see [contribution guide](CONTRIBUTING.md).
Both bug reports and pull requests are welcome.
Contributions, bugreport and pull requests are welcome.
For details see [contribution guide](CONTRIBUTING.md).
Mailing list
============
Questions and help
==================
For questions you can join the geocoding mailing list, see
https://lists.openstreetmap.org/listinfo/geocoding
For questions, community help and discussions you can use the
[Github discussions forum](https://github.com/osm-search/Nominatim/discussions)
or join the
[geocoding mailing list](https://lists.openstreetmap.org/listinfo/geocoding).

40
SECURITY.md Normal file
View File

@@ -0,0 +1,40 @@
# Security Policy
## Supported Versions
All Nominatim releases receive security updates for two years.
The following table lists the end of support for all currently supported
versions.
| Version | End of support for security updates |
| ------- | ----------------------------------- |
| 4.3.x | 2025-09-07 |
| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
| 4.0.x | 2023-11-02 |
## Reporting a Vulnerability
If you believe, you have found an issue in Nominatim that has implications on
security, please send a description of the issue to **security@nominatim.org**.
You will receive an acknowledgement of your mail within 3 work days where we
also notify you of the next steps.
## How we Disclose Security Issues
** The following section only applies to security issues found in released
versions. Issues that concern the master development branch only will be
fixed immediately on the branch with the corresponding PR containing the
description of the nature and severity of the issue. **
Patches for identified security issues are applied to all affected versions and
new minor versions are released. At the same time we release a statement at
the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the
incident. Announcements will also be published at the
[geocoding mailinglist](https://lists.openstreetmap.org/listinfo/geocoding).
## List of Previous Incidents
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)

View File

@@ -1,6 +1,6 @@
# Install Nominatim in a virtual machine for development and testing
This document describes how you can install Nominatim inside a Ubuntu 16
This document describes how you can install Nominatim inside a Ubuntu 22
virtual machine on your desktop/laptop (host machine). The goal is to give
you a development environment to easily edit code and run the test suite
without affecting the rest of your system.
@@ -42,9 +42,9 @@ is.
```
# inside the virtual machine:
cd build
wget --no-verbose --output-document=/tmp/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
./utils/setup.php --osm-file /tmp/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | tee monaco.$$.log
cd nominatim-project
wget --no-verbose --output-document=monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
nominatim import --osm-file monaco.osm.pbf 2>&1 | tee monaco.$$.log
```
To repeat an import you'd need to delete the database first
@@ -56,7 +56,7 @@ is.
## Development
Vagrant maps the virtual machine's port 8089 to your host machine. Thus you can
see Nominatim in action on [locahost:8089](http://localhost:8089/nominatim/).
see Nominatim in action on [localhost:8089](http://localhost:8089/nominatim/).
You edit code on your host machine in any editor you like. There is no need to
restart any software: just refresh your browser window.
@@ -69,8 +69,7 @@ installation.
PHP errors are written to `/var/log/apache2/error.log`.
With `echo` and `var_dump()` you write into the output (HTML/XML/JSON) when
you either add `&debug=1` to the URL (preferred) or set
`@define('CONST_Debug', true);` in `settings/local.php`.
you either add `&debug=1` to the URL.
In the Python BDD test you can use `logger.info()` for temporary debug
statements.
@@ -130,6 +129,10 @@ and then
Yes, Vagrant and Virtualbox can be installed on MS Windows just fine. You need a 64bit
version of Windows.
##### Will it run on Apple Silicon?
You might need to replace Virtualbox with [Parallels](https://www.parallels.com/products/desktop/).
There is no free/open source version of Parallels.
##### Why Monaco, can I use another country?
@@ -141,11 +144,12 @@ No. Long running Nominatim installations will differ once new import features (o
bug fixes) get added since those usually only get applied to new/changed data.
Also this document skips the optional Wikipedia data import which affects ranking
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation)
for details.
##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?
There is a Vagrant script for CentOS available, but the Nominatim directory
There used to be a Vagrant script for CentOS available, but the Nominatim directory
isn't symlinked/mounted to the host which makes development trickier. We used
it mainly for debugging installation with SELinux.
@@ -154,14 +158,17 @@ are slightly different, e.g. the name of the package manager, Apache2 package
name, location of files. We chose Ubuntu because that is closest to the
nominatim.openstreetmap.org production environment.
You can configure/download other Vagrant boxes from [https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
You can configure/download other Vagrant boxes from
[https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
##### How can I connect to an existing database?
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com`
and port `5432`. The Postgres username is `postgres`. You can edit the `.env` in your
project directory and point Nominatim to it.
NOMINATIM_DATABASE_DSN="pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
No data import or restarting necessary.
If the Postgres installation is behind a firewall, you can try
@@ -169,11 +176,12 @@ If the Postgres installation is behind a firewall, you can try
ssh -L 9999:localhost:5432 your-username@your-server.com
inside the virtual machine. It will map the port to `localhost:9999` and then
you edit `settings/local.php` with
you edit `.env` file with
@define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
NOMINATIM_DATABASE_DSN="pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it"
To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`
To access postgres directly remember to specify the hostname,
e.g. `psql --host localhost --port 9999 nominatim_it`
##### My computer is slow and the import takes too long. Can I start the virtual machine "in the cloud"?

88
Vagrantfile vendored
View File

@@ -17,6 +17,14 @@ Vagrant.configure("2") do |config|
checkout = "no"
end
config.vm.provider "hyperv" do |hv, override|
hv.memory = 2048
hv.linked_clone = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
end
end
config.vm.provider "virtualbox" do |vb, override|
vb.gui = false
vb.memory = 2048
@@ -34,7 +42,34 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu", primary: true do |sub|
config.vm.define "ubuntu22", primary: true do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu22-apache" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu22-nginx" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu20" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -43,7 +78,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu-apache" do |sub|
config.vm.define "ubuntu20-apache" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -52,7 +87,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu-nginx" do |sub|
config.vm.define "ubuntu20-nginx" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -60,51 +95,4 @@ Vagrant.configure("2") do |config|
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu18" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu18-apache" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu18-nginx" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "centos7" do |sub|
sub.vm.box = "centos/7"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-7.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "centos" do |sub|
sub.vm.box = "generic/centos8"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-8.sh"
s.privileged = false
s.args = [checkout]
end
end
end

15
cmake/paths-py.tmpl Normal file
View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()

View File

@@ -1,4 +0,0 @@
#!@PHP_BIN@ -Cq
<?php
require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
require_once(CONST_BasePath.'/@script_source@');

13
cmake/tool-installed.tmpl Normal file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env python3
import sys
import os
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))

13
cmake/tool.tmpl Executable file
View File

@@ -0,0 +1,13 @@
#!/usr/bin/env python3
import sys
import os
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))

View File

@@ -1,5 +0,0 @@
<?php
@define('CONST_Debug', (isset($_GET['debug']) && $_GET['debug']));
require_once(dirname(dirname(__FILE__)).'/settings/settings-frontend.php');
require_once(CONST_BasePath.'/@script_source@');

File diff suppressed because one or more lines are too long

View File

@@ -1,26 +0,0 @@
-- This data contains Ordnance Survey data © Crown copyright and database right 2010.
-- Code-Point Open contains Royal Mail data © Royal Mail copyright and database right 2010.
-- OS data may be used under the terms of the OS OpenData licence:
-- http://www.ordnancesurvey.co.uk/oswebsite/opendata/licence/docs/licence.pdf
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET standard_conforming_strings = off;
SET check_function_bodies = false;
SET client_min_messages = warning;
SET escape_string_warning = off;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
CREATE TABLE gb_postcode (
id integer,
postcode character varying(9),
geometry geometry,
CONSTRAINT enforce_dims_geometry CHECK ((st_ndims(geometry) = 2)),
CONSTRAINT enforce_srid_geometry CHECK ((st_srid(geometry) = 4326))
);

View File

@@ -1,16 +0,0 @@
SET statement_timeout = 0;
SET client_encoding = 'UTF8';
SET check_function_bodies = false;
SET client_min_messages = warning;
SET search_path = public, pg_catalog;
SET default_tablespace = '';
SET default_with_oids = false;
CREATE TABLE us_postcode (
postcode text,
x double precision,
y double precision
);

View File

@@ -29787,7 +29787,7 @@ st 5557484
-- prefill word table
select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;
-- copy the word frequencies

View File

@@ -10,6 +10,8 @@ set (DOC_SOURCES
admin
develop
api
customize
library
index.md
extra.css
styles.css
@@ -22,11 +24,12 @@ foreach (src ${DOC_SOURCES})
endforeach()
ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-8.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-8.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)
ADD_CUSTOM_TARGET(serve-doc
COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
)

View File

@@ -5,106 +5,102 @@ your Nominatim database. It is assumed that you have already successfully
installed the Nominatim software itself, if not return to the
[installation page](Installation.md).
## Importing multiple regions
## Importing multiple regions (without updates)
To import multiple regions in your database, you need to configure and run `utils/import_multiple_regions.sh` file. This script will set up the update directory which has the following structure:
To import multiple regions in your database you can simply give multiple
OSM files to the import command:
```
nominatim import --osm-file file1.pbf --osm-file file2.pbf
```
If you already have imported a file and want to add another one, you can
use the add-data function to import the additional data as follows:
```
nominatim add-data --file <FILE>
nominatim refresh --postcodes
nominatim index -j <NUMBER OF THREADS>
```
Please note that adding additional data is always significantly slower than
the original import.
## Importing multiple regions (with updates)
If you want to import multiple regions _and_ be able to keep them up-to-date
with updates, then you can use the scripts provided in the `utils` directory.
These scripts will set up an `update` directory in your project directory,
which has the following structure:
```bash
update
   ├── europe
   │   ├── andorra
   │   │   └── sequence.state
   │   └── monaco
   │   └── sequence.state
   └── tmp
├── combined.osm.pbf
└── europe
├── andorra-latest.osm.pbf
└── monaco-latest.osm.pbf
├── europe
├── andorra
│ │ └── sequence.state
└── monaco
└── sequence.state
└── tmp
└── europe
├── andorra-latest.osm.pbf
└── monaco-latest.osm.pbf
```
The `sequence.state` files will contain the sequence ID, which will be used by pyosmium to get updates. The tmp folder is used for import dump.
The `sequence.state` files contain the sequence ID for each region. They will
be used by pyosmium to get updates. The `tmp` folder is used for import dump and
can be deleted once the import is complete.
### Configuring multiple regions
The file `import_multiple_regions.sh` needs to be edited as per your requirement:
1. List of countries. eg:
COUNTRIES="europe/monaco europe/andorra"
2. Path to Build directory. eg:
NOMINATIMBUILD="/srv/nominatim/build"
3. Path to Update directory. eg:
UPDATEDIR="/srv/nominatim/update"
4. Replication URL. eg:
BASEURL="https://download.geofabrik.de"
DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"
!!! tip
If your database already exists and you want to add more countries, replace the setting up part
`${SETUPFILE} --osm-file ${UPDATEDIR}/tmp/combined.osm.pbf --all 2>&1`
with `${UPDATEFILE} --import-file ${UPDATEDIR}/tmp/combined.osm.pbf --index --index-instances N 2>&1`
where N is the numbers of CPUs in your system.
### Setting up multiple regions
Run the following command from your Nominatim directory after configuring the file.
Create a project directory as described for the
[simple import](Import.md#creating-the-project-directory). If necessary,
you can also add an `.env` configuration with customized options. In particular,
you need to make sure that `NOMINATIM_REPLICATION_UPDATE_INTERVAL` and
`NOMINATIM_REPLICATION_RECHECK_INTERVAL` are set according to the update
interval of the extract server you use.
bash ./utils/import_multiple_regions.sh
Copy the scripts `utils/import_multiple_regions.sh` and `utils/update_database.sh`
into the project directory.
!!! danger "Important"
This file uses osmium-tool. It must be installed before executing the import script.
Installation instructions can be found [here](https://osmcode.org/osmium-tool/manual.html#installation).
Now customize both files as per your requirements
### Updating multiple regions
To import multiple regions in your database, you need to configure and run ```utils/update_database.sh```.
This uses the update directory set up while setting up the DB.
### Configuring multiple regions
The file `update_database.sh` needs to be edited as per your requirement:
1. List of countries. eg:
1. List of countries. e.g.
COUNTRIES="europe/monaco europe/andorra"
2. Path to Build directory. eg:
2. URL to the service providing the extracts and updates. eg:
NOMINATIMBUILD="/srv/nominatim/build"
3. Path to Update directory. eg:
UPDATEDIR="/srv/nominatim/update"
4. Replication URL. eg:
BASEURL="https://download.geofabrik.de"
DOWNCOUNTRYPOSTFIX="-updates"
DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"
5. Followup can be set according to your installation. eg: For Photon,
5. Followup in the update script can be set according to your installation.
E.g. for Photon,
FOLLOWUP="curl http://localhost:2322/nominatim-update"
will handle the indexing.
To start the initial import, change into the project directory and run
```
bash import_multiple_regions.sh
```
### Updating the database
Run the following command from your Nominatim directory after configuring the file.
Change into the project directory and run the following command:
bash ./utils/update_database.sh
bash update_database.sh
This will get diffs from the replication server, import diffs and index the database. The default replication server in the script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
This will get diffs from the replication server, import diffs and index
the database. The default replication server in the
script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Importing Nominatim to an external PostgreSQL database
## Using an external PostgreSQL database
You can install Nominatim using a database that runs on a different server when
you have physical access to the file system on the other server. Nominatim
@@ -112,6 +108,11 @@ uses a custom normalization library that needs to be made accessible to the
PostgreSQL server. This section explains how to set up the normalization
library.
!!! note
The external module is only needed when using the legacy tokenizer.
If you have chosen the ICU tokenizer, then you can ignore this section
and follow the standard import documentation.
### Option 1: Compiling the library on the database server
The most sure way to get a working library is to compile it on the database
@@ -154,7 +155,7 @@ Make sure that the PostgreSQL server package is installed on the machine
the PostgreSQL server itself.
Download and compile Nominatim as per standard instructions. Once done, you find
the nomrmalization library in `build/module/nominatim.so`. Copy the file to
the normalization library in `build/module/nominatim.so`. Copy the file to
the database server at a location where it is readable and executable by the
PostgreSQL server process.
@@ -162,11 +163,53 @@ PostgreSQL server process.
On the client side you now need to configure the import to point to the
correct location of the library **on the database server**. Add the following
line to your your `settings/local.php` file:
line to your your `.env` file:
```php
@define('CONST_Database_Module_Path', '<directory on the database server where nominatim.so resides>');
NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominatim.so resides>"
```
Now change the `CONST_Database_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](/admin/Import).
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](Import.md).
## Moving the database to another machine
For some configurations it may be useful to run the import on one machine, then
move the database to another machine and run the Nominatim service from there.
For example, you might want to use a large machine to be able to run the import
quickly but only want a smaller machine for production because there is not so
much load. Or you might want to do the import once and then replicate the
database to many machines.
The important thing to keep in mind when transferring the Nominatim installation
is that you need to transfer the database _and the project directory_. Both
parts are essential for your installation.
The Nominatim database can be transferred using the `pg_dump`/`pg_restore` tool.
Make sure to use the same version of PostgreSQL and PostGIS on source and
target machine.
!!! note
Before creating a dump of your Nominatim database, consider running
`nominatim freeze` first. Your database looses the ability to receive further
data updates but the resulting database is only about a third of the size
of a full database.
Next install Nominatim on the target machine by following the standard installation
instructions. Again, make sure to use the same version as the source machine.
Create a project directory on your destination machine and set up the `.env`
file to match the configuration on the source machine. Finally run
nominatim refresh --website
to make sure that the local installation of Nominatim will be used.
If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
that PostgreSQL cannot find or access `nominatim.so` then rerun
nominatim refresh --functions
on the target machine to update the the location of the module.

View File

@@ -1,22 +1,23 @@
# Deploying Nominatim
# Deploying Nominatim using the PHP frontend
The Nominatim API is implemented as a PHP application. The `website/` directory
in the build directory contains the configured website. You can serve this
in the project directory contains the configured website. You can serve this
in a production environment with any web server that is capable to run
PHP scripts.
This section gives a quick overview on how to configure Apache and Nginx to
serve Nominatim. It is not meant as a full system administration guide on how
to run a web service. Please refer to the documentation of
[Apache](http://httpd.apache.org/docs/current/) and
[Apache](https://httpd.apache.org/docs/current/) and
[Nginx](https://nginx.org/en/docs/)
for background information on configuring the services.
!!! Note
Throughout this page, we assume that your Nominatim build directory is
located in `/srv/nominatim/build` and the source code in
`/srv/nominatim/Nominatim`. If you have put it somewhere else, you
need to adjust the commands and configuration accordingly.
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
@@ -29,7 +30,7 @@ web server user. You can check that the permissions are correct by accessing
on of the php files as the web server user:
``` sh
sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php
sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
```
If this shows a permission error, then you need to adapt the permissions of
@@ -40,11 +41,11 @@ web server access. At a minimum the following SELinux labelling should be done
for Nominatim:
``` sh
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?"
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?"
sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so"
sudo restorecon -R -v /srv/nominatim/Nominatim
sudo restorecon -R -v /srv/nominatim/build
sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/nominatim/lib/lib-php(/.*)?"
sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
sudo restorecon -R -v /usr/local/lib/nominatim
sudo restorecon -R -v /srv/nominatim-project
```
## Nominatim with Apache
@@ -65,13 +66,13 @@ Make sure your Apache configuration contains the required permissions for the
directory and create an alias:
``` apache
<Directory "/srv/nominatim/build/website">
<Directory "/srv/nominatim-project/website">
Options FollowSymLinks MultiViews
AddType text/html .php
DirectoryIndex search.php
Require all granted
</Directory>
Alias /nominatim /srv/nominatim/build/website
Alias /nominatim /srv/nominatim-project/website
```
After making changes in the apache config you need to restart apache.
@@ -81,7 +82,7 @@ The website should now be available on `http://localhost/nominatim`.
### Installing the required packages
Nginx has no built-in PHP interpreter. You need to use php-fpm as a deamon for
Nginx has no built-in PHP interpreter. You need to use php-fpm as a daemon for
serving PHP cgi.
On Ubuntu/Debian install nginx and php-fpm with:
@@ -98,7 +99,7 @@ Unix socket instead, change the pool configuration
``` ini
; Replace the tcp listener and add the unix socket
listen = /var/run/php-fpm.sock
listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
@@ -110,7 +111,7 @@ Tell nginx that php files are special and to fastcgi_pass to the php-fpm
unix socket by adding the location definition to the default configuration.
``` nginx
root /srv/nominatim/build/website;
root /srv/nominatim-project/website;
index search.php;
location / {
try_files $uri $uri/ @php;
@@ -120,7 +121,7 @@ location @php {
fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
fastcgi_param QUERY_STRING $args;
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
@@ -130,7 +131,7 @@ location ~ [^/]\.php(/|$) {
if (!-f $document_root$fastcgi_script_name) {
return 404;
}
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
@@ -139,3 +140,9 @@ location ~ [^/]\.php(/|$) {
Restart the nginx and php-fpm services and the website should now be available
at `http://localhost/`.
## Nominatim with other webservers
Users have created instructions for other webservers:
* [Caddy](https://github.com/osm-search/Nominatim/discussions/2580)

View File

@@ -0,0 +1,122 @@
# Deploying the Nominatim Python frontend
The Nominatim can be run as a Python-based
[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
and [Starlette](https://www.starlette.io/) as the ASGI framework.
This section gives a quick overview on how to configure Nginx to serve
Nominatim. Please refer to the documentation of
[Nginx](https://nginx.org/en/docs/) for background information on how
to configure it.
!!! Note
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
to adapt the instructions in this case.
### Installing the required packages
The recommended way to deploy a Python ASGI application is to run
the ASGI runner [uvicorn](https://uvicorn.org/)
together with [gunicorn](https://gunicorn.org/) HTTP server. We use
Falcon here as the web framework.
Create a virtual environment for the Python packages and install the necessary
dependencies:
``` sh
sudo apt install virtualenv
virtualenv /srv/nominatim-venv
/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
```
### Setting up Nominatim as a systemd job
Next you need to set up the service that runs the Nominatim frontend. This is
easiest done with a systemd job.
Create the following file `/etc/systemd/system/nominatim.service`:
``` systemd
[Unit]
Description=Nominatim running as a gunicorn application
After=network.target
Requires=nominatim.socket
[Service]
Type=simple
Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
User=www-data
Group=www-data
WorkingDirectory=/srv/nominatim-project
ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
ExecReload=/bin/kill -s HUP $MAINPID
StandardOutput=append:/var/log/gunicorn-nominatim.log
StandardError=inherit
PrivateTmp=true
TimeoutStopSec=5
KillMode=mixed
[Install]
WantedBy=multi-user.target
```
This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
its own Python process using
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
connections to the database to serve requests in parallel.
Make the new service known to systemd and start it:
``` sh
sudo systemctl daemon-reload
sudo systemctl enable nominatim
sudo systemctl start nominatim
```
This sets the service up, so that Nominatim is automatically started
on reboot.
### Configuring nginx
To make the service available to the world, you need to proxy it through
nginx. Add the following definition to the default configuration:
``` nginx
upstream nominatim_service {
server unix:/run/nominatim.sock fail_timeout=0;
}
server {
listen 80;
listen [::]:80;
root /var/www/html;
index /search;
location / {
proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_redirect off;
proxy_pass http://nominatim_service;
}
}
```
Reload nginx with
```
sudo systemctl reload nginx
```
and you should be able to see the status of your server under
`http://localhost/status`.

View File

@@ -16,7 +16,7 @@ was killed. If it looks like this:
then you can resume with the following command:
```sh
./utils/setup.php --index --create-search-indices --create-country-names
nominatim import --continue indexing
```
If the reported rank is 26 or higher, you can also safely add `--index-noanalyse`.
@@ -31,7 +31,7 @@ list for hints.
If it happened during index creation you can try rerunning the step with
```sh
./utils/setup.php --create-search-indices --ignore-errors
nominatim import --continue indexing
```
Otherwise it's best to start the full setup from the beginning.
@@ -79,7 +79,7 @@ When running the import you may get a version mismatch:
pg_config seems to use bad includes sometimes when multiple versions
of PostgreSQL are available in the system. Make sure you remove the
server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
server development libraries (`postgresql-server-dev-13` on Ubuntu)
and recompile (`cmake .. && make`).
@@ -93,7 +93,7 @@ on a non-managed machine.
### I see the error: "function transliteration(text) does not exist"
Reinstall the nominatim functions with `setup.php --create--functions`
Reinstall the nominatim functions with `nominatim refresh --functions`
and check for any errors, e.g. a missing `nominatim.so` file.
### I see the error: "ERROR: mmap (remap) failed"
@@ -106,14 +106,10 @@ If you are using a flatnode file, then it may also be that the underlying
filesystem does not fully support 'mmap'. A notable candidate is virtualbox's
vboxfs.
### I see the error: "clang: Command not found" on CentOS
On CentOS 7 users reported `/opt/rh/llvm-toolset-7/root/usr/bin/clang: Command not found`.
Double-check clang is installed. Instead of `make` try running `make CLANG=true`.
### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) during the initial import of the database. It's
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168)
during the initial import of the database. It's
something PostgreSQL internal Nominatim doesn't control. And PostgreSQL forums
suggest it's threading related but definitely some kind of crash of a process.
Users reported either rebooting the server, different hardware or just trying
@@ -125,22 +121,6 @@ The server cannot access your database. Add `&debug=1` to your URL
to get the full error message.
### On CentOS the website shows "Could not connect to server"
`could not connect to server: No such file or directory`
On CentOS v7 the PostgreSQL server is started with `systemd`. Check if
`/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`. If
so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security
feature, so use the
[preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
However, you can solve this the quick and dirty way by commenting out that line and then run
sudo systemctl daemon-reload
sudo systemctl restart httpd
### Website reports "DB Error: insufficient permissions"
The user the webserver, e.g. Apache, runs under needs to have access to the
@@ -180,9 +160,6 @@ by everybody, e.g.
Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.
When running SELinux, make sure that the
[context is set up correctly](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
When you recently updated your operating system, updated PostgreSQL to
a new version or moved files (e.g. the build directory) you should
recreate `nominatim.so`. Try
@@ -202,7 +179,7 @@ See the installation instructions for a full list of required packages.
### I forgot to delete the flatnodes file before starting an import.
That's fine. For each import the flatnodes file get overwritten.
See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage]()
See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage](https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage)
for more information.
@@ -211,11 +188,3 @@ for more information.
### Can I import negative OSM ids into Nominatim?
See [this question of Stackoverflow](https://help.openstreetmap.org/questions/64662/nominatim-flatnode-with-negative-id).
### Missing XML or text declaration
The website might show: `XML Parsing Error: XML or text declaration not at start of entity Location.`
Make sure there are no spaces at the beginning of your `settings/local.php` file.

View File

@@ -1,22 +1,55 @@
# Importing the Database
The following instructions explain how to create a Nominatim database
from an OSM planet file and how to keep the database up to date. It
is assumed that you have already successfully installed the Nominatim
software itself, if not return to the [installation page](Installation.md).
from an OSM planet file. It is assumed that you have already successfully
installed the Nominatim software itself and the `nominatim` tool can be found
in your `PATH`. If this is not the case, return to the
[installation page](Installation.md).
## Configuration setup in settings/local.php
## Creating the project directory
The Nominatim server can be customized via the file `settings/local.php`
in the build directory. Note that this is a PHP file, so it must always
start like this:
Before you start the import, you should create a project directory for your
new database installation. This directory receives all data that is related
to a single Nominatim setup: configuration, extra data, etc. Create a project
directory apart from the Nominatim software and change into the directory:
<?php
```
mkdir ~/nominatim-planet
cd ~/nominatim-planet
```
without any leading spaces.
In the following, we refer to the project directory as `$PROJECT_DIR`. To be
able to copy&paste instructions, you can export the appropriate variable:
There are lots of configuration settings you can tweak. Have a look
at `settings/default.php` for a full list. Most should have a sensible default.
```
export PROJECT_DIR=~/nominatim-planet
```
The Nominatim tool assumes per default that the current working directory is
the project directory but you may explicitly state a different directory using
the `--project-dir` parameter. The following instructions assume that you run
all commands from the project directory.
!!! tip "Migration Tip"
Nominatim used to be run directly from the build directory until version 3.6.
Essentially, the build directory functioned as the project directory
for the database installation. This setup still works and can be useful for
development purposes. It is not recommended anymore for production setups.
Create a project directory that is separate from the Nominatim software.
### Configuration setup in `.env`
The Nominatim server can be customized via an `.env` configuration file in the
project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
format which looks the same as variable settings in a standard shell environment.
You can also set the same configuration via environment variables. All
settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
variables.
There are lots of configuration settings you can tweak. A full reference
can be found in the chapter [Configuration Settings](../customize/Settings.md).
Most should have a sensible default.
#### Flatnode files
@@ -24,9 +57,9 @@ If you plan to import a large dataset (e.g. Europe, North America, planet),
you should also enable flatnode storage of node locations. With this
setting enabled, node coordinates are stored in a simple file instead
of the database. This will save you import time and disk storage.
Add to your `settings/local.php`:
Add to your `.env`:
@define('CONST_Osm2pgsql_Flatnode_File', '/path/to/flatnode.file');
NOMINATIM_FLATNODE_FILE="/path/to/flatnode.file"
Replace the second part with a suitable path on your system and make sure
the directory exists. There should be at least 75GB of free space.
@@ -38,27 +71,32 @@ the directory exists. There should be at least 75GB of free space.
Wikipedia can be used as an optional auxiliary data source to help indicate
the importance of OSM features. Nominatim will work without this information
but it will improve the quality of the results if this is installed.
This data is available as a binary download:
This data is available as a binary download. Put it into your project directory:
cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
cd $PROJECT_DIR
wget https://nominatim.org/data/wikimedia-importance.sql.gz
The file is about 400MB and adds around 4GB to the Nominatim database.
!!! tip
If you forgot to download the wikipedia rankings, you can also add
importances after the import. Download the files, then run
`./utils/setup.php --import-wikipedia-articles`
and `./utils/update.php --recompute-importance`.
If you forgot to download the wikipedia rankings, then you can
also add importances after the import. Download the SQL files, then
run `nominatim refresh --wiki-data --importance`. Updating
importances for a planet will take a couple of hours.
### Great Britain, USA postcodes
### External postcodes
Nominatim can use postcodes from an external source to improve searches that
involve a GB or US postcode. This data can be optionally downloaded:
Nominatim can use postcodes from an external source to improve searching with
postcodes. We provide precomputed postcodes sets for the US (using TIGER data)
and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads/open/CodePointOpen).
This data can be optionally downloaded into the project directory:
cd $NOMINATIM_SOURCE_DIR/data
wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
wget https://www.nominatim.org/data/us_postcode_data.sql.gz
cd $PROJECT_DIR
wget https://nominatim.org/data/gb_postcodes.csv.gz
wget https://nominatim.org/data/us_postcodes.csv.gz
You can also add your own custom postcode sources, see
[Customization of postcodes](../customize/Postcodes.md).
## Choosing the data to import
@@ -74,7 +112,7 @@ If you only need geocoding for a smaller region, then precomputed OSM extracts
are a good way to reduce the database size and import time.
[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
They even have daily updates which can be used with the update process described
[in the next section](../Update). There are also
[in the next section](Update.md). There are also
[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
Please be aware that some extracts are not cut exactly along the country
@@ -86,22 +124,33 @@ that Nominatim cannot compute the areas for some administrative areas.
About half of the data in Nominatim's database is not really used for serving
the API. It is only there to allow the data to be updated from the latest
changes from OSM. For many uses these dynamic updates are not really required.
If you don't plan to apply updates, the dynamic part of the database can be
safely dropped using the following command:
If you don't plan to apply updates, you can run the import with the
`--no-updates` parameter. This will drop the dynamic part of the database as
soon as it is not required anymore.
You can also drop the dynamic part later using the following command:
```
./utils/setup.php --drop
nominatim freeze
```
Note that you still need to provide for sufficient disk space for the initial
import. So this option is particularly interesting if you plan to transfer the
database or reuse the space later.
!!! warning
The data structure for updates are also required when adding additional data
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
If you plan to use those, you must not use the `--no-updates` parameter.
Do a normal import, add the external data and once you are done with
everything run `nominatim freeze`.
### Reverse-only Imports
If you only want to use the Nominatim database for reverse lookups or
if you plan to use the installation only for exports to a
[photon](https://photon.komoot.de/) database, then you can set up a database
[photon](https://photon.komoot.io/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
@@ -112,19 +161,19 @@ Nominatim normally sets up a full search database containing administrative
boundaries, places, streets, addresses and POI data. There are also other
import styles available which only read selected data:
* **settings/import-admin.style**
* **admin**
Only import administrative boundaries and places.
* **settings/import-street.style**
* **street**
Like the admin style but also adds streets.
* **settings/import-address.style**
* **address**
Import all data necessary to compute addresses down to house number level.
* **settings/import-full.style**
* **full**
Default style that also includes points of interest.
* **settings/import-extratags.style**
* **extratags**
Like the full style but also adds most of the OSM tags into the extratags
column.
The style can be changed with the configuration `CONST_Import_Style`.
The style can be changed with the configuration `NOMINATIM_IMPORT_STYLE`.
To give you an idea of the impact of using the different styles, the table
below gives rough estimates of the final database size after import of a
@@ -143,8 +192,8 @@ full | 54h | 640 GB | 330 GB
extratags | 54h | 650 GB | 340 GB
You can also customize the styles further.
A [description of the style format](../develop/Import.md#configuring-the-import)
can be found in the development section.
A [description of the style format](../customize/Import-Styles.md)
can be found in the customization guide.
## Initial import of the data
@@ -153,12 +202,15 @@ can be found in the development section.
[Geofabrik](https://download.geofabrik.de).
Download the data to import. Then issue the following command
from the **build directory** to start the import:
from the **project directory** to start the import:
```sh
./utils/setup.php --osm-file <data file> --all 2>&1 | tee setup.log
nominatim import --osm-file <data file> 2>&1 | tee setup.log
```
The **project directory** is the one that you have set up at the beginning.
See [creating the project directory](#creating-the-project-directory).
### Notes on full planet imports
Even on a perfectly configured machine
@@ -176,7 +228,7 @@ to load the OSM data into the PostgreSQL database. This step is very demanding
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
this point. PostgreSQL blocks at least the part of RAM that has been configured
with the `shared_buffers` parameter during
[PostgreSQL tuning](Installation#postgresql-tuning)
[PostgreSQL tuning](Installation.md#postgresql-tuning)
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
its internal data structures, potentially more when it has to process very large
relations. In addition it needs to maintain a cache for node locations. The size
@@ -192,94 +244,89 @@ MB. Make sure you leave enough RAM for PostgreSQL and osm2pgsql as mentioned
above. If the system starts swapping or you are getting out-of-memory errors,
reduce the cache size or even consider using a flatnode file.
### Verify the import
Run this script to verify all required tables and indices got created successfully.
### Testing the installation
Run this script to verify that all required tables and indices got created
successfully.
```sh
./utils/check_import_finished.php
nominatim admin --check-database
```
### Setting up the website
Now you can try out your installation by executing a simple query on the
command line:
Run the following command to set up the configuration file for the API frontend
`settings/settings-frontend.php`. These settings are used in website/*.php files.
``` sh
nominatim search --query Berlin
```
or, when you have a reverse-only installation:
``` sh
nominatim reverse --lat 51 --lon 45
```
If you want to run Nominatim as a service, you need to make a choice between
running the traditional PHP frontend or the new experimental Python frontend.
Make sure you have installed the right packages as per
[Installation](Installation.md#software).
#### Testing the PHP frontend
You can run a small test server with the PHP frontend like this:
```sh
./utils/setup.php --setup-website
nominatim serve
```
!!! Note
This step is not necessary if you use `--all` option while setting up the DB.
Now you can try out your installation by running:
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the PHP frontend](Deployment-PHP.md).
#### Testing the Python frontend
To run the test server against the Python frontend, you must choose a
web framework to use, either starlette or falcon. Make sure the appropriate
packages are installed. Then run
``` sh
nominatim serve --engine falcon
```
or
``` sh
nominatim serve --engine starlette
```
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the Python frontend](Deployment-Python.md).
## Enabling search by category phrases
To be able to search for places by their type using
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:
```sh
make serve
nominatim special-phrases --import-from-wiki
```
This runs a small test server normally used for development. You can use it
to verify that your installation is working. Go to
`http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
## Tuning the database
Accurate word frequency information for search terms helps PostgreSQL's query
planner to make the right decisions. Recomputing them can improve the performance
of forward geocoding in particular under high load. To recompute word counts run:
```sh
./utils/update.php --recompute-word-counts
```
This will take a couple of hours for a full planet installation. You can
also defer that step to a later point in time when you realise that
performance becomes an issue. Just make sure that updates are stopped before
running this function.
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to enable these key phrases like this:
./utils/specialphrases.php --wiki-import > specialphrases.sql
psql -d nominatim -f specialphrases.sql
Note that this command downloads the phrases from the wiki link above. You
need internet access for the step.
## Installing Tiger housenumber data for the US
Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER 2019 data and unpack it into the
data directory in your Nominatim sources:
cd Nominatim/data
wget https://nominatim.org/data/tiger2019-nominatim-preprocessed.tar.gz
tar xf tiger2019-nominatim-preprocessed.tar.gz
2. Import the data into your Nominatim database:
./utils/setup.php --import-tiger-data
3. Enable use of the Tiger data in your `settings/local.php` by adding:
@define('CONST_Use_US_Tiger_Data', true);
4. Apply the new settings:
```sh
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```
See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
information on how the data got preprocessed.
You can also import special phrases from a csv file, for more
information please see the [Customization part](../customize/Special-Phrases.md).

View File

@@ -4,10 +4,8 @@ This page contains generic installation instructions for Nominatim and its
prerequisites. There are also step-by-step instructions available for
the following operating systems:
* [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
* [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
* [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
* [CentOS 8](../appendix/Install-on-Centos-8.md)
* [CentOS 7.2](../appendix/Install-on-Centos-7.md)
These OS-specific instructions can also be found in executable form
in the `vagrant/` directory.
@@ -17,12 +15,18 @@ and can't offer support.
* [Docker](https://github.com/mediagis/nominatim-docker)
* [Docker on Kubernetes](https://github.com/peter-evans/nominatim-k8s)
* [Kubernetes with Helm](https://github.com/robjuz/helm-charts/blob/master/charts/nominatim/README.md)
* [Ansible](https://github.com/synthesio/infra-ansible-nominatim)
## Prerequisites
### Software
!!! Warning
For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
otherwise import and queries will be slow to the point of being unusable.
Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.
For compiling:
* [cmake](https://cmake.org/)
@@ -30,23 +34,43 @@ For compiling:
* [proj](https://proj.org/)
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [nlohmann/json](https://json.nlohmann.me/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
For running Nominatim:
* [PostgreSQL](https://www.postgresql.org) (9.3+)
* [PostGIS](https://postgis.net) (2.2+)
* [Python 3](https://www.python.org/)
* [Psycopg2](https://www.psycopg.org)
* [PHP](https://php.net) (7.0 or later)
* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
* [Python 3](https://www.python.org/) (3.7+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)
When running the PHP frontend:
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
For running continuous updates:
* [pyosmium](https://osmcode.org/pyosmium/) (with Python 3)
* [pyosmium](https://osmcode.org/pyosmium/)
For running the experimental Python frontend:
* one of the following web frameworks:
* [falcon](https://falconframework.org/) (3.0+)
* [starlette](https://www.starlette.io/)
* [uvicorn](https://www.uvicorn.org/)
For dependencies for running tests and building documentation, see
the [Development section](../develop/Development-Environment.md).
@@ -54,15 +78,15 @@ the [Development section](../develop/Development-Environment.md).
### Hardware
A minimum of 2GB of RAM is required or installation will fail. For a full
planet import 64GB of RAM or more are strongly recommended. Do not report
planet import 128GB of RAM or more are strongly recommended. Do not report
out of memory problems if you have less than 64GB RAM.
For a full planet install you will need at least 900GB of hard disk space.
For a full planet install you will need at least 1TB of hard disk space.
Take into account that the OSM database is growing fast.
Fast disks are essential. Using NVME disks is recommended.
Even on a well configured machine the import of a full planet takes
around 2 days. On traditional spinning disks, 7-8 days are more realistic.
around 2 days. When using traditional SSDs, 4-5 days are more realistic.
## Tuning the PostgreSQL database
@@ -76,8 +100,7 @@ your `postgresql.conf` file.
work_mem = (50MB)
effective_cache_size = (24GB)
synchronous_commit = off
checkpoint_segments = 100 # only for postgresql <= 9.4
max_wal_size = 1GB # postgresql > 9.4
max_wal_size = 1GB
checkpoint_timeout = 10min
checkpoint_completion_target = 0.9
@@ -95,15 +118,6 @@ you might consider setting:
and even reduce `autovacuum_work_mem` further. This will reduce the amount
of memory that autovacuum takes away from the import process.
For the initial import, you should also set:
fsync = off
full_page_writes = off
Don't forget to reenable them after the initial import or you risk database
corruption.
## Downloading and building Nominatim
### Downloading the latest release
@@ -117,13 +131,13 @@ If you want to install latest development version from github, make sure to
also check out the osm2pgsql subproject:
```
git clone --recursive git://github.com/openstreetmap/Nominatim.git
git clone --recursive https://github.com/openstreetmap/Nominatim.git
```
The development version does not include the country grid. Download it separately:
```
wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
```
### Building Nominatim
@@ -142,6 +156,27 @@ build at the same level as the Nominatim source directory run:
```
cmake ../Nominatim
make
sudo make install
```
!!! warning
The default installation no longer compiles the PostgreSQL module that
is needed for the legacy tokenizer from older Nominatim versions. If you
are upgrading an older database or want to run the
[legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
some other reason, you need to enable the PostgreSQL module via
cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
you need to have the server development headers for PostgreSQL installed.
On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
Nominatim installs itself into `/usr/local` per default. To choose a different
installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
cmake command. Make sure that the `bin` directory is available in your path
in that case, e.g.
```
export PATH=<install root>/bin:$PATH
```
Now continue with [importing the database](Import.md).

75
docs/admin/Maintenance.md Normal file
View File

@@ -0,0 +1,75 @@
This chapter describes the various operations the Nominatim database administrator
may use to clean and maintain the database. None of these operations is mandatory
but they may help improve the performance and accuracy of results.
## Updating postcodes
Command: `nominatim refresh --postcodes`
Postcode centroids (aka 'calculated postcodes') are generated by looking at all
postcodes of a country, grouping them and calculating the geometric centroid.
There is currently no logic to deal with extreme outliers (typos or other
mistakes in OSM data). There is also no check if a postcodes adheres to a
country's format, e.g. if Swiss postcodes are 4 digits.
When running regular updates, postcodes results can be improved by running
this command on a regular basis. Note that only the postcode table and the
postcode search terms are updated. The postcode that is assigned to each place
is only updated when the place is updated.
The command takes around 70min to run on the planet and needs ca. 40GB of
temporary disk space.
## Updating word counts
Command: `nominatim refresh --word-counts`
Nominatim keeps frequency statistics about all search terms it indexes. These
statistics are currently used to optimise queries to the database. Thus better
statistics mean better performance. Word counts are created once after import
and are usually sufficient even when running regular updates. You might want
to rerun the statistics computation when adding larger amounts of new data,
for example, when adding an additional country via `nominatim add-data`.
## Forcing recomputation of places and areas
Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
When running replication updates, Nominatim tries to recompute the search
and address information for all places that are affected by a change. But it
needs to restrict the total number of changes to make sure it can keep up
with the minutely updates. Therefore it will refrain from propagating changes
that affect a lot of objects.
The administrator may force an update of places in the database.
`nominatim refresh --data-object` invalidates a single OSM object.
`nominatim refresh --data-area` invalidates an OSM object and all dependent
objects. That are usually the places that inside its area or around the
center of the object. Both commands expect the OSM object as an argument
of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
`R` (relation).
After invalidating the object, indexing must be run again. If continuous
update are running in the background, the objects will be recomputed together
with the next round of updates. Otherwise you need to run `nominatim index`
to finish the recomputation.
## Removing large deleted objects
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```

View File

@@ -1,10 +1,123 @@
# Database Migrations
This page describes database migrations necessary to update existing databases
to newer versions of Nominatim.
Since version 3.7.0 Nominatim offers automatic migrations. Please follow
the following steps:
SQL statements should be executed from the PostgreSQL commandline. Execute
`psql nominatim` to enter command line mode.
* stop any updates that are potentially running
* update Nominatim to the newer version
* go to your project directory and run `nominatim admin --migrate`
* (optionally) restart updates
Below you find additional migrations and hints about other structural and
breaking changes. **Please read them before running the migration.**
!!! note
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
## 4.2.0 -> 4.3.0
### New indexes for reverse lookup
The reverse lookup algorithm has changed slightly to improve performance.
This change needs a different index in the database. The required index
will be automatically build during migration. Until the new index is available
performance of the /reverse endpoint is significantly reduced. You should
therefore either remove traffic from the machine before attempting a
version update or create the index manually **before** starting the update
using the following SQL:
```sql
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
WHERE rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND linked_place_id is null AND osm_type = 'N';
```
## 4.0.0 -> 4.1.0
### ICU tokenizer is the new default
Nominatim now installs the [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer)
by default. This only has an effect on newly installed databases. When
updating older databases, it keeps its installed tokenizer. If you still
run with the legacy tokenizer, make sure to compile Nominatim with the
PostgreSQL module, see [Installation](Installation.md#building-nominatim).
### geocodejson output changed
The `type` field of the geocodejson output has changed. It now contains
the address class of the object instead of the value of the OSM tag. If
your client has used the `type` field, switch them to read `osm_value`
instead.
## 3.7.0 -> 4.0.0
### NOMINATIM_PHRASE_CONFIG removed
Custom blacklist configurations for special phrases now need to be handed
with the `--config` parameter to `nominatim special-phrases`. Alternatively
you can put your custom configuration in the project directory in a file
named `phrase-settings.json`.
Version 3.8 also removes the automatic converter for the php format of
the configuration in older versions. If you are updating from Nominatim < 3.7
and still work with a custom `phrase-settings.php`, you need to manually
convert it into a json format.
### PHP utils removed
The old PHP utils have now been removed completely. You need to switch to
the appropriate functions of the nominatim command line tool. See
[Introducing `nominatim` command line tool](#introducing-nominatim-command-line-tool)
below.
## 3.6.0 -> 3.7.0
### New format and name of configuration file
The configuration for an import is now saved in a `.env` file in the project
directory. This file follows the dotenv format. For more information, see
the [installation chapter](Import.md#configuration-setup-in-env).
To migrate to the new system, create a new project directory, add the `.env`
file and port your custom configuration from `settings/local.php`. Most
settings are named similar and only have received a `NOMINATIM_` prefix.
Use the default settings in `settings/env.defaults` as a reference.
### New location for data files
External data files for Wikipedia importance, postcodes etc. are no longer
expected to reside in the source tree by default. Instead they will be searched
in the project directory. If you have an automated setup script you must
either adapt the download location or explicitly set the location of the
files to the old place in your `.env`.
### Introducing `nominatim` command line tool
The various php utilities have been replaced with a single `nominatim`
command line tool. Make sure to adapt any scripts. There is no direct 1:1
matching between the old utilities and the commands of nominatim CLI. The
following list gives you a list of nominatim sub-commands that contain
functionality of each script:
* ./utils/setup.php: `import`, `freeze`, `refresh`
* ./utils/update.php: `replication`, `add-data`, `index`, `refresh`
* ./utils/specialphrases.php: `special-phrases`
* ./utils/check_import_finished.php: `admin`
* ./utils/warm.php: `admin`
* ./utils/export.php: `export`
Try `nominatim <command> --help` for more information about each subcommand.
`./utils/query.php` no longer exists in its old form. `nominatim search`
provides a replacement but returns different output.
### Switch to normalized house numbers
The housenumber column in the placex table uses now normalized version.
The automatic migration step will convert the column but this may take a
very long time. It is advisable to take the machine offline while doing that.
## 3.5.0 -> 3.6.0
@@ -24,7 +137,7 @@ done
The debugging UI is no longer directly provided with Nominatim. Instead we
now provide a simple Javascript application. Please refer to
[Setting up the Nominatim UI](../Setup-Nominatim-UI) for details on how to
[Setting up the Nominatim UI](Setup-Nominatim-UI.md) for details on how to
set up the UI.
The icons served together with the API responses have been moved to the
@@ -68,6 +181,14 @@ configuration file, run the following command after updating:
./utils/setup.php --setup-website
```
### Update SQL code
To update the SQL code to the leatest version run:
```
./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
```
## 3.4.0 -> 3.5.0
### New Wikipedia/Wikidata importance tables

View File

@@ -10,20 +10,20 @@ installation. For more details, please also have a look at the
## Installing nominatim-ui
nominatim-ui does not need any special installation, just download, configure
and run it.
We provide regular releases of nominatim-ui that contain the packaged website.
They do not need any special installation. Just download, configure
and run it. Grab the latest release from
[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.
Clone the source from github:
git clone https://github.com/osm-search/nominatim-ui
Copy the example configuration into the right place:
Next you need to adapt the UI to your installation. Custom settings need to be
put into `dist/theme/config.theme.js`. At a minimum you need to
set `Nominatim_API_Endpoint` to point to your Nominatim installation:
cd nominatim-ui
cp dist/config.example.js dist/config.js
echo "Nominatim_Config.Nominatim_API_Endpoint='https://myserver.org/nominatim/';" > dist/theme/config.theme.js
Now adapt the configuration to your needs. You need at least
to change the `Nominatim_API_Endpoint` to point to your Nominatim installation.
For the full set of available settings, have a look at `dist/config.defaults.js`.
Then you can just test it locally by spinning up a webserver in the `dist`
directory. For example, with Python:
@@ -161,24 +161,16 @@ directory like this:
# If no endpoint is given, then use search.
RewriteRule ^(/|$) "search.php"
# If format-html is explicity requested, forward to the UI.
# If format-html is explicitly requested, forward to the UI.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+) ui/$1.html [R,END]
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
# If no format parameter is there then forward anything
# but /reverse and /lookup to the UI.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+) ui/$1.html [R,END]
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
</Directory>
```

View File

@@ -1,67 +1,232 @@
# Updating the Database
There are many different ways to update your Nominatim database.
The following section describes how to keep it up-to-date with Pyosmium.
For a list of other methods see the output of `./utils/update.php --help`.
The following section describes how to keep it up-to-date using
an [online replication service for OpenStreetMap data](https://wiki.openstreetmap.org/wiki/Planet.osm/diffs)
For a list of other methods to add or update data see the output of
`nominatim add-data --help`.
!!! important
If you have configured a flatnode file for the import, then you
need to keep this flatnode file around for updates.
#### Installing the newest version of Pyosmium
### Installing the newest version of Pyosmium
It is recommended to install Pyosmium via pip. Make sure to use python3.
The replication process uses
[Pyosmium](https://docs.osmcode.org/pyosmium/latest/updating_osm_data.html)
to download update data from the server.
It is recommended to install Pyosmium via pip.
Run (as the same user who will later run the updates):
```sh
pip3 install --user osmium
```
Nominatim needs a tool called `pyosmium-get-changes` which comes with
Pyosmium. You need to tell Nominatim where to find it. Add the
following line to your `settings/local.php`:
### Setting up the update process
@define('CONST_Pyosmium_Binary', '/home/user/.local/bin/pyosmium-get-changes');
The path above is fine if you used the `--user` parameter with pip.
Replace `user` with your user name.
#### Setting up the update process
Next the update needs to be initialised. By default Nominatim is configured
Next the update process needs to be initialised. By default Nominatim is configured
to update using the global minutely diffs.
If you want a different update source you will need to add some settings
to `settings/local.php`. For example, to use the daily country extracts
to `.env`. For example, to use the daily country extracts
diffs for Ireland from Geofabrik add the following:
// base URL of the replication service
@define('CONST_Replication_Url', 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates');
// How often upstream publishes diffs
@define('CONST_Replication_Update_Interval', '86400');
// How long to sleep if no update found yet
@define('CONST_Replication_Recheck_Interval', '900');
# base URL of the replication service
NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
# How often upstream publishes diffs (in seconds)
NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
# How long to sleep if no update found yet (in seconds)
NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
To set up the update process now run the following command:
./utils/update.php --init-updates
nominatim replication --init
It outputs the date where updates will start. Recheck that this date is
what you expect.
The `--init-updates` command needs to be rerun whenever the replication service
is changed.
The `replication --init` command needs to be rerun whenever the replication
service is changed.
#### Updating Nominatim
### Updating Nominatim
The following command will keep your database constantly up to date:
Nominatim supports different modes how to retrieve the update data from the
server. Which one you want to use depends on your exact setup and how often you
want to retrieve updates.
./utils/update.php --import-osmosis-all
These instructions are for using a single source of updates. If you have
imported multiple country extracts and want to keep them
up-to-date, [Advanced installations section](Advanced-Installations.md)
contains instructions to set up and update multiple country extracts.
#### One-time mode
When the `--once` parameter is given, then Nominatim will download exactly one
batch of updates and then exit. This one-time mode still respects the
`NOMINATIM_REPLICATION_UPDATE_INTERVAL` that you have set. If according to
the update interval no new data has been published yet, it will go to sleep
until the next expected update and only then attempt to download the next batch.
The one-time mode is particularly useful if you want to run updates continuously
but need to schedule other work in between updates. For example, the main
service at osm.org uses it, to regularly recompute postcodes -- a process that
must not be run while updates are in progress. Its update script
looks like this:
```sh
#!/bin/bash
# Switch to your project directory.
cd /srv/nominatim
while true; do
nominatim replication --once
if [ -f "/srv/nominatim/schedule-maintenance" ]; then
rm /srv/nominatim/schedule-maintenance
nominatim refresh --postcodes
fi
done
```
A cron job then creates the file `/srv/nominatim/schedule-maintenance` once per night.
##### One-time mode with systemd
You can run the one-time mode with a systemd timer & service.
Create a timer description like `/etc/systemd/system/nominatim-updates.timer`:
```
[Unit]
Description=Timer to start updates of Nominatim
[Timer]
OnActiveSec=2
OnUnitActiveSec=1min
Unit=nominatim-updates.service
[Install]
WantedBy=multi-user.target
```
And then a similar service definition: `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
Description=Single updates of Nominatim
[Service]
WorkingDirectory=/srv/nominatim
ExecStart=nominatim replication --once
StandardOutput=append:/var/log/nominatim-updates.log
StandardError=append:/var/log/nominatim-updates.error.log
User=nominatim
Group=nominatim
Type=simple
[Install]
WantedBy=multi-user.target
```
Replace the `WorkingDirectory` with your project directory. Also adapt user and
group names as required. `OnUnitActiveSec` defines how often the individual
update command is run.
Now activate the service and start the updates:
```
sudo systemctl daemon-reload
sudo systemctl enable nominatim-updates.timer
sudo systemctl start nominatim-updates.timer
```
You can stop future data updates, while allowing any current, in-progress
update steps to finish, by running `sudo systemctl stop
nominatim-updates.timer` and waiting until `nominatim-updates.service` isn't
running (`sudo systemctl is-active nominatim-updates.service`). Current output
from the update can be seen like above (`systemctl status
nominatim-updates.service`).
#### Catch-up mode
With the `--catch-up` parameter, Nominatim will immediately try to download
all changes from the server until the database is up-to-date. The catch-up mode
still respects the parameter `NOMINATIM_REPLICATION_MAX_DIFF`. It downloads and
applies the changes in appropriate batches until all is done.
The catch-up mode is foremost useful to bring the database up to speed after the
initial import. Give that the service usually is not in production at this
point, you can temporarily be a bit more generous with the batch size and
number of threads you use for the updates by running catch-up like this:
```
cd /srv/nominatim
NOMINATIM_REPLICATION_MAX_DIFF=5000 nominatim replication --catch-up --threads 15
```
The catch-up mode is also useful when you want to apply updates at a lower
frequency than what the source publishes. You can set up a cron job to run
replication catch-up at whatever interval you desire.
!!! hint
When running scheduled updates with catch-up, it is a good idea to choose
a replication source with an update frequency that is an order of magnitude
lower. For example, if you want to update once a day, use an hourly updated
source. This makes sure that you don't miss an entire day of updates when
the source is unexpectedly late to publish its update.
If you want to use the source with the same update frequency (e.g. a daily
updated source with daily updates), use the
continuous update mode. It ensures to re-request the newest update until it
is published.
#### Continuous updates
!!! danger
This mode is no longer recommended to use and will removed in future
releases. systemd is much better
suited for running regular updates. Please refer to the setup
instructions for running one-time mode with systemd above.
This is the easiest mode. Simply run the replication command without any
parameters:
nominatim replication
The update application keeps running forever and retrieves and applies
new updates from the server as they are published.
You can run this command as a simple systemd service. Create a service
description like that in `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
Description=Continuous updates of Nominatim
[Service]
WorkingDirectory=/srv/nominatim
ExecStart=nominatim replication
StandardOutput=append:/var/log/nominatim-updates.log
StandardError=append:/var/log/nominatim-updates.error.log
User=nominatim
Group=nominatim
Type=simple
[Install]
WantedBy=multi-user.target
```
Replace the `WorkingDirectory` with your project directory. Also adapt user
and group names as required.
Now activate the service and start the updates:
```
sudo systemctl daemon-reload
sudo systemctl enable nominatim-updates
sudo systemctl start nominatim-updates
```
(Note that even though the old name "import-osmosis-all" has been kept for
compatibility reasons, Osmosis is not required to run this - it uses pyosmium
behind the scenes.)
If you have imported multiple country extracts and want to keep them
up-to-date, [Advanced installations section](Advanced-Installations.md) contains instructions
to set up and update multiple country extracts.

View File

@@ -2,13 +2,17 @@
Show all details about a single place saved in the database.
This API endpoint is meant for visual inspection of the data in the database,
mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
The parameters of the endpoint and the output may change occasionally between
versions of Nominatim. Do not rely on the output in scripts or applications.
!!! warning
The details page exists for debugging only. You may not use it in scripts
or to automatically query details about a result.
The details endpoint at https://nominatim.openstreetmap.org
may not used in scripts or bots at all.
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
## Parameters
The details API supports the following two request formats:
@@ -35,59 +39,90 @@ for a place is different between Nominatim installation (servers) and
changes when data gets reimported. Therefore it cannot be used as
a permanent id and shouldn't be used in bug reports.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/details.php`. This is now deprecated
and will be removed in future versions.
Additional optional parameters are explained below.
## Parameters
This section lists additional optional parameters.
### Output format
* `json_callback=<string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
When set, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
* `pretty=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| pretty | 0 or 1 | 0 |
Add indentation to make it more human-readable. (Default: 0)
`[PHP-only]` Add indentation to the output to make it more human-readable.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
* `keywords=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| keywords | 0 or 1 | 0 |
Include a list of name keywords and address keywords (word ids). (Default: 0)
When set to 1, include a list of name keywords and address keywords
in the result.
* `linkedplaces=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| linkedplaces | 0 or 1 | 1 |
Include a details of places that are linked with this one. Places get linked
Include details of places that are linked with this one. Places get linked
together when they are different forms of the same physical object. Nominatim
links two kinds of objects together: place nodes get linked with the
corresponding administrative boundaries. Waterway relations get linked together with their
members.
(Default: 1)
* `hierarchy=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| hierarchy | 0 or 1 | 0 |
Include details of places lower in the address hierarchy. (Default: 0)
Include details of places lower in the address hierarchy.
* `group_hierarchy=[0|1]`
`[Python-only]` will only return properly parented places. These are address
or POI-like places that reuse the address of their parent street or place.
For JSON output will group the places by type. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| group_hierarchy | 0 or 1 | 0 |
* `polygon_geojson=[0|1]`
When set to 1, the output of the address hierarchy will be
grouped by type.
Include geometry of result. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
Include geometry of result.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing result, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
## Examples

View File

@@ -35,7 +35,7 @@ it contains the county/state/country across the border.
#### 3. I get different counties/states/countries when I change the zoom parameter in the reverse query. How is that possible?
This is basically the same problem as in the previous answer.
The zoom level influences at which [search rank](https://wiki.openstreetmap.org/wiki/Nominatim/Development_overview#Country_to_street_level) Nominatim starts looking
The zoom level influences at which [search rank](../customize/Ranking.md#search-rank) Nominatim starts looking
for the closest object. So the closest house number maybe on one side of the
border while the closest street is on the other. As the address details contain
the address of the closest object found, you might sometimes get one result,
@@ -58,4 +58,28 @@ The [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API) is more
suited for these kinds of queries.
That said if you installed your own Nominatim instance you can use the
`/utils/export.php` PHP script as basis to return such lists.
`nominatim export` PHP script as basis to return such lists.
#### 7. My result has a wrong postcode. Where does it come from?
Most places in OSM don't have a postcode, so Nominatim tries to interpolate
one. It first look at all the places that make up the address of the place.
If one of them has a postcode defined, this is the one to be used. When
none of the address parts has a postcode either, Nominatim interpolates one
from the surrounding objects. If the postcode is for your result is one, then
most of the time there is an OSM object with the wrong postcode nearby.
To find the bad postcode, go to
[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
and search for your place. When you have found it, click on the 'details' link
under the result to go to the details page. There is a field 'Computed Postcode'
which should display the bad postcode. Click on the 'how?' link. A small
explanation text appears. It contains a link to a query for Overpass Turbo.
Click on that and you get a map with all places in the area that have the bad
postcode. If none is displayed, zoom the map out a bit and then click on 'Run'.
Now go to [OpenStreetMap](https://openstreetmap.org) and fix the error you
have just found. It will take at least a day for Nominatim to catch up with
your data fix. Sometimes longer, depending on how much editing activity is in
the area.

View File

@@ -3,7 +3,7 @@
The lookup API allows to query the address and other details of one or
multiple OSM objects like node, way or relation.
## Parameters
## Endpoint
The lookup API has the following format:
@@ -15,86 +15,140 @@ The lookup API has the following format:
prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
can be queried at the same time.
Additional optional parameters are explained below.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional optional parameters.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Only has an effect for JSON output formats.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response include any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contratry always
send the currently chosen browser language.
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making large numbers of request please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
## Examples
##### XML
[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)
```xml
<lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
<place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
<lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data &#xA9; OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
<place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
<city>Manchester</city>
<county>Greater Manchester</county>
<state_district>North West England</state_district>
@@ -102,21 +156,20 @@ This overrides the specified machine readable format. (Default: 0)
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
<place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
<attraction>Brandenburg Gate</attraction>
<house_number>1</house_number>
<pedestrian>Pariser Platz</pedestrian>
<suburb>Mitte</suburb>
<city_district>Mitte</city_district>
<city>Berlin</city>
<state>Berlin</state>
<postcode>10117</postcode>
<place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Stra&#xDF;e, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
<tourism>Brandenburger Tor</tourism>
<road>Brandenburger Stra&#xDF;e</road>
<suburb>Historische Innenstadt</suburb>
<city>Potsdam</city>
<state>Brandenburg</state>
<postcode>14467</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
<place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
<place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
<city>Berlin</city>
<state>Berlin</state>
<postcode>10178</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
@@ -125,38 +178,50 @@ This overrides the specified machine readable format. (Default: 0)
##### JSON with extratags
[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)
```json
[
{
"place_id": "84271358",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"osm_type": "way",
"osm_id": "50637691",
"lat": "52.39955055",
"lon": "13.04806574678",
"display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
"class": "historic",
"type": "city_gate",
"importance": "0.221233780277011",
"address": {
"address29": "Brandenburger Tor",
"pedestrian": "Brandenburger Straße",
"suburb": "Nördliche Innenstadt",
"city": "Potsdam",
"state": "Brandenburg",
"postcode": "14467",
"country": "Germany",
"country_code": "de"
},
"extratags": {
"image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
"wikidata": "Q695045",
"wikipedia": "de:Brandenburger Tor (Potsdam)",
"wheelchair": "yes",
"description": "Kleines Brandenburger Tor in Potsdam"
}
}
{
"place_id": 115462561,
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"osm_type": "way",
"osm_id": 50637691,
"boundingbox": [
"52.3994612",
"52.3996426",
"13.0479574",
"13.0481754"
],
"lat": "52.399550700000006",
"lon": "13.048066846939687",
"display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
"class": "tourism",
"type": "attraction",
"importance": 0.2940287400552381,
"address": {
"tourism": "Brandenburger Tor",
"road": "Brandenburger Straße",
"suburb": "Historische Innenstadt",
"city": "Potsdam",
"state": "Brandenburg",
"postcode": "14467",
"country": "Germany",
"country_code": "de"
},
"extratags": {
"image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
"heritage": "4",
"wikidata": "Q695045",
"architect": "Carl von Gontard;Georg Christian Unger",
"wikipedia": "de:Brandenburger Tor (Potsdam)",
"wheelchair": "yes",
"description": "Kleines Brandenburger Tor in Potsdam",
"heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
"heritage:operator": "bldam",
"architect:wikidata": "Q68768;Q95223",
"year_of_construction": "1771"
}
}
]
```

View File

@@ -28,6 +28,7 @@ a single place (for reverse) of the following format:
"city": "London",
"state_district": "Greater London",
"state": "England",
"ISO3166-2-lvl4": "GB-ENG",
"postcode": "SW1A 2DU",
"country": "United Kingdom",
"country_code": "gb"
@@ -97,7 +98,10 @@ The GeocodeJSON format follows the
The following feature attributes are implemented:
* `osm_type`, `osm_id` - reference to the OSM object (unofficial extension, [see notes](#osm-reference))
* `type` - value of the main tag of the object (e.g. residential, restaurant, ...)
* `type` - the 'address level' of the object ('house', 'street', `district`, `city`,
`county`, `state`, `country`, `locality`)
* `osm_key`- key of the main tag of the OSM object (e.g. boundary, highway, amenity)
* `osm_value` - value of the main tag of the OSM object (e.g. residential, restaurant)
* `label` - full comma-separated address
* `name` - localised name of the place
* `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
@@ -126,6 +130,7 @@ formats depending on the API call.
</result>
<addressparts>
<state>Bavaria</state>
<ISO3166-2-lvl4>DE-BY</ISO3166-2-lvl4>
<country>Germany</country>
<country_code>de</country_code>
</addressparts>
@@ -179,6 +184,7 @@ Additional information requested with `addressdetails=1`, `extratags=1` and
<city>London</city>
<state_district>Greater London</state_district>
<state>England</state>
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
<postcode>SW1A 2DU</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
@@ -205,8 +211,8 @@ be more than one. The attributes of that element contain:
* `ref` - content of `ref` tag if it exists
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
* `place_rank` - class [search rank](../develop/Ranking#search-rank)
* `address_rank` - place [address rank](../develop/Ranking#address-rank)
* `place_rank` - class [search rank](../customize/Ranking.md#search-rank)
* `address_rank` - place [address rank](../customize/Ranking.md#address-rank)
* `display_name` - full comma-separated address
* `class`, `type` - key and value of the main OSM tag
* `importance` - computed importance rank
@@ -230,7 +236,7 @@ on another server. It may even change its ID on the same server when it is
removed and reimported while updating the database with fresh OSM data.
It is thus not useful to treat it as permanent for later use.
The combination `osm_type`+`osm_id` is slighly better but remember in
The combination `osm_type`+`osm_id` is slightly better but remember in
OpenStreetMap mappers can delete, split, recreate places (and those
get a new `osm_id`), there is no link between those old and new ids.
Places can also change their meaning without changing their `osm_id`,
@@ -279,17 +285,18 @@ with a designation label. Per default the following labels may appear:
* continent
* country, country_code
* region, state, state_district, county
* region, state, state_district, county, ISO3166-2-lvl<admin_level>
* municipality, city, town, village
* city_district, district, borough, suburb, subdivision
* hamlet, croft, isolated_dwelling
* neighbourhood, allotments, quarter
* city_block, residental, farm, farmyard, industrial, commercial, retail
* city_block, residential, farm, farmyard, industrial, commercial, retail
* road
* house_number, house_name
* emergency, historic, military, natural, landuse, place, railway,
man_made, aerialway, boundary, amenity, aeroway, club, craft, leisure,
office, mountain_pass, shop, tourism, bridge, tunnel, waterway
* postcode
They roughly correspond to the classification of the OpenStreetMap data
according to either the `place` tag or the main key of the object.

View File

@@ -1,8 +1,16 @@
### Nominatim API
Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
!!! Attention
The current version of Nominatim implements two different search frontends:
the old PHP frontend and the new Python frontend. They have a very similar
API but differ in some implementation details. These are marked in the
documentation as `[Python-only]` or `[PHP-only]`.
Its API has the following endpoints for querying the data:
`https://nominatim.openstreetmap.org` implements the **Python frontend**.
So users should refer to the **`[Python-only]`** comments.
This section describes the API V1 of the Nominatim web service. The
service offers the following endpoints:
* __[/search](Search.md)__ - search OSM objects by name or type
* __[/reverse](Reverse.md)__ - search OSM object by their location
@@ -12,3 +20,6 @@ Its API has the following endpoints for querying the data:
back in Nominatim in case the deletion was accidental
* __/polygons__ - list of broken polygons detected by Nominatim
* __[/details](Details.md)__ - show internal details for an object (for debugging only)

View File

@@ -1,6 +1,7 @@
# Reverse Geocoding
Reverse geocoding generates an address from a latitude and longitude.
Reverse geocoding generates an address from a coordinate given as
latitude and longitude.
## How it works
@@ -18,8 +19,7 @@ The other issue to be aware of is that the closest OSM object may not always
have a similar enough address to the coordinate you were requesting. For
example, in dense city areas it may belong to a completely different street.
## Parameters
## Endpoint
The main format of the reverse API is
@@ -27,61 +27,105 @@ The main format of the reverse API is
https://nominatim.openstreetmap.org/reverse?lat=<value>&lon=<value>&<params>
```
where `lat` and `lon` are latitude and longitutde of a coordinate in WGS84
where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
projection. The API returns exactly one result or an error when the coordinate
is in an area with no OSM data coverage.
Additional paramters are accepted as listed below.
!!! warning "Deprecation warning"
!!! danger "Deprecation warning"
The reverse API used to allow address lookup for a single OSM object by
its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
instead.
its OSM id for `[PHP-only]`. The use is considered deprecated.
Use the [Address Lookup API](Lookup.md) instead.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional parameters to further influence the output.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Only has an effect for JSON output formats.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 1 |
Include a breakdown of the address into elements. (Default: 1)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response include any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
### Result limitation
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contratry always
send the currently chosen browser language.
* `zoom=[0-18]`
Level of detail required for the address. Default: 18. This is a number that
### Result restriction
| Parameter | Value | Default |
|-----------| ----- | ------- |
| zoom | 0-18 | 18 |
Level of detail required for the address. This is a number that
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
like Leaflet.js, Openlayers etc.
In terms of address details the zoom levels are as follows:
@@ -92,41 +136,79 @@ In terms of address details the zoom levels are as follows:
5 | state
8 | county
10 | city
14 | suburb
12 | town / borough
13 | village / suburb
14 | neighbourhood
15 | any settlement
16 | major streets
17 | major and minor streets
18 | building
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
The layer filter allows to select places by themes.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states etc.) and administrative boundaries.
The `poi` layer selects all point of interest. This includes classic points
of interest like restaurants, shops, hotels but also less obvious features
like recycling bins, guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects feautures like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making large numbers of request please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
## Examples
@@ -162,7 +244,7 @@ This overrides the specified machine readable format. (Default: 0)
"licence":"Data © OpenStreetMap contributors, ODbL 1.0. https:\/\/www.openstreetmap.org\/copyright",
"osm_type":"way",
"osm_id":"280940520",
"lat":"-34.4391708",
"lat":"-34.4391708",
"lon":"-58.7064573",
"place_rank":"26",
"category":"highway",

View File

@@ -8,12 +8,12 @@ The search query may also contain
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
This can be used to narrow down the kind of objects to be returned.
!!! warning
!!! note
Special phrases are not suitable to query all objects of a certain type in an
area. Nominatim will always just return a collection of the best matches. To
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
## Parameters
## Endpoint
The search API has the following format:
@@ -21,82 +21,211 @@ The search API has the following format:
https://nominatim.openstreetmap.org/search?<params>
```
The search term may be specified with two different sets of parameters:
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/search.php`. This is now deprecated
and will be removed in future versions.
* `q=<query>`
The query term can be given in two different forms: free-form or structured.
Free-form query string to search for.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](//nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](//nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
### Free-form query
| Parameter | Value |
|-----------| ----- |
| q | Free-form query string to search for |
* `street=<housenumber> <streetname>`
* `city=<city>`
* `county=<county>`
* `state=<state>`
* `country=<country>`
* `postalcode=<postalcode>`
In this form, the query can be unstructured.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
Alternative query string format split into several parameters for structured requests.
Structured requests are faster but are less robust against alternative
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
The free-form may also contain special phrases to describe the type of
place to be returned or a coordinate to search close to a position.
Both query forms accept the additional parameters listed below.
### Structured query
| Parameter | Value |
|----------- | ----- |
| amenity | name and/or type of POI |
| street | housenumber and streetname |
| city | city |
| county | county |
| state | state |
| country | country |
| postalcode | postal code |
The structured form of the search query allows to lookup up an address
that is already split into its components. Each parameter represents a field
of the address. All parameters are optional. You should only use the ones
that are relevant for the address you want to geocode.
!!! Attention
Cannot be combined with the `q=<query>` parameter. Newer versions of
the API will return an error if you do so. Older versions simply return
unexpected results.
## Parameters
The following parameters can be used to further restrict the search and
change the output. They are usable for both forms of the search query.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
!!! note
The Nominatim service at
[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
has a different default behaviour for historical reasons. When the
`format` parameter is omitted, the request will be forwarded to the Web UI.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| limit | number | 10 |
Limit the maximum number of returned results. Cannot be more than 40.
Nominatim may decide to return less results than given, if additional
results do not sufficiently match the query.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response include any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results, overrides the value
specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
### Result limitation
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contratry always
send the currently chosen browser language.
* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
### Result restriction
Limit search results to one or more countries. `<countrycode>` must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
e.g. `gb` for the United Kingdom, `de` for Germany.
There are two ways to influence the results. *Filters* exclude certain
kinds of results completely. *Boost parameters* only change the order of the
results and thus give a preference to some results over others.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| countrycodes | comma-separated list of country codes | _unset_ |
Filer that limits the search results to one or more countries.
The country code must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
Each place in Nominatim is assigned to one country code based
on OSM country boundaries. In rare cases a place may not be in any country
at all, for example, in international waters.
at all, for example, when it is in international waters. These places are
also excluded when the filter is set.
* `exclude_place_ids=<place_id,[place_id],[place_id]`
!!! Note
This parameter should not be confused with the 'country' parameter of
the structured query. The 'country' parameter contains a search term
and will be handled with some fuzziness. The `countrycodes` parameter
is a hard filter and as such should be prefered. Having both parameters
in the same query will work. If the parameters contradict each other,
the search will come up empty.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
The layer filter allows to select places by themes.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states tec.) and administrative boundaries.
The `poi` layer selects all point of interest. This includes classic POIs like
restaurants, shops, hotels but also less obvious features like recycling bins,
guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects feautures like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
The featureType allows to have a more fine-grained selection for places
from the address layer. Results can be restricted to places that make up
the 'state', 'country' or 'city' part of an address. A featureType of
settlement selects any human inhabited feature from 'state' down to
'neighbourhood'.
When featureType ist set, then results are automatically restricted
to the address layer (see above).
!!! tip
Instead of using the featureType filters `country`, `state` or `city`,
you can also use a structured query without the finer-grained parameters
amenity or street.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| exclude_place_ids | comma-separeted list of place ids |
If you do not want certain OSM objects to appear in the search
result, give a comma separated list of the `place_id`s you want to skip.
@@ -104,180 +233,212 @@ This can be used to retrieve additional search results. For example, if a
previous query only returned a few results, then including those here would
cause the search to return other, less accurate, matches (if possible).
| Parameter | Value | Default |
|-----------| ----- | ------- |
| viewbox | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
* `limit=<integer>`
Boost parameter which focuses the search on the given area.
Any two corner points of the box are accepted as long as they make a proper
box. `x` is longitude, `y` is latitude.
Limit the number of returned results. (Default: 10, Maximum: 50)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| bounded | 0 or 1 | 0 |
When set to 1, then it turns the 'viewbox' parameter (see above) into
a filter paramter, excluding any results outside the viewbox.
* `viewbox=<x1>,<y1>,<x2>,<y2>`
The preferred area to find search results. Any two corner points of the box
are accepted as long as they span a real box. `x` is longitude,
`y` is latitude.
* `bounded=[0|1]`
When a viewbox is given, restrict the result to items contained within that
viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
only search is allowed. Give the special keyword for the amenity in square
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
search is allowed. Give the special keyword for the amenity in square
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
There is no guarantee that the result is complete. (Default: 0)
There is no guarantee that the result returns all objects in the area.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making large numbers of request please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `dedupe=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| dedupe | 0 or 1 | 1 |
Sometimes you have several objects in OSM identifying the same place or
object in reality. The simplest case is a street being split into many
different OSM ways due to different characteristics. Nominatim will
attempt to detect such duplicates and only return one match unless
this parameter is set to 0. (Default: 1)
attempt to detect such duplicates and only return one match. Setting
this parameter to 0 disables this deduplication mechanism and
ensures that all results are returned.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
## Examples
##### XML with kml polygon
##### XML with KML polygon
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
```xml
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
class="place" type="house">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<village>Wylde Green</village>
<town>Sutton Coldfield</town>
<city>City of Birmingham</city>
<county>West Midlands (county)</county>
<postcode>B72</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
<?xml version="1.0" encoding="UTF-8" ?>
<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
querystring="135 pilkington avenue, birmingham"
more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&amp;polygon_kml=1&amp;addressdetails=1&amp;limit=20&amp;exclude_place_ids=125279639&amp;format=xml"
exclude_place_ids="125279639">
<place place_id="125279639"
osm_type="way"
osm_id="90394480"
lat="52.5487921"
lon="-1.8164308"
boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
place_rank="30"
address_rank="30"
display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
class="building"
type="residential"
importance="9.999999994736442e-08">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<hamlet>Maney</hamlet>
<town>Sutton Coldfield</town>
<village>Wylde Green</village>
<city>Birmingham</city>
<ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
<state_district>West Midlands Combined Authority</state_district>
<state>England</state>
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
<postcode>B72 1LH</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
```
##### JSON with SVG polygon
[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
```json
{
"address": {
"city": "Berlin",
"city_district": "Mitte",
"construction": "Unter den Linden",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"house_number": "1",
"neighbourhood": "Scheunenviertel",
"postcode": "10117",
"public_building": "Kommandantenhaus",
"state": "Berlin",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170783996582",
"52.5173187255859",
"13.3975105285645",
"13.3981599807739"
],
"class": "amenity",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
"importance": 0.73606775332943,
"lat": "52.51719785",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.3978352028938",
"osm_id": "15976890",
"osm_type": "way",
"place_id": "30848715",
"svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
"type": "public_building"
}
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"historic": "Kommandantenhaus",
"house_number": "1",
"neighbourhood": "Friedrichswerder",
"postcode": "10117",
"road": "Unter den Linden",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170798",
"52.5173311",
"13.3975116",
"13.3981577"
],
"class": "historic",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
"importance": 0.8135042058306902,
"lat": "52.51720765",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"lon": "13.397834399325466",
"osm_id": 15976890,
"osm_type": "way",
"place_id": 108681845,
"svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
"type": "house"
}
]
```
##### JSON with address details
[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
```json
{
"address": {
"bakery": "B\u00e4cker Kamps",
"city_district": "Mitte",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"footway": "Bahnsteig U6",
"neighbourhood": "Sprengelkiez",
"postcode": "13353",
"state": "Berlin",
"suburb": "Wedding"
},
"boundingbox": [
"52.5460929870605",
"52.5460968017578",
"13.3591794967651",
"13.3591804504395"
],
"class": "shop",
"display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
"icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
"importance": 0.201,
"lat": "52.5460941",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.35918",
"osm_id": "317179427",
"osm_type": "node",
"place_id": "1453068",
"type": "bakery"
}
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"neighbourhood": "Sprengelkiez",
"postcode": "13347",
"road": "Lindower Straße",
"shop": "Ditsch",
"suburb": "Wedding"
},
"addresstype": "shop",
"boundingbox": [
"52.5427201",
"52.5427654",
"13.3668619",
"13.3669442"
],
"category": "shop",
"display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
"importance": 9.99999999995449e-06,
"lat": "52.54274275",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
"lon": "13.36690305710228",
"name": "Ditsch",
"osm_id": 437595031,
"osm_type": "way",
"place_id": 204751033,
"place_rank": 30,
"type": "bakery"
}
]
```
##### GeoJSON

View File

@@ -1,46 +1,67 @@
# Status
Useful for checking if the service and database is running. The JSON output also shows
Report on the state of the service and database. Useful for checking if the
service is up and running. The JSON output also reports
when the database was last updated.
## Endpoint
The status API has the following format:
```
https://nominatim.openstreetmap.org/status
```
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/status.php`. This is now deprecated
and will be removed in future versions.
## Parameters
* `format=[text|json]` (defaults to 'text')
The status endpoint takes a single optional parameter:
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `text`, `json` | 'text' |
Selects the output format. See below.
## Output
#### Text format
```
https://nominatim.openstreetmap.org/status.php
```
When everything is okay, a status code 200 is returned and a simple message: `OK`
will return HTTP status code 200 and print `OK`.
On error it will return HTTP status code 500 and print a message, e.g.
On error it will return HTTP status code 500 and print a detailed error message, e.g.
`ERROR: Database connection failed`.
#### JSON format
```
https://nominatim.openstreetmap.org/status.php?format=json
```
Always returns a HTTP code 200, when the status call could be executed.
will return HTTP code 200 and a structure
On success a JSON dictionary with the following structure is returned:
```json
{
"status": 0,
"message": "OK",
"data_updated": "2020-05-04T14:47:00+00:00"
"data_updated": "2020-05-04T14:47:00+00:00",
"software_version": "3.6.0-0",
"database_version": "3.6.0-0"
}
```
On error will also return HTTP status code 200 and a structure with error
code and message, e.g.
The `software_version` field contains the version of Nominatim used to serve
the API. The `database_version` field contains the version of the data format
in the database.
On error will return a shorter JSON dictionary with the error message
and status only, e.g.
```json
{
@@ -48,13 +69,3 @@ code and message, e.g.
"message": "Database connection failed"
}
```
Possible status codes are
| | message | notes |
|-----|----------------------|---------------------------------------------------|
| 700 | "No database" | connection failed |
| 701 | "Module failed" | database could not load nominatim.so |
| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
| 703 | "Query failed" | test query against a database table failed |
| 704 | "No value" | test query worked but returned no results |

View File

@@ -0,0 +1,149 @@
# Customizing Per-Country Data
Whenever an OSM is imported into Nominatim, the object is first assigned
a country. Nominatim can use this information to adapt various aspects of
the address computation to the local customs of the country. This section
explains how country assignment works and the principal per-country
localizations.
## Country assignment
Countries are assigned on the basis of country data from the OpenStreetMap
input data itself. Countries are expected to be tagged according to the
[administrative boundary schema](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative):
a OSM relation with `boundary=administrative` and `admin_level=2`. Nominatim
uses the country code to distinguish the countries.
If there is no country data available for a point, then Nominatim uses the
fallback data imported from `data/country_osm_grid.sql.gz`. This was computed
from OSM data as well but is guaranteed to cover all countries.
Some OSM objects may also be located outside any country, for example a buoy
in the middle of the ocean. These object do not get any country assigned and
get a default treatment when it comes to localized handling of data.
## Per-country settings
### Global country settings
The main place to configure settings per country is the file
`settings/country_settings.yaml`. This file has one section per country that
is recognised by Nominatim. Each section is tagged with the country code
(in lower case) and contains the different localization information. Only
countries which are listed in this file are taken into account for computations.
For example, the section for Andorra looks like this:
```
partition: 35
languages: ca
names: !include country-names/ad.yaml
postcode:
pattern: "(ddd)"
output: AD\1
```
The individual settings are described below.
#### `partition`
Nominatim internally splits the data into multiple tables to improve
performance. The partition number tells Nominatim into which table to put
the country. This is purely internal management and has no effect on the
output data.
The default is to have one partition per country.
#### `languages`
A comma-separated list of ISO-639 language codes of default languages in the
country. These are the languages used in name tags without a language suffix.
Note that this is not necessarily the same as the list of official languages
in the country. There may be officially recognised languages in a country
which are only ever used in name tags with the appropriate language suffixes.
Conversely, a non-official language may appear a lot in the name tags, for
example when used as an unofficial Lingua Franca.
List the languages in order of frequency of appearance with the most frequently
used language first. It is not recommended to add languages when there are only
very few occurrences.
If only one language is listed, then Nominatim will 'auto-complete' the
language of names without an explicit language-suffix.
#### `names`
List of names of the country and its translations. These names are used as
a baseline. It is always possible to search countries by the given names, no
matter what other names are in the OSM data. They are also used as a fallback
when a needed translation is not available.
!!! Note
The list of names per country is currently fairly large because Nominatim
supports translations in many languages per default. That is why the
name lists have been separated out into extra files. You can find the
name lists in the file `settings/country-names/<country code>.yaml`.
The names section in the main country settings file only refers to these
files via the special `!include` directive.
#### `postcode`
Describes the format of the postcode that is in use in the country.
When a country has no official postcodes, set this to no. Example:
```
ae:
postcode: no
```
When a country has a postcode, you need to state the postcode pattern and
the default output format. Example:
```
bm:
postcode:
pattern: "(ll)[ -]?(dd)"
output: \1 \2
```
The **pattern** is a regular expression that describes the possible formats
accepted as a postcode. The pattern follows the standard syntax for
[regular expressions in Python](https://docs.python.org/3/library/re.html#regular-expression-syntax)
with two extra shortcuts: `d` is a shortcut for a single digit([0-9])
and `l` for a single ASCII letter ([A-Z]).
Use match groups to indicate groups in the postcode that may optionally be
separated with a space or a hyphen.
For example, the postcode for Bermuda above always consists of two letters
and two digits. They may optionally be separated by a space or hyphen. That
means that Nominatim will consider `AB56`, `AB 56` and `AB-56` spelling variants
for one and the same postcode.
Never add the country code in front of the postcode pattern. Nominatim will
automatically accept variants with a country code prefix for all postcodes.
The **output** field is an optional field that describes what the canonical
spelling of the postcode should be. The format is the
[regular expression expand syntax](https://docs.python.org/3/library/re.html#re.Match.expand) referring back to the bracket groups in the pattern.
Most simple postcodes only have one spelling variant. In that case, the
**output** can be omitted. The postcode will simply be used as is.
In the Bermuda example above, the canonical spelling would be to have a space
between letters and digits.
!!! Warning
When your postcode pattern covers multiple variants of the postcode, then
you must explicitly state the canonical output or Nominatim will not
handle the variations correctly.
### Other country-specific configuration
There are some other configuration files where you can set localized settings
according to the assigned country. These are:
* [Place ranking configuration](Ranking.md)
Please see the linked documentation sections for more information.

View File

@@ -0,0 +1,443 @@
## Configuring the Import
In the very first step of a Nominatim import, OSM data is loaded into the
database. Nominatim uses [osm2pgsql](https://osm2pgsql.org) for this task.
It comes with a [flex style](https://osm2pgsql.org/doc/manual.html#the-flex-output)
specifically tailored to filter and convert OSM data into Nominatim's
internal data representation.
There are a number of default configurations for the flex style which
result in geocoding databases of different detail. The
[Import section](../admin/Import.md#filtering-imported-data) explains
these default configurations in detail.
You can also create your own custom style. Put the style file into your
project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
It is always recommended to start with one of the standard styles and customize
those. You find the standard styles under the name `import-<stylename>.lua`
in the standard Nominatim configuration path (usually `/etc/nominatim` or
`/usr/local/etc/nominatim`).
The remainder of the page describes how the flex style works and how to
customize it.
### The `flex-base.lua` module
The core of Nominatim's flex import configuration is the `flex-base` module.
It defines the table layout used by Nominatim and provides standard
implementations for the import callbacks that make it easy to customize
how OSM tags are used by Nominatim.
Every custom style should include this module to make sure that the correct
tables are created. Thus start your custom style as follows:
``` lua
local flex = require('flex-base')
```
The following sections explain how the module can be customized.
### Changing the recognized tags
If you just want to change which OSM tags are recognized during import,
then there are a number of convenience functions to set the tag lists used
during the processing.
!!! warning
There are no built-in defaults for the tag lists, so all the functions
need to be called from your style script to fully process the data.
Make sure you start from one of the default style and only modify
the data you are interested in. You can also derive your style from an
existing style by importing the appropriate module, e.g.
`local flex = require('import-street')`.
Many of the following functions take _key match lists_. These lists can
contain three kinds of strings to match against tag keys:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
#### `set_main_tags()` - principal tags
If a principal or main tag is found on an OSM object, then the object
is included in Nominatim's search index. A single object may also have
multiple main tags. In that case, the object will be included multiple
times in the index, once for each main tag.
The flex script distinguishes between four types of main tags:
* __always__: a main tag that is used unconditionally
* __named__: consider this main tag only, if the object has a proper name
(a reference is not enough, see below).
* __named_with_key__: consider this main tag only, when the object has
a proper name with a domain prefix. For example, if the main tag is
`bridge=yes`, then it will only be added as an extra row, if there is
a tag `bridge:name[:XXX]` for the same object. If this property is set,
all other names that are not domain-specific are ignored.
* __fallback__: use this main tag only, if there is no other main tag.
Fallback always implied `named`, i.e. fallbacks are only tried for
named objects.
The `set_main_tags()` function takes exactly one table parameter which
defines the keys and key/value combinations to include and the kind of
main tag. Each lua table key defines an OSM tag key. The value may
be a string defining the kind of main key as described above. Then the tag will
be considered a main tag for any possible value. To further restrict
which values are acceptable, give a table with the permitted values
and their kind of main tag. If the table contains a simple value without
key, then this is used as default for values that are not listed.
!!! example
``` lua
local flex = require('import-full')
flex.set_main_tags{
boundary = {administrative = 'named'},
highway = {'always', street_lamp = 'named'},
landuse = 'fallback'
}
```
In this example an object with a `boundary` tag will only be included
when it has a value of `administrative`. Objects with `highway` tags are
always included. However when the value is `street_lamp` then the object
must have a name, too. With any other value, the object is included
independently of the name. Finally, if a `landuse` tag is present then
it will be used independely of the concrete value if neither boundary
nor highway tags were found and the object is named.
#### `set_prefilters()` - ignoring tags
Pre-filtering of tags allows to ignore them for any further processing.
Thus pre-filtering takes precedence over any other tag processing. This is
useful when some specific key/value combinations need to be excluded from
processing. When tags are filtered, they may either be deleted completely
or moved to `extratags`. Extra tags are saved with the object and returned
to the user when requested, but are not used otherwise.
`set_prefilters()` takes a table with four optional fields:
* __delete_keys__ is a _key match list_ for tags that should be deleted
* __delete_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are deleted.
* __extra_keys__ is a _key match list_ for tags which should be saved into
extratags
* __extra_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are moved to extratags.
Key list may contain three kinds of strings:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
!!! example
``` lua
local flex = require('import-full')
flex.set_prefilters{
delete_keys = {'source', 'source:*'},
extra_tags = {amenity = {'yes', 'no'}}
}
flex.set_main_tags{
amenity = 'always'
}
```
In this example any tags `source` and tags that begin with `source:` are
deleted before any other processing is done. Getting rid of frequent tags
this way can speed up the import.
Tags with `amenity=yes` or `amenity=no` are moved to extratags. Later
all tags with an `amenity` key are made a main tag. This effectively means
that Nominatim will use all amenity tags except for those with value
yes and no.
#### `set_name_tags()` - defining names
The flex script distinguishes between two kinds of names:
* __main__: the primary names make an object fully searchable.
Main tags of type _named_ will only cause the object to be included when
such a primary name is present. Primary names are usually those found
in the `name` tag and its variants.
* __extra__: extra names are still added to the search index but they are
alone not sufficient to make an object named.
`set_name_tags()` takes a table with two optional fields `main` and `extra`.
They take _key match lists_ for main and extra names respectively.
!!! example
``` lua
local flex = require('flex-base')
flex.set_main_tags{highway = {traffic_light = 'named'}}
flex.set_name_tags{main = {'name', 'name:*'},
extra = {'ref'}
}
```
This example creates a search index over traffic lights but will
only include those that have a common name and not those which just
have some reference ID from the city.
#### `set_address_tags()` - defining address parts
Address tags will be used to build up the address of an object.
`set_address_tags()` takes a table with arbitrary fields pointing to
_key match lists_. To fields have a special meaning:
* __main__: defines
the tags that make a full address object out of the OSM object. This
is usually the housenumber or variants thereof. If a main address tag
appears, then the object will always be included, if necessary with a
fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:`
this will be stripped.
* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped.
All other fields will be handled as summary fields. If a key matches the
key match list, then its value will be added to the address tags with the
name of the field as key. If multiple tags match, then an arbitrary one
wins.
Country tags are handled slightly special. Only tags with a two-letter code
are accepted, all other values are discarded.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
main = {'addr:housenumber'},
extra = {'addr:*'},
postcode = {'postal_code', 'postcode', 'addr:postcode'},
country = {'country-code', 'ISO3166-1'}
}
```
In this example all tags which begin with `addr:` will be saved in
the address tag list. If one of the tags is `addr:housenumber`, the
object will fall back to be entered as a `place=house` in the database
unless there is another interested main tag to be found.
Tags with keys `country-code` and `ISO3166-1` are saved with their
value under `country` in the address tag list. The same thing happens
to postcodes, they will always be saved under the key `postcode` thus
normalizing the multitude of keys that are used in the OSM database.
#### `set_unused_handling()` - processing remaining tags
This function defines what to do with tags that remain after all tags
have been classified using the functions above. There are two ways in
which the function can be used:
`set_unused_handling(delete_keys = ..., delete_tags = ...)` deletes all
keys that match the descriptions in the parameters and moves all remaining
tags into the extratags list.
`set_unused_handling(extra_keys = ..., extra_tags = ...)` moves all tags
matching the parameters into the extratags list and then deletes the remaining
tags. For the format of the parameters see the description in `set_prefilters()`
above.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
main = {'addr:housenumber'},
extra = {'addr:*', 'tiger:county'}
}
flex.set_unused_handling{delete_keys = {'tiger:*'}}
```
In this example all remaining tags except those beginning with `tiger:`
are moved to the extratags list. Note that it is not possible to
already delete the tiger tags with `set_prefilters()` because that
would remove tiger:county before the address tags are processed.
### Customizing osm2pgsql callbacks
osm2pgsql expects the flex style to implement three callbacks, one process
function per OSM type. If you want to implement special handling for
certain OSM types, you can override the default implementations provided
by the flex-base module.
#### Changing the relation types to be handled
The default scripts only allows relations of type `multipolygon`, `boundary`
and `waterway`. To add other types relations, set `RELATION_TYPES` for
the type to the kind of geometry that should be created. The following
kinds of geometries can be used:
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
the relation. If the ways do not form a valid area, then the object is
silently discarded.
* __relation_as_multiline__ creates a (Mutli)LineString from the ways in
the relations. Ways are combined as much as possible without any regards
to their order in the relation.
!!! Example
``` lua
local flex = require('import-full')
flex.RELATION_TYPES['site'] = flex.relation_as_multipolygon
```
With this line relations of `type=site` will be included in the index
according to main tags found. This only works when the site relation
resolves to a valid area. Nodes in the site relation are not part of the
geometry.
#### Adding additional logic to processing functions
The default processing functions are also exported by the flex-base module
as `process_node`, `process_way` and `process_relation`. These can be used
to implement your own processing functions with some additional processing
logic.
!!! Example
``` lua
local flex = require('import-full')
function osm2pgsql.process_relation(object)
if object.tags.boundary ~= 'administrative' or object.tags.admin_level ~= '2' then
flex.process_relation(object)
end
end
```
This example discards all country-level boundaries and uses standard
handling for everything else. This can be useful if you want to use
your own custom country boundaries.
### Customizing the main processing function
The main processing function of the flex style can be found in the function
`process_tags`. This function is called for all OSM object kinds and is
responsible for filtering the tags and writing out the rows into Postgresql.
!!! Example
``` lua
local flex = require('import-full')
local original_process_tags = flex.process_tags
function flex.process_tags(o)
if o.object.tags.highway ~= nil and o.object.tags.access == 'no' then
return
end
original_process_tags(o)
end
```
This example shows the most simple customization of the process_tags function.
It simply adds some additional processing before running the original code.
To do that, first save the original function and then overwrite process_tags
from the module. In this example all highways which are not accessible
by anyone will be ignored.
#### The `Place` class
The `process_tags` function receives a Lua object of `Place` type which comes
with some handy functions to collect the data necessary for geocoding and
writing it into the place table. Always use this object to fill the table.
The Place class has some attributes which you may access read-only:
* __object__ is the original OSM object data handed in by osm2pgsql
* __admin_level__ is the content of the admin_level tag, parsed into an
integer and normalized to a value between 0 and 15
* __has_name__ is a boolean indicating if the object has a full name
* __names__ is a table with the collected list of name tags
* __address__ is a table with the collected list of address tags
* __extratags__ is a table with the collected list of additional tags to save
There are a number of functions to fill these fields. All functions expect
a table parameter with fields as indicated in the description.
Many of these functions expect match functions which are described in detail
further below.
* __delete{match=...}__ removes all tags that match the match function given
in _match_.
* __grab_extratags{match=...}__ moves all tags that match the match function
given in _match_ into extratags. Returns the number of tags moved.
* __clean{delete=..., extra=...}__ deletes all tags that match _delete_ and
moves the ones that match _extra_ into extratags
* __grab_address_parts{groups=...}__ moves matching tags into the address table.
_groups_ must be a group match function. Tags of the group `main` and
`extra` are added to the address table as is but with `addr:` and `is_in:`
prefixes removed from the tag key. All other groups are added with the
group name as key and the value from the tag. Multiple values of the same
group overwrite each other. The function returns the number of tags saved
from the main group.
* __grab_main_parts{groups=...}__ moves matching tags into the name table.
_groups_ must be a group match function. If a tags of the group `main` is
present, the object will be marked as having a name. Tags of group `house`
produce a fallback to `place=house`. This fallback is return by the function
if present.
There are two functions to write a row into the place table. Both functions
expect the main tag (key and value) for the row and then use the collected
information from the name, address, extratags etc. fields to complete the row.
They also have a boolean parameter `save_extra_mains` which defines how any
unprocessed tags are handled: when True, the tags will be saved as extratags,
when False, they will be simply discarded.
* __write_row(key, value, save_extra_mains)__ creates a new table row from
the current state of the Place object.
* __write_place(key, value, mtype, save_extra_mains)__ creates a new row
conditionally. When value is nil, the function will attempt to look up the
value in the object tags. If value is still nil or mtype is nil, the row
is ignored. An mtype of `always` will then always write out the row,
a mtype of `named` only, when the object has a full name. When mtype
is `named_with_key`, the function checks for a domain name, i.e. a name
tag prefixed with the name of the main key. Only if at least one is found,
the row will be written. The names are replaced with the domain names found.
#### Match functions
The Place functions usually expect either a _match function_ or a
_group match function_ to find the tags to apply their function to.
The __match function__ is a Lua function which takes two parameters,
key and value, and returns a boolean to indicate that a tag matches. The
flex-base module has a convenience function `tag_match()` to create such a
function. It takes a table with two optional fields: `keys` takes a key match
list (see above), `tags` takes a table with keys that point to a list of
possible values, thus defining key/value matches.
The __group match function__ is a Lua function which also takes two parameters,
key and value, and returns a string indicating to which group or type they
belong to. The `tag_group()` can be used to create such a function. It expects
a table where the group names are the keys and the values are a key match list.
### Using the gazetteer output of osm2pgsql
Nominatim still allows you to configure the gazetteer output to remain
backwards compatible with older imports. It will be automatically used
when the style file name ends in `.style`. For documentation of the
old import style, please refer to the documentation of older releases
of Nominatim. Do not use the gazetteer output for new imports. There is no
guarantee that new versions of Nominatim are fully compatible with the
gazetteer output.
### Changing the Style of Existing Databases
There is normally no issue changing the style of a database that is already
imported and now kept up-to-date with change files. Just be aware that any
change in the style applies to updates only. If you want to change the data
that is already in the database, then a reimport is necessary.

View File

@@ -0,0 +1,49 @@
## Importance
Search requests can yield multiple results which match equally well with
the original query. In such case Nominatim needs to order the results
according to a different criterion: importance. This is a measure for how
likely it is that a user will search for a given place. This section explains
the sources Nominatim uses for computing importance of a place and how to
customize them.
### How importance is computed
The main value for importance is derived from page ranking values for Wikipedia
pages for a place. For places that do not have their own
Wikipedia page, a formula is used that derives a static importance from the
places [search rank](../customize/Ranking.md#search-rank).
In a second step, a secondary importance value is added which is meant to
represent how well-known the general area is where the place is located. It
functions as a tie-breaker between places with very similar primary
importance values.
nominatim.org has preprocessed importance tables for the
[primary Wikipedia rankings](https://nominatim.org/data/wikimedia-importance.sql.gz)
and for a secondary importance based on the number of tile views on openstreetmap.org.
### Customizing secondary importance
The secondary importance is implemented as a simple
[Postgis raster](https://postgis.net/docs/raster.html) table, where Nominatim
looks up the value for the coordinates of the centroid of a place. You can
provide your own secondary importance raster in form of an SQL file named
`secondary_importance.sql.gz` in your project directory.
The SQL file needs to drop and (re)create a table `secondary_importance` which
must as a minimum contain a column `rast` of type `raster`. The raster must
be in EPSG:4326 and contain 16bit unsigned ints
(`raster_constraint_pixel_types(rast) = '{16BUI}'). Any other columns in the
table will be ignored. You must furthermore create an index as follows:
```
CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(gist))
```
The following raster2pgsql command will create a table that conforms to
the requirements:
```
raster2pgsql -I -C -Y -d -t 128x128 input.tiff public.secondary_importance
```

View File

@@ -0,0 +1,20 @@
Nominatim comes with a predefined set of configuration options that should
work for most standard installations. If you have special requirements, there
are many places where the configuration can be adapted. This chapter describes
the following configurable parts:
* [Global Settings](Settings.md) has a detailed description of all parameters that
can be set in your local `.env` configuration
* [Import styles](Import-Styles.md) explains how to write your own import style
in order to control what kind of OSM data will be imported
* [Place ranking](Ranking.md) describes the configuration around classifing
places in terms of their importance and their role in an address
* [Tokenizers](Tokenizers.md) describes the configuration of the module
responsible for analysing and indexing names
* [Special Phrases](Special-Phrases.md) are common nouns or phrases that
can be used in search to identify a class of places
There are also guides for adding the following external data:
* [US house numbers from the TIGER dataset](Tiger.md)
* [External postcodes](Postcodes.md)

View File

@@ -0,0 +1,37 @@
# External postcode data
Nominatim creates a table of known postcode centroids during import. This table
is used for searches of postcodes and for adding postcodes to places where the
OSM data does not provide one. These postcode centroids are mainly computed
from the OSM data itself. In addition, Nominatim supports reading postcode
information from an external CSV file, to supplement the postcodes that are
missing in OSM.
To enable external postcode support, simply put one CSV file per country into
your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
two-letter country code for which to apply the file. The file may also be
gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
The CSV file must use commas as a delimiter and have a header line. Nominatim
expects three columns to be present: `postcode`, `lat` and `lon`. All other
columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
postcode centroids in WGS84.
The postcode files are loaded only when there is data for the given country
in your database. For example, if there is a `us_postcodes.csv` file in your
project directory but you import only an excerpt of Italy, then the US postcodes
will simply be ignored.
As a rule, the external postcode data should be put into the project directory
**before** starting the initial import. Still, you can add, remove and update the
external postcode data at any time. Simply
run:
```
nominatim refresh --postcodes
```
to make the changes visible in your database. Be aware, however, that the changes
only have an immediate effect on searches for postcodes. Postcodes that were
added to places are only updated, when they are reindexed. That usually happens
only during replication updates.

View File

@@ -1,8 +1,7 @@
# Place Ranking in Nominatim
Nominatim uses two metrics to rank a place: search rank and address rank.
Both can be assigned a value between 0 and 30. They serve slightly
different purposes, which are explained in this chapter.
This chapter explains what place ranking means and how it can be customized.
## Search rank
@@ -87,9 +86,9 @@ into the database. There are a few hard-coded rules for the assignment:
* highway nodes
* landuse that is not an area
Other than that, the ranks can be freely assigned via the JSON file
defined with `CONST_Address_Level_Config` according to their type and
the country they are in.
Other than that, the ranks can be freely assigned via the JSON file according
to their type and the country they are in. The name of the config file to be
used can be changed with the setting `NOMINATIM_ADDRESS_LEVEL_CONFIG`.
The address level configuration must consist of an array of configuration
entries, each containing a tag definition and an optional country array:

751
docs/customize/Settings.md Normal file
View File

@@ -0,0 +1,751 @@
This section provides a reference of all configuration parameters that can
be used with Nominatim.
# Configuring Nominatim
Nominatim uses [dotenv](https://github.com/theskumar/python-dotenv) to manage
its configuration settings. There are two means to set configuration
variables: through an `.env` configuration file or through an environment
variable.
The `.env` configuration file needs to be placed into the
[project directory](../admin/Import.md#creating-the-project-directory). It
must contain configuration parameters in `<parameter>=<value>` format.
Please refer to the dotenv documentation for details.
The configuration options may also be set in the form of shell environment
variables. This is particularly useful, when you want to temporarily change
a configuration option. For example, to force the replication serve to
download the next change, you can temporarily disable the update interval:
NOMINATIM_REPLICATION_UPDATE_INTERVAL=0 nominatim replication --once
If a configuration option is defined through .env file and environment
variable, then the latter takes precedence.
## Configuration Parameter Reference
### Import and Database Settings
#### NOMINATIM_DATABASE_DSN
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Database connection string |
| **Format:** | string: `pgsql:<param1>=<value1>;<param2>=<value2>;...` |
| **Default:** | pgsql:dbname=nominatim |
| **After Changes:** | run `nominatim refresh --website` |
Sets the connection parameters for the Nominatim database. At a minimum
the name of the database (`dbname`) is required. You can set any additional
parameter that is understood by libpq. See the [Postgres documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) for a full list.
!!! note
It is usually recommended not to set the password directly in this
configuration parameter. Use a
[password file](https://www.postgresql.org/docs/current/libpq-pgpass.html)
instead.
#### NOMINATIM_DATABASE_WEBUSER
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Database query user |
| **Format:** | string |
| **Default:** | www-data |
| **After Changes:** | cannot be changed after import |
Defines the name of the database user that will run search queries. Usually
this is the user under which the webserver is executed. When running Nominatim
via php-fpm, you can also define a separate query user. The Postgres user
needs to be set up before starting the import.
Nominatim grants minimal rights to this user to all tables that are needed
for running geocoding queries.
#### NOMINATIM_DATABASE_MODULE_PATH
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Directory where to find the PostgreSQL server module |
| **Format:** | path |
| **Default:** | _empty_ (use `<project_directory>/module`) |
| **After Changes:** | run `nominatim refresh --functions` |
| **Comment:** | Legacy tokenizer only |
Defines the directory in which the PostgreSQL server module `nominatim.so`
is stored. The directory and module must be accessible by the PostgreSQL
server.
For information on how to use this setting when working with external databases,
see [Advanced Installations](../admin/Advanced-Installations.md).
The option is only used by the Legacy tokenizer and ignored otherwise.
#### NOMINATIM_TOKENIZER
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Tokenizer used for normalizing and parsing queries and names |
| **Format:** | string |
| **Default:** | icu |
| **After Changes:** | cannot be changed after import |
Sets the tokenizer type to use for the import. For more information on
available tokenizers and how they are configured, see
[Tokenizers](../customize/Tokenizers.md).
#### NOMINATIM_TOKENIZER_CONFIG
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Configuration file for the tokenizer |
| **Format:** | path |
| **Default:** | _empty_ (default file depends on tokenizer) |
| **After Changes:** | see documentation for each tokenizer |
Points to the file with additional configuration for the tokenizer.
See the [Tokenizer](../customize/Tokenizers.md) descriptions for details
on the file format.
If a relative path is given, then the file is searched first relative to the
project directory and then in the global settings directory.
#### NOMINATIM_MAX_WORD_FREQUENCY
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of occurrences before a word is considered frequent |
| **Format:** | int |
| **Default:** | 50000 |
| **After Changes:** | cannot be changed after import |
| **Comment:** | Legacy tokenizer only |
The word frequency count is used by the Legacy tokenizer to automatically
identify _stop words_. Any partial term that occurs more often then what
is defined in this setting, is effectively ignored during search.
#### NOMINATIM_LIMIT_REINDEXING
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Avoid invalidating large areas |
| **Format:** | bool |
| **Default:** | yes |
Nominatim computes the address of each place at indexing time. This has the
advantage to make search faster but also means that more objects needs to
be invalidated when the data changes. For example, changing the name of
the state of Florida would require recomputing every single address point
in the state to make the new name searchable in conjunction with addresses.
Setting this option to 'yes' means that Nominatim skips reindexing of contained
objects when the area becomes too large.
#### NOMINATIM_LANGUAGES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Restrict search languages |
| **Format:** | string: comma-separated list of language codes |
| **Default:** | _empty_ |
Normally Nominatim will include all language variants of name:XX
in the search index. Set this to a comma separated list of language
codes, to restrict import to a subset of languages.
Currently only affects the initial import of country names and special phrases.
#### NOMINATIM_TERM_NORMALIZATION
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Rules for normalizing terms for comparisons |
| **Format:** | string: semicolon-separated list of ICU rules |
| **Default:** | :: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC (); |
| **Comment:** | Legacy tokenizer only |
[Special phrases](Special-Phrases.md) have stricter matching requirements than
normal search terms. They must appear exactly in the query after this term
normalization has been applied.
Only has an effect on the Legacy tokenizer. For the ICU tokenizer the rules
defined in the
[normalization section](Tokenizers.md#normalization-and-transliteration)
will be used.
#### NOMINATIM_USE_US_TIGER_DATA
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable searching for Tiger house number data |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --functions` |
When this setting is enabled, search and reverse queries also take data
from [Tiger house number data](Tiger.md) into account.
#### NOMINATIM_USE_AUX_LOCATION_DATA
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable searching in external house number tables |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --functions` |
| **Comment:** | Do not use. |
When this setting is enabled, search queries also take data from external
house number tables into account.
*Warning:* This feature is currently unmaintained and should not be used.
#### NOMINATIM_HTTP_PROXY
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Use HTTP proxy when downloading data |
| **Format:** | boolean |
| **Default:** | no |
When this setting is enabled and at least
[NOMINATIM_HTTP_PROXY_HOST](#nominatim_http_proxy_host) and
[NOMINATIM_HTTP_PROXY_PORT](#nominatim_http_proxy_port) are set, the
configured proxy will be used, when downloading external data like
replication diffs.
#### NOMINATIM_HTTP_PROXY_HOST
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Host name of the proxy to use |
| **Format:** | string |
| **Default:** | _empty_ |
When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
configures the proxy host name.
#### NOMINATIM_HTTP_PROXY_PORT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Port number of the proxy to use |
| **Format:** | integer |
| **Default:** | 3128 |
When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
configures the port number to use with the proxy.
#### NOMINATIM_HTTP_PROXY_LOGIN
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Username for proxies that require login |
| **Format:** | string |
| **Default:** | _empty_ |
When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
setting to define the username for proxies that require a login.
#### NOMINATIM_HTTP_PROXY_PASSWORD
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Password for proxies that require login |
| **Format:** | string |
| **Default:** | _empty_ |
When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
setting to define the password for proxies that require a login.
#### NOMINATIM_OSM2PGSQL_BINARY
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Location of the osm2pgsql binary |
| **Format:** | path |
| **Default:** | _empty_ (use binary shipped with Nominatim) |
| **Comment:** | EXPERT ONLY |
Nominatim uses [osm2pgsql](https://osm2pgsql.org) to load the OSM data
initially into the database. Nominatim comes bundled with a version of
osm2pgsql that is guaranteed to be compatible. Use this setting to use
a different binary instead. You should do this only when you know exactly
what you are doing. If the osm2pgsql version is not compatible, then the
result is undefined.
#### NOMINATIM_WIKIPEDIA_DATA_PATH
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Directory with the wikipedia importance data |
| **Format:** | path |
| **Default:** | _empty_ (project directory) |
Set a custom location for the
[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
unset, Nominatim expects the data to be saved in the project directory.
#### NOMINATIM_ADDRESS_LEVEL_CONFIG
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Configuration file for rank assignments |
| **Format:** | path |
| **Default:** | address-levels.json |
The _address level configuration_ defines the rank assignments for places. See
[Place Ranking](Ranking.md) for a detailed explanation what rank assignments
are and what the configuration file must look like.
When a relative path is given, then the file is searched first relative to the
project directory and then in the global settings directory.
#### NOMINATIM_IMPORT_STYLE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Configuration to use for the initial OSM data import |
| **Format:** | string or path |
| **Default:** | extratags |
The _style configuration_ describes which OSM objects and tags are taken
into consideration for the search database. Nominatim comes with a set
of pre-configured styles, that may be configured here.
You can also write your own custom style and point the setting to the file
with the style. When a relative path is given, then the style file is searched
first relative to the project directory and then in the global settings
directory.
See [Import Styles](Import-Styles.md)
for more information on the available internal styles and the format of the
configuration file.
#### NOMINATIM_FLATNODE_FILE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Location of osm2pgsql flatnode file |
| **Format:** | path |
| **Default:** | _empty_ (do not use a flatnote file) |
| **After Changes:** | Only change when moving the file physically. |
The `osm2pgsql flatnode file` is file that efficiently stores geographic
location for OSM nodes. For larger imports it can significantly speed up
the import. When this option is unset, then osm2pgsql uses a PsotgreSQL table
to store the locations.
When a relative path is given, then the flatnode file is created/searched
relative to the project directory.
!!! warning
The flatnode file is not only used during the initial import but also
when adding new data with `nominatim add-data` or `nominatim replication`.
Make sure you keep the flatnode file around and this setting unmodified,
if you plan to add more data or run regular updates.
#### NOMINATIM_TABLESPACE_*
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Group of settings for distributing the database over tablespaces |
| **Format:** | string |
| **Default:** | _empty_ (do not use a table space) |
| **After Changes:** | no effect after initial import |
Nominatim allows to distribute the search database over up to 10 different
[PostgreSQL tablespaces](https://www.postgresql.org/docs/current/manage-ag-tablespaces.html).
If you use this option, make sure that the tablespaces exist before starting
the import.
The available tablespace groups are:
NOMINATIM_TABLESPACE_SEARCH_DATA
: Data used by the geocoding frontend.
NOMINATIM_TABLESPACE_SEARCH_INDEX
: Indexes used by the geocoding frontend.
NOMINATIM_TABLESPACE_OSM_DATA
: Raw OSM data cache used for import and updates.
NOMINATIM_TABLESPACE_OSM_DATA
: Indexes on the raw OSM data cache.
NOMINATIM_TABLESPACE_PLACE_DATA
: Data table with the pre-filtered but still unprocessed OSM data.
Used only during imports and updates.
NOMINATIM_TABLESPACE_PLACE_INDEX
: Indexes on raw data table. Used only during imports and updates.
NOMINATIM_TABLESPACE_ADDRESS_DATA
: Data tables used for computing search terms and addresses of places
during import and updates.
NOMINATIM_TABLESPACE_ADDRESS_INDEX
: Indexes on the data tables for search term and address computation.
Used only for import and updates.
NOMINATIM_TABLESPACE_AUX_DATA
: Auxiliary data tables for non-OSM data, e.g. for Tiger house number data.
NOMINATIM_TABLESPACE_AUX_INDEX
: Indexes on auxiliary data tables.
### Replication Update Settings
#### NOMINATIM_REPLICATION_URL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Base URL of the replication service |
| **Format:** | url |
| **Default:** | https://planet.openstreetmap.org/replication/minute |
| **After Changes:** | run `nominatim replication --init` |
Replication services deliver updates to OSM data. Use this setting to choose
which replication service to use. See [Updates](../admin/Update.md) for more
information on how to set up regular updates.
#### NOMINATIM_REPLICATION_MAX_DIFF
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Maximum amount of data to download per update cycle (in MB) |
| **Format:** | integer |
| **Default:** | 50 |
| **After Changes:** | restart the replication process |
At each update cycle Nominatim downloads diffs until either no more diffs
are available on the server (i.e. the database is up-to-date) or the limit
given in this setting is exceeded. Nominatim guarantees to downloads at least
one diff, if one is available, no matter how small the setting.
The default for this setting is fairly conservative because Nominatim keeps
all data downloaded in one cycle in RAM. Using large values in a production
server may interfere badly with the search frontend because it evicts data
from RAM that is needed for speedy answers to incoming requests. It is usually
a better idea to keep this setting lower and run multiple update cycles
to catch up with updates.
When catching up in non-production mode, for example after the initial import,
the setting can easily be changed temporarily on the command line:
NOMINATIM_REPLICATION_MAX_DIFF=3000 nominatim replication
#### NOMINATIM_REPLICATION_UPDATE_INTERVAL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Publication interval of the replication service (in seconds) |
| **Format:** | integer |
| **Default:** | 75 |
| **After Changes:** | restart the replication process |
This setting determines when Nominatim will attempt to download again a new
update. The time is computed from the publication date of the last diff
downloaded. Setting this to a slightly higher value than the actual
publication interval avoids unnecessary rechecks.
#### NOMINATIM_REPLICATION_RECHECK_INTERVAL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Wait time to recheck for a pending update (in seconds) |
| **Format:** | integer |
| **Default:** | 60 |
| **After Changes:** | restart the replication process |
When replication updates are run in continuous mode (using `nominatim replication`),
this setting determines how long Nominatim waits until it looks for updates
again when updates were not available on the server.
Note that this is different from
[NOMINATIM_REPLICATION_UPDATE_INTERVAL](#nominatim_replication_update_interval).
Nominatim will never attempt to query for new updates for UPDATE_INTERVAL
seconds after the current database date. Only after the update interval has
passed it asks for new data. If then no new data is found, it waits for
RECHECK_INTERVAL seconds before it attempts again.
### API Settings
#### NOMINATIM_CORS_NOACCESSCONTROL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Send permissive CORS access headers |
| **Format:** | boolean |
| **Default:** | yes |
| **After Changes:** | run `nominatim refresh --website` |
When this setting is enabled, API HTTP responses include the HTTP
[CORS](https://en.wikipedia.org/wiki/CORS) headers
`access-control-allow-origin: *` and `access-control-allow-methods: OPTIONS,GET`.
#### NOMINATIM_MAPICON_URL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | URL prefix for static icon images |
| **Format:** | url |
| **Default:** | _empty_ |
| **After Changes:** | run `nominatim refresh --website` |
When a mapicon URL is configured, then Nominatim includes an additional `icon`
field in the responses, pointing to an appropriate icon for the place type.
Map icons used to be included in Nominatim itself but now have moved to the
[nominatim-ui](https://github.com/osm-search/nominatim-ui/) project. If you
want the URL to be included in API responses, make the `/mapicon`
directory of the project available under a public URL and point this setting
to the directory.
#### NOMINATIM_DEFAULT_LANGUAGE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Language of responses when no language is requested |
| **Format:** | language code |
| **Default:** | _empty_ (use the local language of the feature) |
| **After Changes:** | run `nominatim refresh --website` |
Nominatim localizes the place names in responses when the corresponding
translation is available. Users can request a custom language setting through
the HTTP accept-languages header or through the explicit parameter
[accept-languages](../api/Search.md#language-of-results). If neither is
given, it falls back to this setting. If the setting is also empty, then
the local languages (in OSM: the name tag without any language suffix) is
used.
#### NOMINATIM_SEARCH_BATCH_MODE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable a special batch query mode |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This feature is currently undocumented and potentially broken.
#### NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Threshold for switching the search index lookup strategy |
| **Format:** | integer |
| **Default:** | 500 |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This setting defines the threshold over which a name is no longer considered
as rare. When searching for places with rare names, only the name is used
for place lookups. Otherwise the name and any address information is used.
This setting only has an effect after `nominatim refresh --word-counts` has
been called to compute the word frequencies.
#### NOMINATIM_LOOKUP_MAX_COUNT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Maximum number of OSM ids accepted by /lookup |
| **Format:** | integer |
| **Default:** | 50 |
| **After Changes:** | run `nominatim refresh --website` |
The /lookup point accepts list of ids to look up address details for. This
setting restricts the number of places a user may look up with a single
request.
#### NOMINATIM_POLYGON_OUTPUT_MAX_TYPES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of different geometry formats that may be returned |
| **Format:** | integer |
| **Default:** | 1 |
| **After Changes:** | run `nominatim refresh --website` |
Nominatim supports returning full geometries of places. The geometries may
be requested in different formats with one of the
[`polygon_*` parameters](../api/Search.md#polygon-output). Use this
setting to restrict the number of geometry types that may be requested
with a single query.
Setting this parameter to 0 disables polygon output completely.
#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Disable search for elements that are not in the country grid |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
Enable to search elements just within countries.
When enabled, if, despite not finding a point within the static grid of countries, it
finds a geometry of a region, do not return the geometry.
Return "Unable to geocode" instead.
#### NOMINATIM_SERVE_LEGACY_URLS
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable serving via URLs with a .php suffix |
| **Format:** | boolean |
| **Default:** | yes |
| **Comment:** | Python frontend only |
When enabled, then endpoints are reachable as `/<name>` as well as `/<name>.php`.
This can be useful when you want to be backwards-compatible with previous
versions of Nominatim.
#### NOMINATIM_API_POOL_SIZE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of parallel database connections per worker |
| **Format:** | number |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
Sets the maximum number of database connections available for a single instance
of Nominatim. When configuring the maximum number of connections that your
PostgreSQL database can handle, you need at least
`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
For configuring the number of workers, refer to the section about
[Deploying the Python frontend](../admin/Deployment-Python.md).
#### NOMINATIM_QUERY_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for SQL queries to the database |
| **Format:** | number (seconds) |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
When this timeout is set, then all SQL queries that run longer than the
specified numbers of seconds will be cancelled and the user receives a
timeout exceptions. Users of the API see a 503 HTTP error.
The timeout does ont apply when using the
[low-level DB access](../library/Low-Level-DB-Access.md)
of the library. A timeout can be manually set, if required.
#### NOMINATIM_REQUEST_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for search queries |
| **Format:** | number (seconds) |
| **Default:** | 60 |
| **Comment:** | Python frontend only |
When this timeout is set, a search query will finish sending queries
to the database after the timeout has passed and immediately return the
results gathered so far.
Note that under high load you may observe that users receive different results
than usual without seeing an error. This may cause some confusion.
### Logging Settings
#### NOMINATIM_LOG_DB
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Log requests into the database |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
Enable logging requests into a database table with this setting. The logs
can be found in the table `new_query_log`.
When using this logging method, it is advisable to set up a job that
regularly clears out old logging information. Nominatim will not do that
on its own.
Can be used as the same time as NOMINATIM_LOG_FILE.
#### NOMINATIM_LOG_FILE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Log requests into a file |
| **Format:** | path |
| **Default:** | _empty_ (logging disabled) |
| **After Changes:** | run `nominatim refresh --website` |
Enable logging of requests into a file with this setting by setting the log
file where to log to. A relative file name is assumed to be relative to
the project directory.
The entries in the log file have the following format:
<request time> <execution time in s> <number of results> <type> "<query string>"
Request time is the time when the request was started. The execution time is
given in seconds and corresponds to the time the query took executing in PHP.
type contains the name of the endpoint used.
Can be used as the same time as NOMINATIM_LOG_DB.
#### NOMINATIM_DEBUG_SQL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable printing of raw SQL by SQLAlchemy |
| **Format:** | boolean |
| **Default:** | no |
| **Comment:** | **For developers only.** |
This settings enables
[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
by SQLAlchemy. This can be helpful when debugging some bugs with internal
query handling. It should only be used together with the CLI query functions.
Enabling it for server mode may have unintended consequences. Use the `debug`
parameter instead, which prints information on how the search is executed
including SQL statements.

View File

@@ -0,0 +1,34 @@
# Special phrases
## Importing OSM user-maintained special phrases
As described in the [Import section](../admin/Import.md), it is possible to
import special phrases from the wiki with the following command:
```sh
nominatim special-phrases --import-from-wiki
```
## Importing custom special phrases
But, it is also possible to import some phrases from a csv file.
To do so, you have access to the following command:
```sh
nominatim special-phrases --import-from-csv <csv file>
```
Note that the two previous import commands will update the phrases from your database.
This means that if you import some phrases from a csv file, only the phrases
present in the csv file will be kept into the database. All other phrases will
be removed.
If you want to only add new phrases and not update the other ones you can add
the argument `--no-replace` to the import command. For example:
```sh
nominatim special-phrases --import-from-csv <csv file> --no-replace
```
This will add the phrases present in the csv file into the database without
removing the other ones.

28
docs/customize/Tiger.md Normal file
View File

@@ -0,0 +1,28 @@
# Installing TIGER housenumber data for the US
Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER data:
cd $PROJECT_DIR
wget https://nominatim.org/data/tiger-nominatim-preprocessed-latest.csv.tar.gz
2. Import the data into your Nominatim database:
nominatim add-data --tiger-data tiger-nominatim-preprocessed-latest.csv.tar.gz
3. Enable use of the Tiger data in your existing `.env` file by adding:
echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
4. Apply the new settings:
nominatim refresh --functions --website
See the [TIGER-data project](https://github.com/osm-search/TIGER-data) for more
information on how the data got preprocessed.

View File

@@ -0,0 +1,405 @@
# Tokenizers
The tokenizer module in Nominatim is responsible for analysing the names given
to OSM objects and the terms of an incoming query in order to make sure, they
can be matched appropriately.
Nominatim offers different tokenizer modules, which behave differently and have
different configuration options. This sections describes the tokenizers and how
they can be configured.
!!! important
The use of a tokenizer is tied to a database installation. You need to choose
and configure the tokenizer before starting the initial import. Once the import
is done, you cannot switch to another tokenizer anymore. Reconfiguring the
chosen tokenizer is very limited as well. See the comments in each tokenizer
section.
## Legacy tokenizer
The legacy tokenizer implements the analysis algorithms of older Nominatim
versions. It uses a special Postgresql module to normalize names and queries.
This tokenizer is automatically installed and used when upgrading an older
database. It should not be used for new installations anymore.
### Compiling the PostgreSQL module
The tokeinzer needs a special C module for PostgreSQL which is not compiled
by default. If you need the legacy tokenizer, compile Nominatim as follows:
```
mkdir build
cd build
cmake -DBUILD_MODULE=on
make
```
### Enabling the tokenizer
To enable the tokenizer add the following line to your project configuration:
```
NOMINATIM_TOKENIZER=legacy
```
The Postgresql module for the tokenizer is available in the `module` directory
and also installed with the remainder of the software under
`lib/nominatim/module/nominatim.so`. You can specify a custom location for
the module with
```
NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
```
This is in particular useful when the database runs on a different server.
See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
There are no other configuration options for the legacy tokenizer. All
normalization functions are hard-coded.
## ICU tokenizer
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
normalize names and queries. It also offers configurable decomposition and
abbreviation handling.
This tokenizer is currently the default.
To enable the tokenizer add the following line to your project configuration:
```
NOMINATIM_TOKENIZER=icu
```
### How it works
On import the tokenizer processes names in the following three stages:
1. During the **Sanitizer step** incoming names are cleaned up and converted to
**full names**. This step can be used to regularize spelling, split multi-name
tags into their parts and tag names with additional attributes. See the
[Sanitizers section](#sanitizers) below for available cleaning routines.
2. The **Normalization** part removes all information from the full names
that are not relevant for search.
3. The **Token analysis** step takes the normalized full names and creates
all transliterated variants under which the name should be searchable.
See the [Token analysis](#token-analysis) section below for more
information.
During query time, only normalization and transliteration are relevant.
An incoming query is first split into name chunks (this usually means splitting
the string at the commas) and the each part is normalised and transliterated.
The result is used to look up places in the search index.
### Configuration
The ICU tokenizer is configured using a YAML file which can be configured using
`NOMINATIM_TOKENIZER_CONFIG`. The configuration is read on import and then
saved as part of the internal database status. Later changes to the variable
have no effect.
Here is an example configuration file:
``` yaml
normalization:
- ":: lower ()"
- "ß > 'ss'" # German szet is unambiguously equal to double ss
transliteration:
- !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
- ":: Ascii ()"
sanitizers:
- step: split-name-list
token-analysis:
- analyzer: generic
variants:
- !include icu-rules/variants-ca.yaml
- words:
- road -> rd
- bridge -> bdge,br,brdg,bri,brg
mutations:
- pattern: 'ä'
replacements: ['ä', 'ae']
```
The configuration file contains four sections:
`normalization`, `transliteration`, `sanitizers` and `token-analysis`.
#### Normalization and Transliteration
The normalization and transliteration sections each define a set of
ICU rules that are applied to the names.
The **normalization** rules are applied after sanitation. They should remove
any information that is not relevant for search at all. Usual rules to be
applied here are: lower-casing, removing of special characters, cleanup of
spaces.
The **transliteration** rules are applied at the end of the tokenization
process to transfer the name into an ASCII representation. Transliteration can
be useful to allow for further fuzzy matching, especially between different
scripts.
Each section must contain a list of
[ICU transformation rules](https://unicode-org.github.io/icu/userguide/transforms/general/rules.html).
The rules are applied in the order in which they appear in the file.
You can also include additional rules from external yaml file using the
`!include` tag. The included file must contain a valid YAML list of ICU rules
and may again include other files.
!!! warning
The ICU rule syntax contains special characters that conflict with the
YAML syntax. You should therefore always enclose the ICU rules in
double-quotes.
#### Sanitizers
The sanitizers section defines an ordered list of functions that are applied
to the name and address tags before they are further processed by the tokenizer.
They allows to clean up the tagging and bring it to a standardized form more
suitable for building the search index.
!!! hint
Sanitizers only have an effect on how the search index is built. They
do not change the information about each place that is saved in the
database. In particular, they have no influence on how the results are
displayed. The returned results always show the original information as
stored in the OpenStreetMap database.
Each entry contains information of a sanitizer to be applied. It has a
mandatory parameter `step` which gives the name of the sanitizer. Depending
on the type, it may have additional parameters to configure its operation.
The order of the list matters. The sanitizers are applied exactly in the order
that is configured. Each sanitizer works on the results of the previous one.
The following is a list of sanitizers that are shipped with Nominatim.
##### split-name-list
::: nominatim.tokenizer.sanitizers.split_name_list
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### strip-brace-terms
::: nominatim.tokenizer.sanitizers.strip_brace_terms
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### tag-analyzer-by-language
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### clean-housenumbers
::: nominatim.tokenizer.sanitizers.clean_housenumbers
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### clean-postcodes
::: nominatim.tokenizer.sanitizers.clean_postcodes
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### clean-tiger-tags
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### delete-tags
::: nominatim.tokenizer.sanitizers.delete_tags
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### tag-japanese
::: nominatim.tokenizer.sanitizers.tag_japanese
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### Token Analysis
Token analyzers take a full name and transform it into one or more normalized
form that are then saved in the search index. In its simplest form, the
analyzer only applies the transliteration rules. More complex analyzers
create additional spelling variants of a name. This is useful to handle
decomposition and abbreviation.
The ICU tokenizer may use different analyzers for different names. To select
the analyzer to be used, the name must be tagged with the `analyzer` attribute
by a sanitizer (see for example the
[tag-analyzer-by-language sanitizer](#tag-analyzer-by-language)).
The token-analysis section contains the list of configured analyzers. Each
analyzer must have an `id` parameter that uniquely identifies the analyzer.
The only exception is the default analyzer that is used when no special
analyzer was selected. There are analysers with special ids:
* '@housenumber'. If an analyzer with that name is present, it is used
for normalization of house numbers.
* '@potcode'. If an analyzer with that name is present, it is used
for normalization of postcodes.
Different analyzer implementations may exist. To select the implementation,
the `analyzer` parameter must be set. The different implementations are
described in the following.
##### Generic token analyzer
The generic analyzer `generic` is able to create variants from a list of given
abbreviation and decomposition replacements and introduce spelling variations.
###### Variants
The optional 'variants' section defines lists of replacements which create alternative
spellings of a name. To create the variants, a name is scanned from left to
right and the longest matching replacement is applied until the end of the
string is reached.
The variants section must contain a list of replacement groups. Each group
defines a set of properties that describes where the replacements are
applicable. In addition, the word section defines the list of replacements
to be made. The basic replacement description is of the form:
```
<source>[,<source>[...]] => <target>[,<target>[...]]
```
The left side contains one or more `source` terms to be replaced. The right side
lists one or more replacements. Each source is replaced with each replacement
term.
!!! tip
The source and target terms are internally normalized using the
normalization rules given in the configuration. This ensures that the
strings match as expected. In fact, it is better to use unnormalized
words in the configuration because then it is possible to change the
rules for normalization later without having to adapt the variant rules.
###### Decomposition
In its standard form, only full words match against the source. There
is a special notation to match the prefix and suffix of a word:
``` yaml
- ~strasse => str # matches "strasse" as full word and in suffix position
- hinter~ => hntr # matches "hinter" as full word and in prefix position
```
There is no facility to match a string in the middle of the word. The suffix
and prefix notation automatically trigger the decomposition mode: two variants
are created for each replacement, one with the replacement attached to the word
and one separate. So in above example, the tokenization of "hauptstrasse" will
create the variants "hauptstr" and "haupt str". Similarly, the name "rote strasse"
triggers the variants "rote str" and "rotestr". By having decomposition work
both ways, it is sufficient to create the variants at index time. The variant
rules are not applied at query time.
To avoid automatic decomposition, use the '|' notation:
``` yaml
- ~strasse |=> str
```
simply changes "hauptstrasse" to "hauptstr" and "rote strasse" to "rote str".
###### Initial and final terms
It is also possible to restrict replacements to the beginning and end of a
name:
``` yaml
- ^south => s # matches only at the beginning of the name
- road$ => rd # matches only at the end of the name
```
So the first example would trigger a replacement for "south 45th street" but
not for "the south beach restaurant".
###### Replacements vs. variants
The replacement syntax `source => target` works as a pure replacement. It changes
the name instead of creating a variant. To create an additional version, you'd
have to write `source => source,target`. As this is a frequent case, there is
a shortcut notation for it:
```
<source>[,<source>[...]] -> <target>[,<target>[...]]
```
The simple arrow causes an additional variant to be added. Note that
decomposition has an effect here on the source as well. So a rule
``` yaml
- "~strasse -> str"
```
means that for a word like `hauptstrasse` four variants are created:
`hauptstrasse`, `haupt strasse`, `hauptstr` and `haupt str`.
###### Mutations
The 'mutation' section in the configuration describes an additional set of
replacements to be applied after the variants have been computed.
Each mutation is described by two parameters: `pattern` and `replacements`.
The pattern must contain a single regular expression to search for in the
variant name. The regular expressions need to follow the syntax for
[Python regular expressions](file:///usr/share/doc/python3-doc/html/library/re.html#regular-expression-syntax).
Capturing groups are not permitted.
`replacements` must contain a list of strings that the pattern
should be replaced with. Each occurrence of the pattern is replaced with
all given replacements. Be mindful of combinatorial explosion of variants.
###### Modes
The generic analyser supports a special mode `variant-only`. When configured
then it consumes the input token and emits only variants (if any exist). Enable
the mode by adding:
```
mode: variant-only
```
to the analyser configuration.
##### Housenumber token analyzer
The analyzer `housenumbers` is purpose-made to analyze house numbers. It
creates variants with optional spaces between numbers and letters. Thus,
house numbers of the form '3 a', '3A', '3-A' etc. are all considered equivalent.
The analyzer cannot be customized.
##### Postcode token analyzer
The analyzer `postcodes` is pupose-made to analyze postcodes. It supports
a 'lookup' varaint of the token, which produces variants with optional
spaces. Use together with the clean-postcodes sanitizer.
The analyzer cannot be customized.
### Reconfiguration
Changing the configuration after the import is currently not possible, although
this feature may be added at a later time.

View File

@@ -0,0 +1,167 @@
# Database Layout
### Import tables
OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
Nominatim uses its own data output style 'gazetteer', which differs from the
output style created for map rendering.
The import process creates the following tables:
![osm2pgsql tables](osm2pgsql-tables.svg)
The `planet_osm_*` tables are the usual backing tables for OSM data. Note
that Nominatim uses them to look up special relations and to find nodes on
ways.
The gazetteer style produces a single table `place` as output with the following
columns:
* `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
* `osm_id` - original OSM ID
* `class` - key of principal tag defining the object type
* `type` - value of principal tag defining the object type
* `name` - collection of tags that contain a name or reference
* `admin_level` - numerical value of the tagged administrative level
* `address` - collection of tags defining the address of an object
* `extratags` - collection of additional interesting tags that are not
directly relevant for searching
* `geometry` - geometry of the object (in WGS84)
A single OSM object may appear multiple times in this table when it is tagged
with multiple tags that may constitute a principal tag. Take for example a
motorway bridge. In OSM, this would be a way which is tagged with
`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
once with `class` of `highway` and once with a `class` of `bridge`. Thus the
*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
How raw OSM tags are mapped to the columns in the place table is to a certain
degree configurable. See [Customizing Import Styles](../customize/Import-Styles.md)
for more information.
### Search tables
The following tables carry all information needed to do the search:
![search tables](search-tables.svg)
The **placex** table is the central table that saves all information about the
searchable places in Nominatim. The basic columns are the same as for the
place table and have the same meaning. The placex tables adds the following
additional columns:
* `place_id` - the internal unique ID to identify the place
* `partition` - the id to use with partitioned tables (see below)
* `geometry_sector` - a location hash used for geographically close ordering
* `parent_place_id` - the next higher place in the address hierarchy, only
relevant for POI-type places (with rank 30)
* `linked_place_id` - place ID of the place this object has been merged with.
When this ID is set, then the place is invisible for search.
* `importance` - measure how well known the place is
* `rank_search`, `rank_address` - search and address rank (see [Customizing ranking](../customize/Ranking.md)
* `wikipedia` - the wikipedia page used for computing the importance of the place
* `country_code` - the country the place is located in
* `housenumber` - normalized housenumber, if the place has one
* `postcode` - computed postcode for the place
* `indexed_status` - processing status of the place (0 - ready, 1 - freshly inserted, 2 - needs updating, 100 - needs deletion)
* `indexed_date` - timestamp when the place was processed last
* `centroid` - a point feature for the place
The **location_property_osmline** table is a special table for
[address interpolations](https://wiki.openstreetmap.org/wiki/Addresses#Using_interpolation).
The columns have the same meaning and use as the columns with the same name in
the placex table. Only three columns are special:
* `startnumber` and `endnumber` - beginning and end of the number range
for the interpolation
* `interpolationtype` - a string `odd`, `even` or `all` to indicate
the interval between the numbers
Address interpolations are always ways in OSM, which is why there is no column
`osm_type`.
The **location_postcode** table holds computed centroids of all postcodes that
can be found in the OSM data. The meaning of the columns is again the same
as that of the placex table.
Every place needs an address, a set of surrounding places that describe the
location of the place. The set of address places is made up of OSM places
themselves. The **place_addressline** table cross-references for each place
all the places that make up its address. Two columns define the address
relation:
* `place_id` - reference to the place being addressed
* `address_place_id` - reference to the place serving as an address part
The most of the columns cache information from the placex entry of the address
part. The exceptions are:
* `fromarea` - is true if the address part has an area geometry and can
therefore be considered preceise
* `isaddress` - is true if the address part should show up in the address
output. Sometimes there are multiple places competing for for same address
type (e.g. multiple cities) and this field resolves the tie.
The **search_name** table contains the search index proper. It saves for each
place the terms with which the place can be found. The terms are split into
the name itself and all terms that make up the address. The table mirrors some
of the columns from placex for faster lookup.
Search terms are not saved as strings. Each term is assigned an integer and those
integers are saved in the name and address vectors of the search_name table. The
**word** table serves as the lookup table from string to such a word ID. The
exact content of the word table depends on the [tokenizer](Tokenizers.md) used.
## Address computation tables
Next to the main search tables, there is a set of secondary helper tables used
to compute the address relations between places. These tables are partitioned.
Each country is assigned a partition number in the country_name table (see
below) and the data is then split between a set of tables, one for each
partition. Note that Nominatim still manually manages partitioned tables.
Native support for partitions in PostgreSQL only became usable with version 13.
It will be a little while before Nominatim drops support for older versions.
![address tables](address-tables.svg)
The **search_name_X** tables are used to look up streets that appear in the
`addr:street` tag.
The **location_area_large_X** tables are used to look up larger areas
(administrative boundaries and place nodes) either through their geographic
closeness or through `addr:*` entries.
The **location_road_X** tables are used to find the closest street for a
dependent place.
All three table cache specific information from the placex table for their
selected subset of places:
* `keywords` and `name_vector` contain lists of term ids (from the word table)
that the full name of the place should match against
* `isguess` is true for places that are not described by an area
All other columns reflect their counterpart in the placex table.
## Static data tables
Nominatim also creates a number of static tables at import:
* `nominatim_properties` saves settings that must not be changed after
import
* `address_levels` save the rank information from the
[ranking configuration](../customize/Ranking.md)
* `country_name` contains a fallback of names for all countries, their
default languages and saves the assignment of countries to partitions.
* `country_osm_grid` provides a fallback for country geometries
## Auxiliary data tables
Finally there are some table for auxiliary data:
* `location_property_tiger` - saves housenumber from the Tiger import. Its
layout is similar to that of `location_propoerty_osmline`.
* `place_class_*` tables are helper tables to facilitate lookup of POIs
by their class and type. They exist because it is not possible to create
combined indexes with geometries.

View File

@@ -1,6 +1,6 @@
# Setting up Nominatim for Development
This chapter gives an overview how to set up Nominatim for developement
This chapter gives an overview how to set up Nominatim for development
and how to run tests.
!!! Important
@@ -26,16 +26,29 @@ following packages should get you started:
## Prerequisites for testing and documentation
The Nominatim test suite consists of behavioural tests (using behave) and
unit tests (using PHPUnit). It has the following additional requirements:
unit tests (using PHPUnit for PHP code and pytest for Python code).
It has the following additional requirements:
* [behave test framework](https://behave.readthedocs.io) >= 1.2.5
* [nose](https://nose.readthedocs.io)
* [phpunit](https://phpunit.de) >= 7.3
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
* [phpunit](https://phpunit.de) (9.5 is known to work)
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
* [pytest](https://pytest.org)
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
For testing the Python search frontend, you need to install extra dependencies
depending on your choice of webserver framework:
* [httpx](https://www.python-httpx.org/) (starlette only)
* [asgi-lifespan](https://github.com/florimondmanca/asgi-lifespan) (starlette only)
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
### Installing prerequisites on Ubuntu/Debian
@@ -49,7 +62,10 @@ To install all necessary packages run:
sudo apt install php-cgi phpunit php-codesniffer \
python3-pip python3-setuptools python3-dev
pip3 install --user behave nose mkdocs
pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
types-ujson types-requests types-Pygments typing-extensions\
httpx asgi-lifespan
```
The `mkdocs` executable will be located in `.local/bin`. You may have to add
@@ -78,58 +94,15 @@ echo 'export PATH=~/.config/composer/vendor/bin:$PATH' > ~/.profile
## Executing Tests
All tests are located in the `\test` directory.
All tests are located in the `/test` directory.
### Preparing the test database
Some of the behavioural test expect a test database to be present. You need at
least 2GB RAM and 10GB disk space to create the database.
First create a separate directory for the test DB and fetch the test planet
data and the Tiger data for South Dakota:
```
mkdir testdb
cd testdb
wget https://www.nominatim.org/data/test/nominatim-api-testdata.pbf
wget -O - https://nominatim.org/data/tiger2018-nominatim-preprocessed.tar.gz | tar xz --wildcards --no-anchored '46*'
```
Configure and build Nominatim in the usual way:
```
cmake $USERNAME/Nominatim
make
```
Copy the test settings:
```
cp $USERNAME/Nominatim/test/testdb/local.php settings/
```
Inspect the file to check that all settings are correct for your local setup.
Now you can import the test database:
```
dropdb --if-exists test_api_nominatim
./utils/setup.php --all --osm-file nominatim-api-testdb.pbf 2>&1 | tee import.log
./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim 2>&1 | tee -a import.log
./utils/setup.php --import-tiger-data 2>&1 | tee -a import.log
```
### Running the tests
To run all tests just go to the test directory and run make:
To run all tests just go to the build directory and run make:
```sh
cd test
make
cd build
make test
```
To skip tests that require the test database, run `make no-test-db` instead.
For more information about the structure of the tests and how to change and
extend the test suite, see the [Testing chapter](Testing.md).
@@ -154,7 +127,7 @@ symlinks (see `CMakeLists.txt` for the exact steps).
Now you can start webserver for local testing
```
build> mkdocs serve
build> make serve-doc
[server:296] Serving on http://127.0.0.1:8000
[handlers:62] Start watching changes
```
@@ -163,7 +136,7 @@ If you develop inside a Vagrant virtual machine, use a port that is forwarded
to your host:
```
build> mkdocs serve --dev-addr 0.0.0.0:8088
build> PYTHONPATH=$SRCDIR mkdocs serve --dev-addr 0.0.0.0:8088
[server:296] Serving on http://0.0.0.0:8088
[handlers:62] Start watching changes
```

View File

@@ -0,0 +1,220 @@
# Writing custom sanitizer and token analysis modules for the ICU tokenizer
The [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer) provides a
highly customizable method to pre-process and normalize the name information
of the input data before it is added to the search index. It comes with a
selection of sanitizers and token analyzers which you can use to adapt your
installation to your needs. If the provided modules are not enough, you can
also provide your own implementations. This section describes the API
of sanitizers and token analysis.
!!! warning
This API is currently in early alpha status. While this API is meant to
be a public API on which other sanitizers and token analyzers may be
implemented, it is not guaranteed to be stable at the moment.
## Using non-standard sanitizers and token analyzers
Sanitizer names (in the `step` property) and token analysis names (in the
`analyzer`) may refer to externally supplied modules. There are two ways
to include external modules: through a library or from the project directory.
To include a module from a library, use the absolute import path as name and
make sure the library can be found in your PYTHONPATH.
To use a custom module without creating a library, you can put the module
somewhere in your project directory and then use the relative path to the
file. Include the whole name of the file including the `.py` ending.
## Custom sanitizer modules
A sanitizer module must export a single factory function `create` with the
following signature:
``` python
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]
```
The function receives the custom configuration for the sanitizer and must
return a callable (function or class) that transforms the name and address
terms of a place. When a place is processed, then a `ProcessInfo` object
is created from the information that was queried from the database. This
object is sequentially handed to each configured sanitizer, so that each
sanitizer receives the result of processing from the previous sanitizer.
After the last sanitizer is finished, the resulting name and address lists
are forwarded to the token analysis module.
Sanitizer functions are instantiated once and then called for each place
that is imported or updated. They don't need to be thread-safe.
If multi-threading is used, each thread creates their own instance of
the function.
### Sanitizer configuration
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
options:
heading_level: 6
### The main filter function of the sanitizer
The filter function receives a single object of type `ProcessInfo`
which has with three members:
* `place: PlaceInfo`: read-only information about the place being processed.
See PlaceInfo below.
* `names: List[PlaceName]`: The current list of names for the place.
* `address: List[PlaceName]`: The current list of address names for the place.
While the `place` member is provided for information only, the `names` and
`address` lists are meant to be manipulated by the sanitizer. It may add and
remove entries, change information within a single entry (for example by
adding extra attributes) or completely replace the list with a different one.
#### PlaceInfo - information about the place
::: nominatim.data.place_info.PlaceInfo
options:
heading_level: 6
#### PlaceName - extended naming information
::: nominatim.data.place_name.PlaceName
options:
heading_level: 6
### Example: Filter for US street prefixes
The following sanitizer removes the directional prefixes from street names
in the US:
``` python
import re
def _filter_function(obj):
if obj.place.country_code == 'us' \
and obj.place.rank_address >= 26 and obj.place.rank_address <= 27:
for name in obj.names:
name.name = re.sub(r'^(north|south|west|east) ',
'',
name.name,
flags=re.IGNORECASE)
def create(config):
return _filter_function
```
This is the most simple form of a sanitizer module. If defines a single
filter function and implements the required `create()` function by returning
the filter.
The filter function first checks if the object is interesting for the
sanitizer. Namely it checks if the place is in the US (through `country_code`)
and it the place is a street (a `rank_address` of 26 or 27). If the
conditions are met, then it goes through all available names and
removes any leading directional prefix using a simple regular expression.
Save the source code in a file in your project directory, for example as
`us_streets.py`. Then you can use the sanitizer in your `icu_tokenizer.yaml`:
``` yaml
...
sanitizers:
- step: us_streets.py
...
```
!!! warning
This example is just a simplified show case on how to create a sanitizer.
It is not really read for real-world use: while the sanitizer would
correcly transform `West 5th Street` into `5th Street`. it would also
shorten a simple `North Street` to `Street`.
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
They can be found in the directory
[`nominatim/tokenizer/sanitizers`](https://github.com/osm-search/Nominatim/tree/master/nominatim/tokenizer/sanitizers).
## Custom token analysis module
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
options:
heading_level: 6
::: nominatim.tokenizer.token_analysis.base.Analyzer
options:
heading_level: 6
### Example: Creating acronym variants for long names
The following example of a token analysis module creates acronyms from
very long names and adds them as a variant:
``` python
class AcronymMaker:
""" This class is the actual analyzer.
"""
def __init__(self, norm, trans):
self.norm = norm
self.trans = trans
def get_canonical_id(self, name):
# In simple cases, the normalized name can be used as a canonical id.
return self.norm.transliterate(name.name).strip()
def compute_variants(self, name):
# The transliterated form of the name always makes up a variant.
variants = [self.trans.transliterate(name)]
# Only create acronyms from very long words.
if len(name) > 20:
# Take the first letter from each word to form the acronym.
acronym = ''.join(w[0] for w in name.split())
# If that leds to an acronym with at least three letters,
# add the resulting acronym as a variant.
if len(acronym) > 2:
# Never forget to transliterate the variants before returning them.
variants.append(self.trans.transliterate(acronym))
return variants
# The following two functions are the module interface.
def configure(rules, normalizer, transliterator):
# There is no configuration to parse and no data to set up.
# Just return an empty configuration.
return None
def create(normalizer, transliterator, config):
# Return a new instance of our token analysis class above.
return AcronymMaker(normalizer, transliterator)
```
Given the name `Trans-Siberian Railway`, the code above would return the full
name `Trans-Siberian Railway` and the acronym `TSR` as variant, so that
searching would work for both.
## Sanitizers vs. Token analysis - what to use for variants?
It is not always clear when to implement variations in the sanitizer and
when to write a token analysis module. Just take the acronym example
above: it would also have been possible to write a sanitizer which adds the
acronym as an additional name to the name list. The result would have been
similar. So which should be used when?
The most important thing to keep in mind is that variants created by the
token analysis are only saved in the word lookup table. They do not need
extra space in the search index. If there are many spelling variations, this
can mean quite a significant amount of space is saved.
When creating additional names with a sanitizer, these names are completely
independent. In particular, they can be fed into different token analysis
modules. This gives a much greater flexibility but at the price that the
additional names increase the size of the search index.

View File

@@ -1,170 +0,0 @@
# OSM Data Import
OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
Nominatim uses its own data output style 'gazetteer', which differs from the
output style created for map rendering.
## Database Layout
The gazetteer style produces a single table `place` with the following rows:
* `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
* `osm_id` - original OSM ID
* `class` - key of principal tag defining the object type
* `type` - value of principal tag defining the object type
* `name` - collection of tags that contain a name or reference
* `admin_level` - numerical value of the tagged administrative level
* `address` - collection of tags defining the address of an object
* `extratags` - collection of additional interesting tags that are not
directly relevant for searching
* `geometry` - geometry of the object (in WGS84)
A single OSM object may appear multiple times in this table when it is tagged
with multiple tags that may constitute a principal tag. Take for example a
motorway bridge. In OSM, this would be a way which is tagged with
`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
once with `class` of `highway` and once with a `class` of `bridge`. Thus the
*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
## Configuring the Import
How tags are interpreted and assigned to the different `place` columns can be
configured via the import style configuration file (`CONST_Import_style`). This
is a JSON file which contains a list of rules which are matched against every
tag of every object and then assign the tag its specific role.
### Configuration Rules
A single rule looks like this:
```json
{
"keys" : ["key1", "key2", ...],
"values" : {
"value1" : "prop",
"value2" : "prop1,prop2"
}
}
```
A rule first defines a list of keys to apply the rule to. This is always a list
of strings. The string may have four forms. An empty string matches against
any key. A string that ends in an asterisk `*` is a prefix match and accordingly
matches against any key that starts with the given string (minus the `*`). A
suffix match can be defined similarly with a string that starts with a `*`. Any
other string constitutes an exact match.
The second part of the rules defines a list of values and the properties that
apply to a successful match. Value strings may be either empty, which
means that they match any value, or describe an exact match. Prefix
or suffix matching of values is not possible.
For a rule to match, it has to find a valid combination of keys and values. The
resulting property is that of the matched values.
The rules in a configuration file are processed sequentially and the first
match for each tag wins.
A rule where key and value are the empty string is special. This defines the
fallback when none of the rules match. The fallback is always used as a last
resort when nothing else matches, no matter where the rule appears in the file.
Defining multiple fallback rules is not allowed. What happens in this case,
is undefined.
### Tag Properties
One or more of the following properties may be given for each tag:
* `main`
A principal tag. A new row will be added for the object with key and value
as `class` and `type`.
* `with_name`
When the tag is a principal tag (`main` property set): only really add a new
row, if there is any name tag found (a reference tag is not sufficient, see
below).
* `with_name_key`
When the tag is a principal tag (`main` property set): only really add a new
row, if there is also a name tag that matches the key of the principal tag.
For example, if the main tag is `bridge=yes`, then it will only be added as
an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
If this property is set, all other names that are not domain-specific are
ignored.
* `fallback`
When the tag is a principal tag (`main` property set): only really add a new
row, when no other principal tags for this object have been found. Only one
fallback tag can win for an object.
* `operator`
When the tag is a principal tag (`main` property set): also include the
`operator` tag in the list of names. This is a special construct for an
out-dated tagging practise in OSM. Fuel stations and chain restaurants
in particular used to have the name of the chain tagged as `operator`.
These days the chain can be more commonly found in the `brand` tag but
there is still enough old data around to warrant this special case.
* `name`
Add tag to the list of names.
* `ref`
Add tag to the list of names as a reference. At the moment this only means
that the object is not considered to be named for `with_name`.
* `address`
Add tag to the list of address tags. If the tag starts with `addr:` or
`is_in:`, then this prefix is cut off before adding it to the list.
* `postcode`
Add the value as a postcode to the address tags. If multiple tags are
candidate for postcodes, one wins out and the others are dropped.
* `country`
Add the value as a country code to the address tags. The value must be a
two letter country code, otherwise it is ignored. If there are multiple
tags that match, then one wins out and the others are dropped.
* `house`
If no principle tags can be found for the object, still add the object with
`class`=`place` and `type`=`house`. Use this for address nodes that have no
other function.
* `interpolation`
Add this object as an address interpolation (appears as `class`=`place` and
`type`=`houses` in the database).
* `extra`
Add tag to the list of extra tags.
* `skip`
Skip the tag completely. Useful when a custom default fallback is defined
or to define exceptions to rules.
A rule can define as many of these properties for one match as it likes. For
example, if the property is `"main,extra"` then the tag will open a new row
but also have the tag appear in the list of extra tags.
There are a number of pre-defined styles in the `settings/` directory. It is
advisable to start from one of these styles when defining your own.
### Changing the Style of Existing Databases
There is normally no issue changing the style of a database that is already
imported and now kept up-to-date with change files. Just be aware that any
change in the style applies to updates only. If you want to change the data
that is already in the database, then a reimport is necessary.

152
docs/develop/Indexing.md Normal file
View File

@@ -0,0 +1,152 @@
# Indexing Places
In Nominatim, the word __indexing__ refers to the process that takes the raw
OpenStreetMap data from the place table, enriches it with address information
and creates the search indexes. This section explains the basic data flow.
## Initial import
After osm2pgsql has loaded the raw OSM data into the place table,
the data is copied to the final search tables placex and location_property_osmline.
While they are copied, some basic properties are added:
* country_code, geometry_sector and partition
* initial search and address rank
In addition the column `indexed_status` is set to `1` marking the place as one
that needs to be indexed.
All this happens in the triggers `placex_insert` and `osmline_insert`.
## Indexing
The main work horse of the data import is the indexing step, where Nominatim
takes every place from the placex and location_property_osmline tables where
the indexed_status != 0 and computes the search terms and the address parts
of the place.
The indexing happens in three major steps:
1. **Data preparation** - The indexer gets the data for the place to be indexed
from the database.
2. **Search name processing** - The prepared data is given to the
tokenizer which computes the search terms from the names
and potentially other information.
3. **Address processing** - The indexer then hands the prepared data and the
tokenizer information back to the database via an `INSERT` statement which
also sets the indexed_status to `0`. This triggers the update triggers
`placex_update`/`osmline_update` which do the work of computing address
parts and filling all the search tables.
When computing the address terms of a place, Nominatim relies on the processed
search names of all the address parts. That is why places are processed in rank
order, from smallest rank to largest. To ensure correct handling of linked
place nodes, administrative boundaries are processed before all other places.
Apart from these restrictions, each place can be indexed independently
from the others. This allows a large degree of parallelization during the indexing.
It also means that the indexing process can be interrupted at any time and
will simply pick up where it left of when restarted.
### Data preparation
The data preparation step computes and retrieves all data for a place that
might be needed for the next step of processing the search name. That includes
* location information (country code)
* place classification (class, type, ranks)
* names (including names of linked places)
* address information (`addr:*` tags)
Data preparation is implemented in pl/PgSQL mostly in the functions
`placex_indexing_prepare()` and `get_interpolation_address()`.
#### `addr:*` tag inheritance
Nominatim has limited support for inheriting address tags from a building
to POIs inside the building. This only works when the address tags are on the
building outline. Any rank 30 object inside such a building or on its outline
inherits all address tags when it does not have any address tags of its own.
The inheritance is computed in the data preparation step.
### Search name processing
The prepared place information is handed to the tokenizer next. This is a
Python module responsible for processing the names from both name and address
terms and building up the word index from them. The process is explained in
more detail in the [Tokenizer chapter](Tokenizers.md).
### Address processing
Finally, the preprocessed place information and the results of the search name
processing are written back to the database. At this point the update trigger
of the placex/location_property_osmline tables take over and fill all the
dependent tables. This makes up the most work-intensive part of the indexing.
Nominatim distinguishes between dependent and independent places.
**Dependent places** are all places on rank 30: house numbers, POIs etc. These
places don't have a full address of their own. Instead they are attached to
a parent street or place and use the information of the parent for searching
and displaying information. Everything else are **independent places**: streets,
parks, water bodies, suburbs, cities, states etc. They receive a full address
on their own.
The address processing for both types of places is very different.
#### Independent places
To compute the address of an independent place Nominatim searches for all
places that cover the place to compute the address for at least partially.
For places with an area, that area is used to check for coverage. For place
nodes an artificial square area is computed according to the rank of
the place. The lower the rank the lager the area. The `location_area_large_X`
tables are there to facilitate the lookup. All places that can function as
the address of another place are saved in those tables.
`addr:*` and `isin:*` tags are taken into account to compute the address, too.
Nominatim will give preference to places with the same name as in these tags
when looking for places in the vicinity. If there are no matching place names
at all, then the tags are at least added to the search index. That means that
the names will not be shown in the result as the 'address' of the place, but
searching by them still works.
Independent places are always added to the global search index `search_name`.
#### Dependent places
Dependent places skip the full address computation for performance reasons.
Instead they just find a parent place to attach themselves to.
![parenting of dependent places](parenting-flow.svg)
By default a POI
or house number will be attached to the closest street. That can be any major
or minor street indexed by Nominatim. In the default configuration that means
that it can attach itself to a footway but only when it has a name.
When the dependent place has an `addr:street` tag, then Nominatim will first
try to find a street with the same name before falling back to the closest
street.
There are also addresses in OSM, where the housenumber does not belong
to a street at all. These have an `addr:place` tag. For these places, Nominatim
tries to find a place with the given name in the indexed places with an
address rank between 16 and 25. If none is found, then the dependent place
is attached to the closest place in that category and the addr:place name is
added as *unlisted* place, which indicates to Nominatim that it needs to add
it to the address output, no matter what. This special case is necessary to
cover addresses that don't really refer to an existing object.
When an address has both the `addr:street` and `addr:place` tag, then Nominatim
assumes that the `addr:place` tag in fact should be the city part of the address
and give the POI the usual street number address.
Dependent places are only added to the global search index `search_name` when
they have either a name themselves or when they have address tags that are not
covered by the places that make up their address. The latter ensures that
addresses are always searchable by those address tags.

View File

@@ -1,45 +0,0 @@
# Postcodes in Nominatim
The blog post
[Nominatim and Postcodes](https://www.openstreetmap.org/user/lonvia/diary/43143)
describes the handling implemented since Nominatim 3.1.
Postcode centroids (aka 'calculated postcodes') are generated by looking at all
postcodes of a country, grouping them and calculating the geometric centroid.
There is currently no logic to deal with extreme outliers (typos or other
mistakes in OSM data). There is also no check if a postcodes adheres to a
country's format, e.g. if Swiss postcodes are 4 digits.
## Regular updating calculated postcodes
The script to rerun the calculation is
`build/utils/update.php --calculate-postcodes`
and runs once per night on nominatim.openstreetmap.org.
## Finding places that share a specific postcode
In the Nominatim database run
```sql
SELECT address->'postcode' as pc,
osm_type, osm_id, class, type,
st_x(centroid) as lon, st_y(centroid) as lat
FROM placex
WHERE country_code='fr'
AND upper(trim (both ' ' from address->'postcode')) = '33210';
```
Alternatively on [Overpass](https://overpass-turbo.eu/) run the following query
```
[out:json][timeout:250];
area["name"="France"]->.boundaryarea;
(
nwr(area.boundaryarea)["addr:postcode"="33210"];
);
out body;
>;
out skel qt;
```

View File

@@ -21,14 +21,15 @@ This test directory is sturctured as follows:
| +- api Tests for API endpoints (search, reverse, etc.)
|
+- php PHP unit tests
+- scenes Geometry test data
+- python Python unit tests
+- testdb Base data for generating API test database
+- testdata Additional test data used by unit tests
```
## PHP Unit Tests (`test/php`)
Unit tests can be found in the php/ directory. They test selected php functions.
Very low coverage.
Unit tests for PHP code can be found in the `php/` directory. They test selected
PHP functions. Very low coverage.
To execute the test suite run
@@ -36,11 +37,26 @@ To execute the test suite run
UNIT_TEST_DSN='pgsql:dbname=nominatim_unit_tests' phpunit ../
It will read phpunit.xml which points to the library, test path, bootstrap
strip and set other parameters.
strip and sets other parameters.
It will use (and destroy) a local database 'nominatim_unit_tests'. You can set
a different connection string with e.g. UNIT_TEST_DSN='pgsql:dbname=foo_unit_tests'.
## Python Unit Tests (`test/python`)
Unit tests for Python code can be found in the `python/` directory. The goal is
to have complete coverage of the Python library in `nominatim`.
To execute the tests run
py.test-3 test/python
or
pytest test/python
The name of the pytest binary depends on your installation.
## BDD Functional Tests (`test/bdd`)
Functional tests are written as BDD instructions. For more information on
@@ -67,17 +83,19 @@ The tests can be configured with a set of environment variables (`behave -D key=
the test databases (db tests)
* `TEST_DB` - name of test database (db tests)
* `API_TEST_DB` - name of the database containing the API test data (api tests)
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
* `API_ENGINE` - webframe to use for running search queries, same values as
`nominatim serve --engine` parameter
* `DB_HOST` - (optional) hostname of database host
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
* `DB_PASS` - (optional) password for database login
* `SERVER_MODULE_PATH` - (optional) path on the Postgres server to Nominatim
module shared library file
* `TEST_SETTINGS_TEMPLATE` - file to write temporary Nominatim settings to
* `REMOVE_TEMPLATE` - if true, the template database will not be reused during
the next run. Reusing the base templates speeds up tests
considerably but might lead to outdated errors for some
changes in the database layout.
* `REMOVE_TEMPLATE` - if true, the template and API database will not be reused
during the next run. Reusing the base templates speeds
up tests considerably but might lead to outdated errors
for some changes in the database layout.
* `KEEP_TEST_DB` - if true, the test database will not be dropped after a test
is finished. Should only be used if one single scenario is
run, otherwise the result is undefined.
@@ -89,25 +107,22 @@ feature of behave which comes in handy when writing new tests.
### API Tests (`test/bdd/api`)
These tests are meant to test the different API endpoints and their parameters.
They require to import several datasets into a test database.
See the [Development Setup chapter](Development-Environment.md#preparing-the-test-database)
for instructions on how to set up this database.
They require to import several datasets into a test database. This is normally
done automatically during setup of the test. The API test database is then
kept around and reused in subsequent runs of behave. Use `behave -DREMOVE_TEMPLATE`
to force a reimport of the database.
The official test dataset was derived from the 180924 planet (note: such
file no longer exists at https://planet.openstreetmap.org/planet/2018/).
Newer planets are likely to work as well but you may see isolated test
failures where the data has changed.
The official test dataset is saved in the file `test/testdb/apidb-test-data.pbf`
and compromises the following data:
The official test dataset can always be downloaded from
[nominatim.org](https://www.nominatim.org/data/test/nominatim-api-testdata.pbf)
To recreate the input data for the test database run:
* Geofabrik extract of Liechtenstein
* extract of Autauga country, Alabama, US (for tests against Tiger data)
* additional data from `test/testdb/additional_api_test.data.osm`
```
wget https://ftp5.gwdg.de/pub/misc/openstreetmap/planet.openstreetmap.org/pbf/planet-180924.osm.pbf
osmconvert planet-180924.osm.pbf -B=test/testdb/testdb.polys -o=testdb.pbf
```
API tests should only be testing the functionality of the website PHP code.
Most tests should be formulated as BDD DB creation tests (see below) instead.
#### Code Coverage
#### Code Coverage (PHP engine only)
The API tests also support code coverage tests. You need to install
[PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).

354
docs/develop/Tokenizers.md Normal file
View File

@@ -0,0 +1,354 @@
# Tokenizers
The tokenizer is the component of Nominatim that is responsible for
analysing names of OSM objects and queries. Nominatim provides different
tokenizers that use different strategies for normalisation. This page describes
how tokenizers are expected to work and the public API that needs to be
implemented when creating a new tokenizer. For information on how to configure
a specific tokenizer for a database see the
[tokenizer chapter in the Customization Guide](../customize/Tokenizers.md).
## Generic Architecture
### About Search Tokens
Search in Nominatim is organised around search tokens. Such a token represents
string that can be part of the search query. Tokens are used so that the search
index does not need to be organised around strings. Instead the database saves
for each place which tokens match this place's name, address, house number etc.
To be able to distinguish between these different types of information stored
with the place, a search token also always has a certain type: name, house number,
postcode etc.
During search an incoming query is transformed into a ordered list of such
search tokens (or rather many lists, see below) and this list is then converted
into a database query to find the right place.
It is the core task of the tokenizer to create, manage and assign the search
tokens. The tokenizer is involved in two distinct operations:
* __at import time__: scanning names of OSM objects, normalizing them and
building up the list of search tokens.
* __at query time__: scanning the query and returning the appropriate search
tokens.
### Importing
The indexer is responsible to enrich an OSM object (or place) with all data
required for geocoding. It is split into two parts: the controller collects
the places that require updating, enriches the place information as required
and hands the place to Postgresql. The collector is part of the Nominatim
library written in Python. Within Postgresql, the `placex_update`
trigger is responsible to fill out all secondary tables with extra geocoding
information. This part is written in PL/pgSQL.
The tokenizer is involved in both parts. When the indexer prepares a place,
it hands it over to the tokenizer to inspect the names and create all the
search tokens applicable for the place. This usually involves updating the
tokenizer's internal token lists and creating a list of all token IDs for
the specific place. This list is later needed in the PL/pgSQL part where the
indexer needs to add the token IDs to the appropriate search tables. To be
able to communicate the list between the Python part and the pl/pgSQL trigger,
the `placex` table contains a special JSONB column `token_info` which is there
for the exclusive use of the tokenizer.
The Python part of the tokenizer returns a structured information about the
tokens of a place to the indexer which converts it to JSON and inserts it into
the `token_info` column. The content of the column is then handed to the PL/pqSQL
callbacks of the tokenizer which extracts the required information. Usually
the tokenizer then removes all information from the `token_info` structure,
so that no information is ever persistently saved in the table. All information
that went in should have been processed after all and put into secondary tables.
This is however not a hard requirement. If the tokenizer needs to store
additional information about a place permanently, it may do so in the
`token_info` column. It just may never execute searches over it and
consequently not create any special indexes on it.
### Querying
At query time, Nominatim builds up multiple _interpretations_ of the search
query. Each of these interpretations is tried against the database in order
of the likelihood with which they match to the search query. The first
interpretation that yields results wins.
The interpretations are encapsulated in the `SearchDescription` class. An
instance of this class is created by applying a sequence of
_search tokens_ to an initially empty SearchDescription. It is the
responsibility of the tokenizer to parse the search query and derive all
possible sequences of search tokens. To that end the tokenizer needs to parse
the search query and look up matching words in its own data structures.
## Tokenizer API
The following section describes the functions that need to be implemented
for a custom tokenizer implementation.
!!! warning
This API is currently in early alpha status. While this API is meant to
be a public API on which other tokenizers may be implemented, the API is
far away from being stable at the moment.
### Directory Structure
Nominatim expects two files for a tokenizer:
* `nominatim/tokenizer/<NAME>_tokenizer.py` containing the Python part of the
implementation
* `lib-php/tokenizer/<NAME>_tokenizer.php` with the PHP part of the
implementation
where `<NAME>` is a unique name for the tokenizer consisting of only lower-case
letters, digits and underscore. A tokenizer also needs to install some SQL
functions. By convention, these should be placed in `lib-sql/tokenizer`.
If the tokenizer has a default configuration file, this should be saved in
the `settings/<NAME>_tokenizer.<SUFFIX>`.
### Configuration and Persistence
Tokenizers may define custom settings for their configuration. All settings
must be prefixed with `NOMINATIM_TOKENIZER_`. Settings may be transient or
persistent. Transient settings are loaded from the configuration file when
Nominatim is started and may thus be changed at any time. Persistent settings
are tied to a database installation and must only be read during installation
time. If they are needed for the runtime then they must be saved into the
`nominatim_properties` table and later loaded from there.
### The Python module
The Python module is expect to export a single factory function:
```python
def create(dsn: str, data_dir: Path) -> AbstractTokenizer
```
The `dsn` parameter contains the DSN of the Nominatim database. The `data_dir`
is a directory in the project directory that the tokenizer may use to save
database-specific data. The function must return the instance of the tokenizer
class as defined below.
### Python Tokenizer Class
All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
and implement the abstract functions defined there.
::: nominatim.tokenizer.base.AbstractTokenizer
options:
heading_level: 6
### Python Analyzer Class
::: nominatim.tokenizer.base.AbstractAnalyzer
options:
heading_level: 6
### PL/pgSQL Functions
The tokenizer must provide access functions for the `token_info` column
to the indexer which extracts the necessary information for the global
search tables. If the tokenizer needs additional SQL functions for private
use, then these functions must be prefixed with `token_` in order to ensure
that there are no naming conflicts with the SQL indexer code.
The following functions are expected:
```sql
FUNCTION token_get_name_search_tokens(info JSONB) RETURNS INTEGER[]
```
Return an array of token IDs of search terms that should match
the name(s) for the given place. These tokens are used to look up the place
by name and, where the place functions as part of an address for another place,
by address. Must return NULL when the place has no name.
```sql
FUNCTION token_get_name_match_tokens(info JSONB) RETURNS INTEGER[]
```
Return an array of token IDs of full names of the place that should be used
to match addresses. The list of match tokens is usually more strict than
search tokens as it is used to find a match between two OSM tag values which
are expected to contain matching full names. Partial terms should not be
used for match tokens. Must return NULL when the place has no name.
```sql
FUNCTION token_get_housenumber_search_tokens(info JSONB) RETURNS INTEGER[]
```
Return an array of token IDs of house number tokens that apply to the place.
Note that a place may have multiple house numbers, for example when apartments
each have their own number. Must be NULL when the place has no house numbers.
```sql
FUNCTION token_normalized_housenumber(info JSONB) RETURNS TEXT
```
Return the house number(s) in the normalized form that can be matched against
a house number token text. If a place has multiple house numbers they must
be listed with a semicolon as delimiter. Must be NULL when the place has no
house numbers.
```sql
FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
```
Return true if this is an object that should be parented against a street.
Only relevant for objects with address rank 30.
```sql
FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
```
Return true if there are street names to match against for finding the
parent of the object.
```sql
FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
```
Return true if there are place names to match against for finding the
parent of the object.
```sql
FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
```
Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
match against the `addr:street` tag name. Must return either NULL or FALSE
when the place has no `addr:street` tag.
```sql
FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[]) RETURNS BOOLEAN
```
Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
match against the `addr:place` tag name. Must return either NULL or FALSE
when the place has no `addr:place` tag.
```sql
FUNCTION token_addr_place_search_tokens(info JSONB) RETURNS INTEGER[]
```
Return the search token IDs extracted from the `addr:place` tag. These tokens
are used for searches by address when no matching place can be found in the
database. Must be NULL when the place has no `addr:place` tag.
```sql
FUNCTION token_get_address_keys(info JSONB) RETURNS SETOF TEXT
```
Return the set of keys for which address information is provided. This
should correspond to the list of (relevant) `addr:*` tags with the `addr:`
prefix removed or the keys used in the `address` dictionary of the place info.
```sql
FUNCTION token_get_address_search_tokens(info JSONB, key TEXT) RETURNS INTEGER[]
```
Return the array of search tokens for the given address part. `key` can be
expected to be one of those returned with `token_get_address_keys()`. The
search tokens are added to the address search vector of the place, when no
corresponding OSM object could be found for the given address part from which
to copy the name information.
```sql
FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
```
Check if the given tokens match against the address part `key`.
__Warning:__ the tokens that are handed in are the lists previously saved
from `token_get_name_search_tokens()`, _not_ from the match token list. This
is an historical oddity which will be fixed at some point in the future.
Currently, tokenizers are encouraged to make sure that matching works against
both the search token list and the match token list.
```sql
FUNCTION token_get_postcode(info JSONB) RETURNS TEXT
```
Return the postcode for the object, if any exists. The postcode must be in
the form that should also be presented to the end-user.
```sql
FUNCTION token_strip_info(info JSONB) RETURNS JSONB
```
Return the part of the `token_info` field that should be stored in the database
permanently. The indexer calls this function when all processing is done and
replaces the content of the `token_info` column with the returned value before
the trigger stores the information in the database. May return NULL if no
information should be stored permanently.
### PHP Tokenizer class
The PHP tokenizer class is instantiated once per request and responsible for
analyzing the incoming query. Multiple requests may be in flight in
parallel.
The class is expected to be found under the
name of `\Nominatim\Tokenizer`. To find the class the PHP code includes the file
`tokenizer/tokenizer.php` in the project directory. This file must be created
when the tokenizer is first set up on import. The file should initialize any
configuration variables by setting PHP constants and then require the file
with the actual implementation of the tokenizer.
The tokenizer class must implement the following functions:
```php
public function __construct(object &$oDB)
```
The constructor of the class receives a database connection that can be used
to query persistent data in the database.
```php
public function checkStatus()
```
Check that the tokenizer can access its persistent data structures. If there
is an issue, throw an `\Exception`.
```php
public function normalizeString(string $sTerm) : string
```
Normalize string to a form to be used for comparisons when reordering results.
Nominatim reweighs results how well the final display string matches the actual
query. Before comparing result and query, names and query are normalised against
this function. The tokenizer can thus remove all properties that should not be
taken into account for reweighing, e.g. special characters or case.
```php
public function tokensForSpecialTerm(string $sTerm) : array
```
Return the list of special term tokens that match the given term.
```php
public function extractTokensFromPhrases(array &$aPhrases) : TokenList
```
Parse the given phrases, splitting them into word lists and retrieve the
matching tokens.
The phrase array may take on two forms. In unstructured searches (using `q=`
parameter) the search query is split at the commas and the elements are
put into a sorted list. For structured searches the phrase array is an
associative array where the key designates the type of the term (street, city,
county etc.) The tokenizer may ignore the phrase type at this stage in parsing.
Matching phrase type and appropriate search token type will be done later
when the SearchDescription is built.
For each phrase in the list of phrases, the function must analyse the phrase
string and then call `setWordSets()` to communicate the result of the analysis.
A word set is a list of strings, where each string refers to a search token.
A phrase may have multiple interpretations. Therefore a list of word sets is
usually attached to the phrase. The search tokens themselves are returned
by the function in an associative array, where the key corresponds to the
strings given in the word sets. The value is a list of search tokens. Thus
a single string in the list of word sets may refer to multiple search tokens.

View File

@@ -0,0 +1,35 @@
@startuml
skinparam monochrome true
skinparam ObjectFontStyle bold
map search_name_X {
place_id => BIGINT
address_rank => SMALLINT
name_vector => INT[]
centroid => GEOMETRY
}
map location_area_large_X {
place_id => BIGINT
keywords => INT[]
partition => SMALLINT
rank_search => SMALLINT
rank_address => SMALLINT
country_code => VARCHR(2)
isguess => BOOLEAN
postcode => TEXT
centroid => POINT
geometry => GEOMETRY
}
map location_road_X {
place_id => BIGINT
partition => SMALLINT
country_code => VARCHR(2)
geometry => GEOMETRY
}
search_name_X -[hidden]> location_area_large_X
location_area_large_X -[hidden]> location_road_X
@enduml

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 11 KiB

View File

@@ -13,7 +13,7 @@ More details in [osm-search/country-grid-data](https://github.com/osm-search/cou
## US Census TIGER
For the United States you can choose to import additonal street-level data.
For the United States you can choose to import additional street-level data.
The data isn't mixed into OSM data but queried as fallback when no OSM
result can be found.

View File

@@ -0,0 +1,44 @@
@startuml
skinparam monochrome true
skinparam ObjectFontStyle bold
map planet_osm_nodes #eee {
id => BIGINT
lat => INT
lon => INT
}
map planet_osm_ways #eee {
id => BIGINT
nodes => BIGINT[]
tags => TEXT[]
}
map planet_osm_rels #eee {
id => BIGINT
parts => BIGINT[]
members => TEXT[]
tags => TEXT[]
way_off => SMALLINT
rel_off => SMALLINT
}
map place {
osm_type => CHAR(1)
osm_id => BIGINT
class => TEXT
type => TEXT
name => HSTORE
address => HSTORE
extratags => HSTORE
admin_level => SMALLINT
geometry => GEOMETRY
}
planet_osm_nodes -[hidden]> planet_osm_ways
planet_osm_ways -[hidden]> planet_osm_rels
planet_osm_ways -[hidden]-> place
planet_osm_nodes::id <- planet_osm_ways::nodes
@enduml

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 13 KiB

View File

@@ -0,0 +1,31 @@
@startuml
skinparam monochrome true
start
if (has 'addr:street'?) then (yes)
if (street with that name\n nearby?) then (yes)
:**Use closest street**
**with same name**;
kill
else (no)
:** Use closest**\n**street**;
kill
endif
elseif (has 'addr:place'?) then (yes)
if (place with that name\n nearby?) then (yes)
:**Use closest place**
**with same name**;
kill
else (no)
:add addr:place to adress;
:**Use closest place**\n**rank 16 to 25**;
kill
endif
else (otherwise)
:**Use closest**\n**street**;
kill
endif
@enduml

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 9.8 KiB

View File

@@ -0,0 +1,99 @@
@startuml
skinparam monochrome true
skinparam ObjectFontStyle bold
left to right direction
map placex {
place_id => BIGINT
osm_type => CHAR(1)
osm_id => BIGINT
class => TEXT
type => TEXT
name => HSTORE
address => HSTORE
extratags => HSTORE
admin_level => SMALLINT
partition => SMALLINT
geometry_sector => INT
parent_place_id => BIGINT
linked_place_id => BIGINT
importance => DOUBLE
rank_search => SMALLINT
rank_address => SMALLINT
wikipedia => TEXT
country_code => VARCHAR(2)
housenumber => TEXT
postcode => TEXT
indexed_status => SMALLINT
indexed_date => TIMESTAMP
centroid => GEOMETRY
geometry => GEOMETRY
}
map search_name {
place_id => BIGINT
importance => DOUBLE
search_rank => SMALLINT
address_rank => SMALLINT
name_vector => INT[]
nameaddress_vector => INT[]
country_code => VARCHAR(2)
centroid => GEOMETRY
}
map word {
word_id => INT
word_token => TEXT
... =>
}
map location_property_osmline {
place_id => BIGINT
osm_id => BIGINT
startnumber => INT
endnumber => INT
interpolationtype => TEXT
address => HSTORE
partition => SMALLINT
geometry_sector => INT
parent_place_id => BIGINT
country_code => VARCHAR(2)
postcode => text
indexed_status => SMALLINT
indexed_date => TIMESTAMP
linegeo => GEOMETRY
}
map place_addressline {
place_id => BIGINT
address_place_id => BIGINT
distance => DOUBLE
cached_rank_address => SMALLINT
fromarea => BOOLEAN
isaddress => BOOLEAN
}
map location_postcode {
place_id => BIGINT
postcode => TEXT
parent_place_id => BIGINT
rank_search => SMALLINT
rank_address => SMALLINT
indexed_status => SMALLINT
indexed_date => TIMESTAMP
geometry => GEOMETRY
}
placex::place_id <-- search_name::place_id
placex::place_id <-- place_addressline::place_id
placex::place_id <-- place_addressline::address_place_id
search_name::name_vector --> word::word_id
search_name::nameaddress_vector --> word::word_id
place_addressline -[hidden]> location_property_osmline
search_name -[hidden]> place_addressline
location_property_osmline -[hidden]-> location_postcode
@enduml

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 35 KiB

View File

@@ -2,6 +2,10 @@
display: none!important
}
.wy-nav-content {
max-width: 900px!important
}
table {
margin-bottom: 12pt
}
@@ -13,3 +17,20 @@ th, td {
th {
background-color: #eee;
}
.doc-object h6 {
margin-bottom: 0.8em;
font-size: 130%;
}
.doc-object {
margin-bottom: 1.3em;
}
.doc-children .doc-contents {
margin-left: 3em;
}
.md-footer__inner {
display: none;
}

View File

@@ -1,8 +1,15 @@
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
address and to generate synthetic addresses of OSM points (reverse geocoding).
It has also limited capability to search features by their type
(pubs, hotels, churches, etc).
This guide comes in three parts:
This guide comes in five parts:
* __[API reference](api/Overview.md)__ for users of Nominatim
* __[Administration Guide](admin/Installation.md)__ for those who want
to install their own Nominatim server
* __[Customization Guide](customize/Overview.md)__ for those who want to
adapt their own installation to their special requirements
* __[Library Guide](library/Getting-Started.md)__ for Python developers who
want to use Nominatim as a library in their project
* __[Developer's Guide](develop/overview.md)__ for developers of the software

View File

@@ -0,0 +1,31 @@
# Configuration
When using Nominatim through the library, it can be configured in exactly
the same way as when running as a service. This means that you should have
created a [project directory](../admin/Import.md#creating-the-project-directory)
which contains all files belonging to the Nominatim instance. It can also contain
an `.env` file with configuration options. Setting configuration parameters
via environment variables works as well.
Configuration options are resolved in the following order:
* from the OS environment (or the dictionary given in `environ`,
(see NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__)
* from the .env file in the project directory of the installation
* from the default installation in the configuration directory
For more information on configuration via dotenv and a list of possible
configuration parameters, see the [Configuration page](../customize/Settings.md).
## `Configuration` class
::: nominatim.config.Configuration
options:
members:
- get_bool
- get_int
- get_str_list
- get_path
heading_level: 6
show_signature_annotations: True

View File

@@ -0,0 +1,248 @@
# Getting Started
The Nominatim search frontend can directly be used as a Python library in
scripts and applications. When you have imported your own Nominatim database,
then it is no longer necessary to run a full web service for it and access
the database through http requests. There are
also less constraints on the kinds of data that can be accessed. The library
allows to get access to more detailed information about the objects saved
in the database.
!!! danger
The library interface is currently in an experimental stage. There might
be some smaller adjustments to the public interface until the next version.
The library also misses a proper installation routine, so some manipulation
of the PYTHONPATH is required. At the moment, use is only recommended for
developers wit some experience in Python.
## Installation
To use the Nominatim library, you need access to a local Nominatim database.
Follow the [installation](../admin/Installation.md) and
[import](../admin/Import.md) instructions to set up your database.
It is not yet possible to install it in the usual way via pip or inside a
virtualenv. To get access to the library you need to set an appropriate
`PYTHONPATH`. With the default installation, the python library can be found
under `/usr/local/share/nominatim/lib-python`. If you have installed
Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
You can also point the `PYTHONPATH` to the Nominatim source code.
### A simple search example
To query the Nominatim database you need to first set up a connection. This
is done by creating an Nominatim API object. This object exposes all the
search functions of Nominatim that are also known from its web API.
This code snippet implements a simple search for the town of 'Brugge':
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query)
results = asyncio.run(search('Brugge'))
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge')
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
The Nominatim library is designed around
[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
provides you with an interface of coroutines.
If you have many requests to make, coroutines can speed up your applications
significantly.
For smaller scripts there is also a synchronous wrapper around the API. By
using `NominatimAPI`, you get exactly the same interface using classic functions.
The examples in this chapter will always show-case both
implementations. The documentation itself will usually refer only to
'Nominatim API class' when both flavours are meant. If a functionality is
available only for the synchronous or asynchronous version, this will be
explicitly mentioned.
### Defining which database to use
The [Configuration](../admin/Import.md#configuration-setup-in-env)
section explains how Nominatim is configured using the
[dotenv](https://github.com/theskumar/python-dotenv) library.
The same configuration mechanism is used with the
Nominatim API library. You should therefore be sure you are familiar with
the section.
The constructor of the 'Nominatim API class' takes one mandatory parameter:
the path to the [project directory](../admin/Import.md#creating-the-project-directory).
You should have set up this directory as part of the Nominatim import.
Any configuration found in the `.env` file in this directory will automatically
used.
Yo may also configure Nominatim be setting environment variables.
Normally, Nominatim will check the operating system environment. This can be
overwritten by giving the constructor a dictionary of configuration parameters.
Let us look up 'Brugge' in the special database named 'belgium' instead of the
standard 'nominatim' database:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
async def search(query):
api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
return await api.search(query)
results = asyncio.run(search('Brugge'))
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
api = napi.NominatimAPI(Path('.'), environ=config_params)
results = api.search('Brugge')
```
### Presenting results to humans
All search functions return the raw results from the database. There is no
full human-readable label. To create such a label, you need two things:
* the address details of the place
* adapt the result to the language you wish to use for display
Again searching for 'Brugge', this time with a nicely formatted result:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query, address_details=True)
results = asyncio.run(search('Brugge'))
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge', address_details=True)
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
To request information about the address of a result, add the optional
parameter 'address_details' to your search:
``` python
>>> results = api.search('Brugge', address_details=True)
```
An additional field `address_rows` will set in results that are returned.
It contains a list of all places that make up the address of the place. For
simplicity, this includes name and house number of the place itself. With
the names in this list it is possible to create a human-readable description
of the result. To do that, you first need to decide in which language the
results should be presented. As with the names in the result itself, the
places in `address_rows` contain all possible name translation for each row.
The library has a helper class `Locale` which helps extracting a name of a
place in the preferred language. It takes a single parameter with a list
of language codes in the order of preference. So
``` python
locale = napi.Locale(['fr', 'en'])
```
creates a helper class that returns the name preferably in French. If that is
not possible, it tries English and eventually falls back to the default `name`
or `ref`.
The `Locale` object can be applied to a name dictionary to return the best-matching
name out of it:
``` python
>>> print(locale.display_name(results[0].names))
'Brugges'
```
The `address_row` field has a helper function to apply the function to all
its members and save the result in the `local_name` field. It also returns
all the localized names as a convenient simple list. This list can be used
to create a human-readable output:
``` python
>>> address_parts = results[0].address_rows.localize(locale)
>>> print(', '.join(address_parts))
Bruges, Flandre-Occidentale, Flandre, Belgique
```
This is a fairly simple way to create a human-readable description. The
place information in `address_rows` contains further information about each
place. For example, which OSM `adlin_level` was used, what category the place
belongs to or what rank Nominatim has assigned. Use this to adapt the output
to local address formats.
For more information on address rows, see
[detailed address description](Result-Handling.md#detailed-address-description).

View File

@@ -0,0 +1,62 @@
# Input Parameter Types
This page describes in more detail some of the input parameter types used
in the query functions of the API object.
## Place identification
The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
require references to places in the database. Below the possible
types for place identification are listed. All types are dataclasses.
### PlaceID
::: nominatim.api.PlaceID
options:
heading_level: 6
### OsmID
::: nominatim.api.OsmID
options:
heading_level: 6
## Geometry types
::: nominatim.api.GeometryFormat
options:
heading_level: 6
members_order: source
## Geometry input
### Point
::: nominatim.api.Point
options:
heading_level: 6
show_signature_annotations: True
### Bbox
::: nominatim.api.Bbox
options:
heading_level: 6
show_signature_annotations: True
members_order: source
group_by_category: False
## Layers
Layers allow to restrict the search result to thematic groups. This is
orthogonal to restriction by address ranks, which groups places by their
geographic extent.
::: nominatim.api.DataLayer
options:
heading_level: 6
members_order: source

View File

@@ -0,0 +1,56 @@
# Low-level connections
The `NominatimAPIAsync` class allows to directly access the underlying
database connection to explore the raw data. Nominatim uses
[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
refer to the documentation of the library to understand how to write SQL.
To get access to a search connection, use the `begin()` function of your
API object. This returns a `SearchConnection` object described below
wrapped in a context manager. Its
`t` property has definitions for all Nominatim search tables. For an
overview of available tables, refer to the
[Development Layout](../develop/Database-Layout.md) in in the development
chapter. Note that only tables that are needed for search are accessible
as SQLAlchemy tables.
!!! warning
The database layout is not part of the API definition and may change
without notice. If you play with the low-level access functions, you
need to be prepared for such changes.
Here is a simple example, which prints how many places are available in
the placex table:
```
import asyncio
from pathlib import Path
import sqlalchemy as sa
from nominatim.api import NominatimAPIAsync
async def print_table_size():
api = NominatimAPIAsync(Path('.'))
async with api.begin() as conn:
cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
print(f'placex table has {cnt} rows.')
asyncio.run(print_table_size())
```
!!! warning
Low-level connections may only be used to read data from the database.
Do not use it to add or modify data or you might break Nominatim's
normal functions.
## SearchConnection class
::: nominatim.api.SearchConnection
options:
members:
- scalar
- execute
- get_class_table
- get_db_property
- get_property
heading_level: 6

View File

@@ -0,0 +1,36 @@
# The Nominatim API classes
The API classes are the core object of the search library. Always instantiate
one of these classes first. The API classes are **not threadsafe**. You need
to instantiate a separate instance for each thread.
### NominatimAPI
::: nominatim.api.NominatimAPI
options:
members:
- __init__
- config
- close
- status
- details
- lookup
- reverse
- search
- search_address
- search_category
heading_level: 6
group_by_category: False
### NominatimAPIAsync
::: nominatim.api.NominatimAPIAsync
options:
members:
- __init__
- setup_database
- close
- begin
heading_level: 6
group_by_category: False

View File

@@ -0,0 +1,58 @@
# Result handling
The search functions of the Nominatim API always return a result object
with the raw information about the place that is available in the
database. This section discusses data types used in the results and utility
functions that allow further processing of the results.
## Result fields
### Sources
Nominatim takes the result data from multiple sources. The `source_table` field
in the result describes, from which source the result was retrieved.
::: nominatim.api.SourceTable
options:
heading_level: 6
members_order: source
### Detailed address description
When the `address_details` parameter is set, then functions return not
only information about the result place but also about the place that
make up the address. This information is almost always required when you
want to present the user with a human-readable description of the result.
See also [Localization](#localization) below.
The address details are available in the `address_rows` field as a ordered
list of `AddressLine` objects with the country information last. The list also
contains the result place itself and some artificial entries, for example,
for the house number or the country code. This makes processing and creating
a full address easier.
::: nominatim.api.AddressLine
options:
heading_level: 6
members_order: source
### Detailed search terms
The `details` function can return detailed information about which search terms
may be used to find a place, when the `keywords` parameter is set. Search
terms are split into terms for the name of the place and search terms for
its address.
::: nominatim.api.WordInfo
options:
heading_level: 6
## Localization
Results are always returned with the full list of available names.
### Locale
::: nominatim.api.Locales
options:
heading_level: 6

View File

@@ -1,9 +1,13 @@
site_name: Nominatim Documentation
theme: readthedocs
site_name: Nominatim Manual
theme:
name: material
features:
- navigation.tabs
copyright: Copyright &copy; Nominatim developer community
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
site_url: https://nominatim.org
repo_url: https://github.com/openstreetmap/Nominatim
pages:
nav:
- 'Introduction' : 'index.md'
- 'API Reference':
- 'Overview': 'api/Overview.md'
@@ -18,27 +22,59 @@ pages:
- 'Basic Installation': 'admin/Installation.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy' : 'admin/Deployment.md'
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Maintenance' : 'admin/Maintenance.md'
- 'Migration from older Versions' : 'admin/Migration.md'
- 'Troubleshooting' : 'admin/Faq.md'
- 'Customization Guide':
- 'Overview': 'customize/Overview.md'
- 'Import Styles': 'customize/Import-Styles.md'
- 'Configuration Settings': 'customize/Settings.md'
- 'Per-Country Data': 'customize/Country-Settings.md'
- 'Place Ranking' : 'customize/Ranking.md'
- 'Importance' : 'customize/Importance.md'
- 'Tokenizers' : 'customize/Tokenizers.md'
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
- 'External data: Postcodes': 'customize/Postcodes.md'
- 'Library Guide':
- 'Getting Started': 'library/Getting-Started.md'
- 'Nominatim API class': 'library/NominatimAPI.md'
- 'Configuration': 'library/Configuration.md'
- 'Input Parameter Types': 'library/Input-Parameter-Types.md'
- 'Result Handling': 'library/Result-Handling.md'
- 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
- 'Developers Guide':
- 'Setup for Development' : 'develop/Development-Environment.md'
- 'Architecture Overview' : 'develop/overview.md'
- 'OSM Data Import' : 'develop/Import.md'
- 'Place Ranking' : 'develop/Ranking.md'
- 'Postcodes' : 'develop/Postcodes.md'
- 'Database Layout' : 'develop/Database-Layout.md'
- 'Indexing' : 'develop/Indexing.md'
- 'Tokenizers' : 'develop/Tokenizers.md'
- 'Custom modules for ICU tokenizer': 'develop/ICU-Tokenizer-Modules.md'
- 'Setup for Development' : 'develop/Development-Environment.md'
- 'Testing' : 'develop/Testing.md'
- 'External Data Sources': 'develop/data-sources.md'
- 'Appendix':
- 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
- 'Installation on CentOS 8' : 'appendix/Install-on-Centos-8.md'
- 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
- 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
- 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
markdown_extensions:
- codehilite
- admonition
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true
- def_list
- toc:
permalink:
extra_css: [extra.css, styles.css]
plugins:
- search
- mkdocstrings:
handlers:
python:
paths: ["${PROJECT_SOURCE_DIR}"]
options:
show_source: False
show_bases: False

View File

@@ -1,8 +1,16 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/ClassTypes.php');
require_once(CONST_LibDir.'/ClassTypes.php');
/**
* Detailed list of address parts for a single result
@@ -61,7 +69,7 @@ class AddressDetails
return join(', ', $aParts);
}
public function getAddressNames($sCountry = null)
public function getAddressNames()
{
$aAddress = array();
@@ -79,12 +87,14 @@ class AddressDetails
$sName = $aLine['housenumber'];
}
if (isset($sName)) {
$sTypeLabel = strtolower(str_replace(' ', '_', $sTypeLabel));
if (!isset($aAddress[$sTypeLabel])
|| $aLine['class'] == 'place'
) {
$aAddress[$sTypeLabel] = $sName;
if (isset($sName)
&& (!isset($aAddress[$sTypeLabel])
|| $aLine['class'] == 'place')
) {
$aAddress[$sTypeLabel] = $sName;
if (!empty($aLine['name'])) {
$this->addSubdivisionCode($aAddress, $aLine['admin_level'], $aLine['name']);
}
}
}
@@ -168,4 +178,14 @@ class AddressDetails
{
return $this->aAddressLines;
}
private function addSubdivisionCode(&$aAddress, $iAdminLevel, $nameDetails)
{
if (is_string($nameDetails)) {
$nameDetails = json_decode('{' . str_replace('"=>"', '":"', $nameDetails) . '}', true);
}
if (!empty($nameDetails['ISO3166-2'])) {
$aAddress["ISO3166-2-lvl$iAdminLevel"] = $nameDetails['ISO3166-2'];
}
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\ClassTypes;

View File

@@ -1,8 +1,16 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/DatabaseError.php');
require_once(CONST_LibDir.'/DatabaseError.php');
/**
* Uses PDO to access the database specified in the CONST_Database_DSN
@@ -12,9 +20,9 @@ class DB
{
protected $connection;
public function __construct($sDSN = CONST_Database_DSN)
public function __construct($sDSN = null)
{
$this->sDSN = $sDSN;
$this->sDSN = $sDSN ?? getSetting('DATABASE_DSN');
}
public function connect($bNew = false, $bPersistent = true)
@@ -30,18 +38,25 @@ class DB
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
try {
$conn = new \PDO($this->sDSN, null, null, $aConnOptions);
$this->connection = new \PDO($this->sDSN, null, null, $aConnOptions);
} catch (\PDOException $e) {
$sMsg = 'Failed to establish database connection:' . $e->getMessage();
throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
}
$conn->exec("SET DateStyle TO 'sql,european'");
$conn->exec("SET client_encoding TO 'utf-8'");
$this->connection->exec("SET DateStyle TO 'sql,european'");
$this->connection->exec("SET client_encoding TO 'utf-8'");
// Disable JIT and parallel workers. They interfere badly with search SQL.
$this->connection->exec('SET max_parallel_workers_per_gather TO 0');
if ($this->getPostgresVersion() >= 11) {
$this->connection->exec('SET jit_above_cost TO -1');
}
$iMaxExecution = ini_get('max_execution_time');
if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
if ($iMaxExecution > 0) {
$this->connection->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
}
$this->connection = $conn;
return true;
}
@@ -95,7 +110,9 @@ class DB
try {
$stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
$row = $stmt->fetch(\PDO::FETCH_NUM);
if ($row === false) return false;
if ($row === false) {
return false;
}
} catch (\PDOException $e) {
throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
}
@@ -240,16 +257,6 @@ class DB
return ($this->getOne($sSQL, array(':tablename' => $sTableName)) == 1);
}
/**
* Returns a list of table names in the database
*
* @return array[]
*/
public function getListOfTables()
{
return $this->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'");
}
/**
* Deletes a table. Returns true if deleted or didn't exist.
*
@@ -262,76 +269,6 @@ class DB
return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0;
}
/**
* Check if an index exists in the database. Optional filtered by tablename
*
* @param string $sTableName
*
* @return boolean
*/
public function indexExists($sIndexName, $sTableName = null)
{
return in_array($sIndexName, $this->getListOfIndices($sTableName));
}
/**
* Returns a list of index names in the database, optional filtered by tablename
*
* @param string $sTableName
*
* @return array
*/
public function getListOfIndices($sTableName = null)
{
// table_name | index_name | column_name
// -----------------------+---------------------------------+--------------
// country_name | idx_country_name_country_code | country_code
// country_osm_grid | idx_country_osm_grid_geometry | geometry
// import_polygon_delete | idx_import_polygon_delete_osmid | osm_id
// import_polygon_delete | idx_import_polygon_delete_osmid | osm_type
// import_polygon_error | idx_import_polygon_error_osmid | osm_id
// import_polygon_error | idx_import_polygon_error_osmid | osm_type
$sSql = <<< END
SELECT
t.relname as table_name,
i.relname as index_name,
a.attname as column_name
FROM
pg_class t,
pg_class i,
pg_index ix,
pg_attribute a
WHERE
t.oid = ix.indrelid
and i.oid = ix.indexrelid
and a.attrelid = t.oid
and a.attnum = ANY(ix.indkey)
and t.relkind = 'r'
and i.relname NOT LIKE 'pg_%'
FILTERS
ORDER BY
t.relname,
i.relname,
a.attname
END;
$aRows = null;
if ($sTableName) {
$sSql = str_replace('FILTERS', 'and t.relname = :tablename', $sSql);
$aRows = $this->getAll($sSql, array(':tablename' => $sTableName));
} else {
$sSql = str_replace('FILTERS', '', $sSql);
$aRows = $this->getAll($sSql);
}
$aIndexNames = array_unique(array_map(function ($aRow) {
return $aRow['index_name'];
}, $aRows));
sort($aIndexNames);
return $aIndexNames;
}
/**
* Tries to connect to the database but on failure doesn't throw an exception.
*
@@ -386,9 +323,13 @@ END;
if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
foreach (explode(';', $aMatches[1]) as $sKeyVal) {
list($sKey, $sVal) = explode('=', $sKeyVal, 2);
if ($sKey == 'host') $sKey = 'hostspec';
if ($sKey == 'dbname') $sKey = 'database';
if ($sKey == 'user') $sKey = 'username';
if ($sKey == 'host') {
$sKey = 'hostspec';
} elseif ($sKey == 'dbname') {
$sKey = 'database';
} elseif ($sKey == 'user') {
$sKey = 'username';
}
$aInfo[$sKey] = $sVal;
}
}

View File

@@ -1,11 +1,19 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
class DatabaseError extends \Exception
{
public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null)
public function __construct($message, $code, $previous, $oPDOErr, $sSql = null)
{
parent::__construct($message, $code, $previous);
// https://secure.php.net/manual/en/class.pdoexception.php

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -78,7 +86,7 @@ class Debug
echo '<th>Address Tokens</th><th>Address Not</th>';
echo '<th>country</th><th>operator</th>';
echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
foreach ($aSearches as $iRank => $aRankedSet) {
foreach ($aSearches as $aRankedSet) {
foreach ($aRankedSet as $aRow) {
$aRow->dumpAsHtmlTableRow($aWordsIDs);
}
@@ -127,7 +135,7 @@ class Debug
public static function printSQL($sSQL)
{
echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
echo '<p><tt><b>'.date('c').'</b> <font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
}
private static function outputVar($mVar, $sPreNL)
@@ -170,11 +178,12 @@ class Debug
}
if (is_string($mVar)) {
echo "'$mVar'";
return strlen($mVar) + 2;
$sOut = "'$mVar'";
} else {
$sOut = (string)$mVar;
}
echo (string)$mVar;
return strlen((string)$mVar);
echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
return strlen($sOut);
}
}

19
lib-php/DebugNone.php Normal file
View File

@@ -0,0 +1,19 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
class Debug
{
public static function __callStatic($name, $arguments)
{
// nothing
}
}

View File

@@ -1,24 +1,34 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
require_once(CONST_BasePath.'/lib/Phrase.php');
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
require_once(CONST_BasePath.'/lib/SearchDescription.php');
require_once(CONST_BasePath.'/lib/SearchContext.php');
require_once(CONST_BasePath.'/lib/TokenList.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/Phrase.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
require_once(CONST_LibDir.'/SearchDescription.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/SearchPosition.php');
require_once(CONST_LibDir.'/TokenList.php');
require_once(CONST_TokenizerDir.'/tokenizer.php');
class Geocode
{
protected $oDB;
protected $oPlaceLookup;
protected $oTokenizer;
protected $aLangPrefOrder = array();
protected $aExcludePlaceIDs = array();
protected $bReverseInPlan = false;
protected $iLimit = 20;
protected $iFinalLimit = 10;
@@ -42,28 +52,12 @@ class Geocode
protected $sQuery = false;
protected $aStructuredQuery = false;
protected $oNormalizer = null;
public function __construct(&$oDB)
{
$this->oDB =& $oDB;
$this->oPlaceLookup = new PlaceLookup($this->oDB);
$this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
}
private function normTerm($sTerm)
{
if ($this->oNormalizer === null) {
return $sTerm;
}
return $this->oNormalizer->transliterate($sTerm);
}
public function setReverseInPlan($bReverse)
{
$this->bReverseInPlan = $bReverse;
$this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
}
public function setLanguagePreference($aLangPref)
@@ -85,7 +79,9 @@ class Geocode
$aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
}
if ($this->bBoundedSearch) $aParams['bounded'] = '1';
if ($this->bBoundedSearch) {
$aParams['bounded'] = '1';
}
if ($this->aCountryCodes) {
$aParams['countrycodes'] = implode(',', $this->aCountryCodes);
@@ -100,11 +96,14 @@ class Geocode
public function setLimit($iLimit = 10)
{
if ($iLimit > 50) $iLimit = 50;
if ($iLimit < 1) $iLimit = 1;
if ($iLimit > 50) {
$iLimit = 50;
} elseif ($iLimit < 1) {
$iLimit = 1;
}
$this->iFinalLimit = $iLimit;
$this->iLimit = $iLimit + min($iLimit, 10);
$this->iLimit = $iLimit + max($iLimit, 10);
}
public function setFeatureType($sFeatureType)
@@ -191,23 +190,29 @@ class Geocode
$this->bFallback = $oParams->getBool('fallback', $this->bFallback);
// List of excluded Place IDs - used for more acurate pageing
// List of excluded Place IDs - used for more accurate pageing
$sExcluded = $oParams->getStringList('exclude_place_ids');
if ($sExcluded) {
foreach ($sExcluded as $iExcludedPlaceID) {
$iExcludedPlaceID = (int)$iExcludedPlaceID;
if ($iExcludedPlaceID)
if ($iExcludedPlaceID) {
$aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID;
}
}
if (isset($aExcludePlaceIDs))
if (isset($aExcludePlaceIDs)) {
$this->aExcludePlaceIDs = $aExcludePlaceIDs;
}
}
// Only certain ranks of feature
$sFeatureType = $oParams->getString('featureType');
if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype');
if ($sFeatureType) $this->setFeatureType($sFeatureType);
if (!$sFeatureType) {
$sFeatureType = $oParams->getString('featuretype');
}
if ($sFeatureType) {
$this->setFeatureType($sFeatureType);
}
// Country code list
$sCountries = $oParams->getStringList('countrycodes');
@@ -217,8 +222,9 @@ class Geocode
$aCountries[] = strtolower($sCountryCode);
}
}
if (isset($aCountries))
if (isset($aCountries)) {
$this->aCountryCodes = $aCountries;
}
}
$aViewbox = $oParams->getStringList('viewboxlbrt');
@@ -262,7 +268,6 @@ class Geocode
$oParams->getString('country'),
$oParams->getString('postalcode')
);
$this->setReverseInPlan(false);
} else {
$this->setQuery($sQuery);
}
@@ -271,13 +276,17 @@ class Geocode
public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues)
{
$sValue = trim($sValue);
if (!$sValue) return false;
if (!$sValue) {
return false;
}
$this->aStructuredQuery[$sKey] = $sValue;
if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) {
$this->iMinAddressRank = $iNewMinAddressRank;
$this->iMaxAddressRank = $iNewMaxAddressRank;
}
if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
if ($aItemListValues) {
$this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
}
return true;
}
@@ -311,11 +320,11 @@ class Geocode
public function fallbackStructuredQuery()
{
if (!$this->aStructuredQuery) return false;
$aParams = $this->aStructuredQuery;
if (count($aParams) == 1) return false;
if (!$aParams || count($aParams) == 1) {
return false;
}
$aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state');
@@ -330,7 +339,7 @@ class Geocode
return false;
}
public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens)
{
/*
Calculate all searches using oValidTokens i.e.
@@ -345,52 +354,26 @@ class Geocode
*/
foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
$sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
$oPosition = new SearchPosition(
$oPhrase->getPhraseType(),
$iPhrase,
count($aPhrases)
);
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
// Add all words from this wordset
foreach ($aWordset as $iToken => $sToken) {
//echo "<br><b>$sToken</b>";
$aNewWordsetSearches = array();
$oPosition->setTokenPosition($iToken, count($aWordset));
foreach ($aWordsetSearches as $oCurrentSearch) {
//echo "<i>";
//var_dump($oCurrentSearch);
//echo "</i>";
// Tokens with full name matches.
foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
$aNewSearches = $oCurrentSearch->extendWithFullTerm(
$oSearchTerm,
$oValidTokens->contains($sToken)
&& strpos($sToken, ' ') === false,
$sPhraseType,
$iToken == 0 && $iPhrase == 0,
$iPhrase == 0,
$iToken + 1 == count($aWordset)
&& $iPhrase + 1 == count($aPhrases)
);
foreach ($aNewSearches as $oSearch) {
if ($oSearch->getRank() < $this->iMaxRank) {
$aNewWordsetSearches[] = $oSearch;
}
}
}
// Look for partial matches.
// Note that there is no point in adding country terms here
// because country is omitted in the address.
if ($sPhraseType != 'country') {
// Allow searching for a word - but at extra cost
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
$sToken,
$oSearchTerm,
$bIsStructured,
$iPhrase,
$oValidTokens->get(' '.$sToken)
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) {
$aNewSearches = $oSearchTerm->extendSearch(
$oCurrentSearch,
$oPosition
);
foreach ($aNewSearches as $oSearch) {
@@ -405,7 +388,6 @@ class Geocode
usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
$aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
}
//var_Dump('<hr>',count($aWordsetSearches)); exit;
$aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
@@ -413,8 +395,11 @@ class Geocode
$aSearchHash = array();
foreach ($aNewPhraseSearches as $iSearch => $aSearch) {
$sHash = serialize($aSearch);
if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
else $aSearchHash[$sHash] = 1;
if (isset($aSearchHash[$sHash])) {
unset($aNewPhraseSearches[$iSearch]);
} else {
$aSearchHash[$sHash] = 1;
}
}
$aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
@@ -435,10 +420,12 @@ class Geocode
$iSearchCount = 0;
$aSearches = array();
foreach ($aGroupedSearches as $iScore => $aNewSearches) {
foreach ($aGroupedSearches as $aNewSearches) {
$iSearchCount += count($aNewSearches);
$aSearches = array_merge($aSearches, $aNewSearches);
if ($iSearchCount > 50) break;
if ($iSearchCount > 50) {
break;
}
}
}
@@ -495,7 +482,9 @@ class Geocode
public function lookup()
{
Debug::newFunction('Geocode::lookup');
if (!$this->sQuery && !$this->aStructuredQuery) return array();
if (!$this->sQuery && !$this->aStructuredQuery) {
return array();
}
Debug::printDebugArray('Geocode', $this);
@@ -520,25 +509,11 @@ class Geocode
Debug::newSection('Query Preprocessing');
$sNormQuery = $this->normTerm($this->sQuery);
Debug::printVar('Normalized query', $sNormQuery);
$sLanguagePrefArraySQL = $this->oDB->getArraySQL(
$this->oDB->getDBQuotedList($this->aLangPrefOrder)
);
$sQuery = $this->sQuery;
if (!preg_match('//u', $sQuery)) {
userError('Query string is not UTF-8 encoded.');
}
// Conflicts between US state abreviations and various words for 'the' in different languages
if (isset($this->aLangPrefOrder['name:en'])) {
$sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery);
$sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery);
$sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery);
}
// Do we have anything that looks like a lat/lon pair?
$sQuery = $oCtx->setNearPointFromQuery($sQuery);
@@ -576,117 +551,62 @@ class Geocode
}
if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
$sSpecialTerm = pg_escape_string($sSpecialTerm);
$sToken = $this->oDB->getOne(
'SELECT make_standard_name(:term)',
array(':term' => $sSpecialTerm),
'Cannot decode query. Wrong encoding?'
);
$sSQL = 'SELECT class, type FROM word ';
$sSQL .= ' WHERE word_token in (\' '.$sToken.'\')';
$sSQL .= ' AND class is not null AND class not in (\'place\')';
$aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);
Debug::printSQL($sSQL);
$aSearchWords = $this->oDB->getAll($sSQL);
$aNewSearches = array();
foreach ($aSearches as $oSearch) {
foreach ($aSearchWords as $aSearchTerm) {
$oNewSearch = clone $oSearch;
$oNewSearch->setPoiSearch(
Operator::TYPE,
$aSearchTerm['class'],
$aSearchTerm['type']
);
$aNewSearches[] = $oNewSearch;
if (!empty($aTokens)) {
$aNewSearches = array();
$oPosition = new SearchPosition('', 0, 1);
$oPosition->setTokenPosition(0, 1);
foreach ($aSearches as $oSearch) {
foreach ($aTokens as $oToken) {
$aNewSearches = array_merge(
$aNewSearches,
$oToken->extendSearch($oSearch, $oPosition)
);
}
}
$aSearches = $aNewSearches;
}
$aSearches = $aNewSearches;
}
// Split query into phrases
// Commas are used to reduce the search space by indicating where phrases split
$aPhrases = array();
if ($this->aStructuredQuery) {
$aInPhrases = $this->aStructuredQuery;
$bStructuredPhrases = true;
foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
$aPhrases[] = new Phrase($sPhrase, $iPhrase);
}
} else {
$aInPhrases = explode(',', $sQuery);
$bStructuredPhrases = false;
foreach (explode(',', $sQuery) as $sPhrase) {
$aPhrases[] = new Phrase($sPhrase, '');
}
}
Debug::printDebugArray('Search context', $oCtx);
Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
Debug::printVar('Final query phrases', $aInPhrases);
// Convert each phrase to standard form
// Create a list of standard words
// Get all 'sets' of words
// Generate a complete list of all
Debug::newSection('Tokenization');
$aTokens = array();
$aPhrases = array();
foreach ($aInPhrases as $iPhrase => $sPhrase) {
$sPhrase = $this->oDB->getOne(
'SELECT make_standard_name(:phrase)',
array(':phrase' => $sPhrase),
'Cannot normalize query string (is it a UTF-8 string?)'
);
if (trim($sPhrase)) {
$oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
$oPhrase->addTokens($aTokens);
$aPhrases[] = $oPhrase;
}
}
Debug::printVar('Tokens', $aTokens);
$oValidTokens = new TokenList();
if (!empty($aTokens)) {
$oValidTokens->addTokensFromDB(
$this->oDB,
$aTokens,
$this->aCountryCodes,
$sNormQuery,
$this->oNormalizer
);
$oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);
if ($oValidTokens->count() > 0) {
$oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
} elseif (preg_match('/^ [0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
$sToken,
new Token\HouseNumber(null, trim($sToken))
);
}
}
}
$aPhrases = array_filter($aPhrases, function ($oPhrase) {
return $oPhrase->getWordSets() !== null;
});
// Any words that have failed completely?
// TODO: suggestions
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
foreach ($aPhrases as $oPhrase) {
$oPhrase->computeWordSets($oValidTokens);
}
Debug::printDebugTable('Phrases', $aPhrases);
Debug::newSection('Search candidates');
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
if ($this->bReverseInPlan) {
if (!$this->aStructuredQuery) {
// Reverse phrase array and also reverse the order of the wordsets in
// the first and final phrase. Don't bother about phrases in the middle
// because order in the address doesn't matter.
@@ -695,18 +615,17 @@ class Geocode
if (count($aPhrases) > 1) {
$aPhrases[count($aPhrases)-1]->invertWordSets();
}
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aReverseGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
$aReverseGroupedSearches[$aSearch->getRank()] = array();
if (!isset($aGroupedSearches[$aSearch->getRank()])) {
$aGroupedSearches[$aSearch->getRank()] = array();
}
$aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
$aGroupedSearches[$aSearch->getRank()][] = $aSearch;
}
}
$aGroupedSearches = $aReverseGroupedSearches;
ksort($aGroupedSearches);
}
} else {
@@ -714,7 +633,9 @@ class Geocode
$aGroupedSearches = array();
foreach ($aSearches as $aSearch) {
if ($aSearch->getRank() < $this->iMaxRank) {
if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
if (!isset($aGroupedSearches[$aSearch->getRank()])) {
$aGroupedSearches[$aSearch->getRank()] = array();
}
$aGroupedSearches[$aSearch->getRank()][] = $aSearch;
}
}
@@ -728,7 +649,9 @@ class Geocode
$sHash = serialize($aSearch);
if (isset($aSearchHash[$sHash])) {
unset($aGroupedSearches[$iGroup][$iSearch]);
if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]);
if (empty($aGroupedSearches[$iGroup])) {
unset($aGroupedSearches[$iGroup]);
}
} else {
$aSearchHash[$sHash] = 1;
}
@@ -772,20 +695,27 @@ class Geocode
}
}
if ($iQueryLoop > 20) break;
if ($iQueryLoop > 20) {
break;
}
}
if (!empty($aResults)) {
$aSplitResults = Result::splitResults($aResults);
Debug::printVar('Split results', $aSplitResults);
if ($iGroupLoop <= 4 && empty($aSplitResults['tail'])
&& reset($aSplitResults['head'])->iResultRank > 0) {
if ($iGroupLoop <= 4
&& reset($aSplitResults['head'])->iResultRank > 0
&& $iGroupedRank !== array_key_last($aGroupedSearches)) {
// Haven't found an exact match for the query yet.
// Therefore add result from the next group level.
$aNextResults = $aSplitResults['head'];
foreach ($aNextResults as $oRes) {
$oRes->iResultRank--;
}
foreach ($aSplitResults['tail'] as $oRes) {
$oRes->iResultRank--;
$aNextResults[$oRes->iId] = $oRes;
}
$aResults = array();
} else {
$aResults = $aSplitResults['head'];
@@ -833,7 +763,6 @@ class Geocode
foreach ($aResults as $oResult) {
if (($this->iMaxAddressRank == 30 &&
($oResult->iTable == Result::TABLE_OSMLINE
|| $oResult->iTable == Result::TABLE_AUX
|| $oResult->iTable == Result::TABLE_TIGER))
|| in_array($oResult->iId, $aFilteredIDs)
) {
@@ -843,9 +772,9 @@ class Geocode
$aResults = $tempIDs;
}
if (!empty($aResults)) break;
if ($iGroupLoop > 4) break;
if ($iQueryLoop > 30) break;
if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) {
break;
}
}
} else {
// Just interpret as a reverse geocode
@@ -863,10 +792,8 @@ class Geocode
// No results? Done
if (empty($aResults)) {
if ($this->bFallback) {
if ($this->fallbackStructuredQuery()) {
return $this->lookup();
}
if ($this->bFallback && $this->fallbackStructuredQuery()) {
return $this->lookup();
}
return array();
@@ -885,7 +812,9 @@ class Geocode
$aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
foreach ($aRecheckWords as $i => $sWord) {
if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
if (!preg_match('/[\pL\pN]/', $sWord)) {
unset($aRecheckWords[$i]);
}
}
Debug::printVar('Recheck words', $aRecheckWords);
@@ -914,7 +843,9 @@ class Geocode
$aResult['importance'] = 0.001;
$aResult['foundorder'] = $aResult['addressimportance'];
} else {
$aResult['importance'] = max(0.001, $aResult['importance']);
if ($aResult['importance'] == 0) {
$aResult['importance'] = 0.0001;
}
$aResult['importance'] *= $this->viewboxImportanceFactor(
$aResult['lon'],
$aResult['lat']
@@ -943,9 +874,11 @@ class Geocode
$iCountWords = 0;
$sAddress = $aResult['langaddress'];
foreach ($aRecheckWords as $i => $sWord) {
if (stripos($sAddress, $sWord)!==false) {
if (grapheme_stripos($sAddress, $sWord)!==false) {
$iCountWords++;
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
$iCountWords += 0.1;
}
}
}
@@ -962,15 +895,8 @@ class Geocode
$aToFilter = $aSearchResults;
$aSearchResults = array();
$bFirst = true;
foreach ($aToFilter as $aResult) {
$this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id'];
if ($bFirst) {
$fLat = $aResult['lat'];
$fLon = $aResult['lon'];
if (isset($aResult['zoom'])) $iZoom = $aResult['zoom'];
$bFirst = false;
}
if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']])
&& !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']]))
) {
@@ -980,7 +906,9 @@ class Geocode
}
// Absolute limit on number of results
if (count($aSearchResults) >= $this->iFinalLimit) break;
if (count($aSearchResults) >= $this->iFinalLimit) {
break;
}
}
Debug::printVar('Post-filter results', $aSearchResults);
@@ -994,7 +922,6 @@ class Geocode
'Structured query' => $this->aStructuredQuery,
'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
'Try reversed query'=> $this->bReverseInPlan,
'Limit (for searches)' => $this->iLimit,
'Limit (for results)'=> $this->iFinalLimit,
'Country codes' => Debug::fmtArrayVals($this->aCountryCodes),

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -14,7 +22,10 @@ class ParameterParser
public function getBool($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $bDefault;
}
@@ -23,7 +34,7 @@ class ParameterParser
public function getInt($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName])) {
if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
return $bDefault;
}
@@ -36,7 +47,7 @@ class ParameterParser
public function getFloat($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName])) {
if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
return $bDefault;
}
@@ -49,7 +60,10 @@ class ParameterParser
public function getString($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $bDefault;
}
@@ -58,11 +72,14 @@ class ParameterParser
public function getSet($sName, $aValues, $sDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $sDefault;
}
if (!in_array($this->aParams[$sName], $aValues)) {
if (!in_array($this->aParams[$sName], $aValues, true)) {
userError("Parameter '$sName' must be one of: ".join(', ', $aValues));
}
@@ -90,35 +107,43 @@ class ParameterParser
$aLanguages = array();
$sLangString = $this->getString('accept-language', $sFallback);
if ($sLangString) {
if (preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)) {
foreach ($aLanguagesParse as $iLang => $aLanguage) {
$aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
if ($sLangString
&& preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)
) {
foreach ($aLanguagesParse as $iLang => $aLanguage) {
$aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
if (!isset($aLanguages[$aLanguage[2]])) {
$aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
}
arsort($aLanguages);
}
arsort($aLanguages);
}
if (empty($aLanguages) && CONST_Default_Language) {
$aLanguages[CONST_Default_Language] = 1;
}
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['name:'.$sLanguage] = 'name:'.$sLanguage;
$this->addNameTag($aLangPrefOrder, 'name:'.$sLanguage);
}
$aLangPrefOrder['name'] = 'name';
$aLangPrefOrder['brand'] = 'brand';
$this->addNameTag($aLangPrefOrder, 'name');
$this->addNameTag($aLangPrefOrder, 'brand');
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['official_name:'.$sLanguage] = 'official_name:'.$sLanguage;
$aLangPrefOrder['short_name:'.$sLanguage] = 'short_name:'.$sLanguage;
$this->addNameTag($aLangPrefOrder, 'official_name:'.$sLanguage);
$this->addNameTag($aLangPrefOrder, 'short_name:'.$sLanguage);
}
$aLangPrefOrder['official_name'] = 'official_name';
$aLangPrefOrder['short_name'] = 'short_name';
$aLangPrefOrder['ref'] = 'ref';
$aLangPrefOrder['type'] = 'type';
$this->addNameTag($aLangPrefOrder, 'official_name');
$this->addNameTag($aLangPrefOrder, 'short_name');
$this->addNameTag($aLangPrefOrder, 'ref');
$this->addNameTag($aLangPrefOrder, 'type');
return $aLangPrefOrder;
}
private function addNameTag(&$aLangPrefOrder, $sTag)
{
$aLangPrefOrder[$sTag] = $sTag;
$aLangPrefOrder['_place_'.$sTag] = '_place_'.$sTag;
}
public function hasSetAny($aParamNames)
{
foreach ($aParamNames as $sName) {

89
lib-php/Phrase.php Normal file
View File

@@ -0,0 +1,89 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
/**
* Segment of a query string.
*
* The parts of a query strings are usually separated by commas.
*/
class Phrase
{
// Complete phrase as a string (guaranteed to have no leading or trailing
// spaces).
private $sPhrase;
// Element type for structured searches.
private $sPhraseType;
// Possible segmentations of the phrase.
private $aWordSets;
public function __construct($sPhrase, $sPhraseType)
{
$this->sPhrase = trim($sPhrase);
$this->sPhraseType = $sPhraseType;
}
/**
* Get the original phrase of the string.
*/
public function getPhrase()
{
return $this->sPhrase;
}
/**
* Return the element type of the phrase.
*
* @return string Pharse type if the phrase comes from a structured query
* or empty string otherwise.
*/
public function getPhraseType()
{
return $this->sPhraseType;
}
public function setWordSets($aWordSets)
{
$this->aWordSets = $aWordSets;
}
/**
* Return the array of possible segmentations of the phrase.
*
* @return string[][] Array of segmentations, each consisting of an
* array of terms.
*/
public function getWordSets()
{
return $this->aWordSets;
}
/**
* Invert the set of possible segmentations.
*
* @return void
*/
public function invertWordSets()
{
foreach ($this->aWordSets as $i => $aSet) {
$this->aWordSets[$i] = array_reverse($aSet);
}
}
public function debugInfo()
{
return array(
'Type' => $this->sPhraseType,
'Phrase' => $this->sPhrase,
'WordSets' => $this->aWordSets
);
}
}

View File

@@ -1,9 +1,17 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/AddressDetails.php');
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/AddressDetails.php');
require_once(CONST_LibDir.'/Result.php');
class PlaceLookup
{
@@ -78,7 +86,7 @@ class PlaceLookup
($this->bIncludePolygonAsSVG ? 1 : 0);
if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
if (CONST_PolygonOutput_MaximumTypes) {
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
} else {
userError('Polygon output is disabled');
}
@@ -89,20 +97,36 @@ class PlaceLookup
{
$aParams = array();
if ($this->bAddressDetails) $aParams['addressdetails'] = '1';
if ($this->bExtraTags) $aParams['extratags'] = '1';
if ($this->bNameDetails) $aParams['namedetails'] = '1';
if ($this->bAddressDetails) {
$aParams['addressdetails'] = '1';
}
if ($this->bExtraTags) {
$aParams['extratags'] = '1';
}
if ($this->bNameDetails) {
$aParams['namedetails'] = '1';
}
if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1';
if ($this->bIncludePolygonAsText) {
$aParams['polygon_text'] = '1';
}
if ($this->bIncludePolygonAsGeoJSON) {
$aParams['polygon_geojson'] = '1';
}
if ($this->bIncludePolygonAsKML) {
$aParams['polygon_kml'] = '1';
}
if ($this->bIncludePolygonAsSVG) {
$aParams['polygon_svg'] = '1';
}
if ($this->fPolygonSimplificationThreshold > 0.0) {
$aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold;
}
if (!$this->bDeDupe) $aParams['dedupe'] = '0';
if (!$this->bDeDupe) {
$aParams['dedupe'] = '0';
}
return $aParams;
}
@@ -147,8 +171,9 @@ class PlaceLookup
private function langAddressSql($sHousenumber)
{
if ($this->bAddressDetails)
if ($this->bAddressDetails) {
return ''; // langaddress will be computed from address details
}
return 'get_address_by_language(place_id,'.$sHousenumber.','.$this->aLangPrefOrderSql.') AS langaddress,';
}
@@ -162,12 +187,12 @@ class PlaceLookup
return null;
}
$aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)));
$aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)), 0, 30, true);
return empty($aResults) ? null : reset($aResults);
}
public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30)
public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30, $bAllowLinked = false)
{
Debug::newFunction('Place lookup');
@@ -222,7 +247,9 @@ class PlaceLookup
if ($this->sAllowedTypesSQLList) {
$sSQL .= 'AND placex.class in '.$this->sAllowedTypesSQLList;
}
$sSQL .= ' AND linked_place_id is null ';
if (!$bAllowLinked) {
$sSQL .= ' AND linked_place_id is null ';
}
$sSQL .= ' GROUP BY ';
$sSQL .= ' osm_type, ';
$sSQL .= ' osm_id, ';
@@ -234,12 +261,20 @@ class PlaceLookup
$sSQL .= ' housenumber,';
$sSQL .= ' country_code, ';
$sSQL .= ' importance, ';
if (!$this->bDeDupe) $sSQL .= 'place_id,';
if (!$this->bAddressDetails) $sSQL .= 'langaddress, ';
if (!$this->bDeDupe) {
$sSQL .= 'place_id,';
}
if (!$this->bAddressDetails) {
$sSQL .= 'langaddress, ';
}
$sSQL .= ' placename, ';
$sSQL .= ' ref, ';
if ($this->bExtraTags) $sSQL .= 'extratags, ';
if ($this->bNameDetails) $sSQL .= 'name, ';
if ($this->bExtraTags) {
$sSQL .= 'extratags, ';
}
if ($this->bNameDetails) {
$sSQL .= 'name, ';
}
$sSQL .= ' extra_place ';
$aSubSelects[] = $sSQL;
@@ -260,8 +295,12 @@ class PlaceLookup
$sSQL .= $this->langAddressSql('-1');
$sSQL .= ' postcode as placename,';
$sSQL .= ' postcode as ref,';
if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
if ($this->bExtraTags) {
$sSQL .= 'null::text AS extra,';
}
if ($this->bNameDetails) {
$sSQL .= 'null::text AS names,';
}
$sSQL .= ' ST_x(geometry) AS lon, ST_y(geometry) AS lat,';
$sSQL .= ' (0.75-(rank_search::float/40)) AS importance, ';
$sSQL .= $this->addressImportanceSql('geometry', 'lp.parent_place_id');
@@ -298,8 +337,12 @@ class PlaceLookup
$sSQL .= $this->langAddressSql('housenumber_for_place');
$sSQL .= ' null::text AS placename, ';
$sSQL .= ' null::text AS ref, ';
if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
if ($this->bExtraTags) {
$sSQL .= 'null::text AS extra,';
}
if ($this->bNameDetails) {
$sSQL .= 'null::text AS names,';
}
$sSQL .= ' st_x(centroid) AS lon, ';
$sSQL .= ' st_y(centroid) AS lat,';
$sSQL .= ' -1.15 AS importance, ';
@@ -307,7 +350,9 @@ class PlaceLookup
$sSQL .= ' null::text AS extra_place ';
$sSQL .= ' FROM (';
$sSQL .= ' SELECT place_id, '; // interpolate the Tiger housenumbers here
$sSQL .= ' ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) AS centroid, ';
$sSQL .= ' CASE WHEN startnumber != endnumber';
$sSQL .= ' THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float)';
$sSQL .= ' ELSE ST_LineInterpolatePoint(linegeo, 0.5) END AS centroid, ';
$sSQL .= ' parent_place_id, ';
$sSQL .= ' housenumber_for_place';
$sSQL .= ' FROM (';
@@ -344,8 +389,12 @@ class PlaceLookup
$sSQL .= $this->langAddressSql('housenumber_for_place');
$sSQL .= ' null::text AS placename, ';
$sSQL .= ' null::text AS ref, ';
if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
if ($this->bExtraTags) {
$sSQL .= 'null::text AS extra, ';
}
if ($this->bNameDetails) {
$sSQL .= 'null::text AS names, ';
}
$sSQL .= ' st_x(centroid) AS lon, ';
$sSQL .= ' st_y(centroid) AS lat, ';
// slightly smaller than the importance for normal houses
@@ -360,7 +409,7 @@ class PlaceLookup
$sSQL .= ' CASE '; // interpolate the housenumbers here
$sSQL .= ' WHEN startnumber != endnumber ';
$sSQL .= ' THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) ';
$sSQL .= ' ELSE ST_LineInterpolatePoint(linegeo, 0.5) ';
$sSQL .= ' ELSE linegeo ';
$sSQL .= ' END as centroid, ';
$sSQL .= ' parent_place_id, ';
$sSQL .= ' housenumber_for_place ';
@@ -373,42 +422,6 @@ class PlaceLookup
$aSubSelects[] = $sSQL;
}
if (CONST_Use_Aux_Location_data) {
$sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX);
if ($sPlaceIDs) {
$sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX);
$sSQL = ' SELECT ';
$sSQL .= " 'L' AS osm_type, ";
$sSQL .= ' place_id AS osm_id, ';
$sSQL .= " 'place' AS class,";
$sSQL .= " 'house' AS type, ";
$sSQL .= ' null::smallint AS admin_level, ';
$sSQL .= ' 30 AS rank_search,';
$sSQL .= ' 30 AS rank_address, ';
$sSQL .= ' place_id,';
$sSQL .= ' parent_place_id, ';
$sSQL .= ' housenumber,';
$sSQL .= " 'us' AS country_code, ";
$sSQL .= $this->langAddressSql('-1');
$sSQL .= ' null::text AS placename, ';
$sSQL .= ' null::text AS ref, ';
if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
$sSQL .= ' ST_X(centroid) AS lon, ';
$sSQL .= ' ST_Y(centroid) AS lat, ';
$sSQL .= ' -1.10 AS importance, ';
$sSQL .= $this->addressImportanceSql(
'centroid',
'location_property_aux.parent_place_id'
);
$sSQL .= ' null::text AS extra_place ';
$sSQL .= ' FROM location_property_aux ';
$sSQL .= " WHERE place_id in ($sPlaceIDs) ";
$aSubSelects[] = $sSQL;
}
}
}
if (empty($aSubSelects)) {
@@ -434,18 +447,14 @@ class PlaceLookup
if ($this->bExtraTags) {
if ($aPlace['extra']) {
$aPlace['sExtraTags'] = json_decode($aPlace['extra']);
$aPlace['sExtraTags'] = json_decode($aPlace['extra'], true);
} else {
$aPlace['sExtraTags'] = (object) array();
}
}
if ($this->bNameDetails) {
if ($aPlace['names']) {
$aPlace['sNameDetails'] = json_decode($aPlace['names']);
} else {
$aPlace['sNameDetails'] = (object) array();
}
$aPlace['sNameDetails'] = $this->extractNames($aPlace['names']);
}
$aPlace['addresstype'] = ClassTypes\getLabelTag(
@@ -468,6 +477,33 @@ class PlaceLookup
return $aResults;
}
private function extractNames($sNames)
{
if (!$sNames) {
return (object) array();
}
$aFullNames = json_decode($sNames, true);
$aNames = array();
foreach ($aFullNames as $sKey => $sValue) {
if (strpos($sKey, '_place_') === 0) {
$sSubKey = substr($sKey, 7);
if (array_key_exists($sSubKey, $aFullNames)) {
$aNames[$sKey] = $sValue;
} else {
$aNames[$sSubKey] = $sValue;
}
} else {
$aNames[$sKey] = $sValue;
}
}
return $aNames;
}
/* returns an array which will contain the keys
* aBoundingBox
* and may also contain one or more of the keys
@@ -478,73 +514,84 @@ class PlaceLookup
* lat
* lon
*/
public function getOutlines($iPlaceID, $fLon = null, $fLat = null, $fRadius = null, $fLonReverse = null, $fLatReverse = null)
{
$aOutlineResult = array();
if (!$iPlaceID) return $aOutlineResult;
if (!$iPlaceID) {
return $aOutlineResult;
}
if (CONST_Search_AreaPolygons) {
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ' ST_Y(closest_point) as centrelat,';
$sSQL .= ' ST_X(closest_point) as centrelon,';
} else {
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
}
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext';
if ($fLonReverse != null && $fLatReverse != null) {
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sFrom .=' ELSE centroid END AS closest_point';
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
} else {
$sFrom = ' from placex where place_id = '.$iPlaceID;
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
} else {
$sSQL .= $sFrom;
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) {
$sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
}
if ($this->bIncludePolygonAsKML) {
$sSQL .= ',ST_AsKML(geometry) as askml';
}
if ($this->bIncludePolygonAsSVG) {
$sSQL .= ',ST_AsSVG(geometry) as assvg';
}
if ($this->bIncludePolygonAsText) {
$sSQL .= ',ST_AsText(geometry) as astext';
}
$sSQL .= ' FROM (SELECT place_id';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sSQL .=' ELSE centroid END AS centroid';
} else {
$sSQL .= ',centroid';
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry';
} else {
$sSQL .= ',geometry';
}
$sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx';
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
if ($aPointPolygon && $aPointPolygon['place_id']) {
if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null) {
$aOutlineResult['lat'] = $aPointPolygon['centrelat'];
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];
}
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
if ($aPointPolygon && $aPointPolygon['place_id']) {
if ($aPointPolygon['centrelon'] !== null && $aPointPolygon['centrelat'] !== null) {
$aOutlineResult['lat'] = $aPointPolygon['centrelat'];
$aOutlineResult['lon'] = $aPointPolygon['centrelon'];
}
if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
$aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
$aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius;
}
if (abs($aPointPolygon['minlon'] - $aPointPolygon['maxlon']) < 0.0000001) {
$aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius;
$aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius;
}
$aOutlineResult['aBoundingBox'] = array(
(string)$aPointPolygon['minlat'],
(string)$aPointPolygon['maxlat'],
(string)$aPointPolygon['minlon'],
(string)$aPointPolygon['maxlon']
);
if ($this->bIncludePolygonAsGeoJSON) {
$aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
}
if ($this->bIncludePolygonAsKML) {
$aOutlineResult['askml'] = $aPointPolygon['askml'];
}
if ($this->bIncludePolygonAsSVG) {
$aOutlineResult['assvg'] = $aPointPolygon['assvg'];
}
if ($this->bIncludePolygonAsText) {
$aOutlineResult['astext'] = $aPointPolygon['astext'];
}
if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
$aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
$aPointPolygon['maxlat'] = $aPointPolygon['maxlat'] + $fRadius;
}
if (abs($aPointPolygon['minlon'] - $aPointPolygon['maxlon']) < 0.0000001) {
$aPointPolygon['minlon'] = $aPointPolygon['minlon'] - $fRadius;
$aPointPolygon['maxlon'] = $aPointPolygon['maxlon'] + $fRadius;
}
$aOutlineResult['aBoundingBox'] = array(
(string)$aPointPolygon['minlat'],
(string)$aPointPolygon['maxlat'],
(string)$aPointPolygon['minlon'],
(string)$aPointPolygon['maxlon']
);
}
// as a fallback we generate a bounding box without knowing the size of the geometry

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -13,8 +21,7 @@ class Result
const TABLE_PLACEX = 0;
const TABLE_POSTCODE = 1;
const TABLE_OSMLINE = 2;
const TABLE_AUX = 3;
const TABLE_TIGER = 4;
const TABLE_TIGER = 3;
/// Database table that contains the result.
public $iTable;
@@ -26,6 +33,8 @@ class Result
public $iExactMatches = 0;
/// Subranking within the results (the higher the worse).
public $iResultRank = 0;
/// Address rank of the result.
public $iAddressRank;
public function debugInfo()
{
@@ -54,6 +63,27 @@ class Result
}
)));
}
public static function joinIdsByTableMinRank($aResults, $iTable, $iMinAddressRank)
{
return join(',', array_keys(array_filter(
$aResults,
function ($aValue) use ($iTable, $iMinAddressRank) {
return $aValue->iTable == $iTable && $aValue->iAddressRank >= $iMinAddressRank;
}
)));
}
public static function joinIdsByTableMaxRank($aResults, $iTable, $iMaxAddressRank)
{
return join(',', array_keys(array_filter(
$aResults,
function ($aValue) use ($iTable, $iMaxAddressRank) {
return $aValue->iTable == $iTable && $aValue->iAddressRank <= $iMaxAddressRank;
}
)));
}
public static function sqlHouseNumberTable($aResults, $iTable)
{
$sHousenumbers = '';
@@ -84,7 +114,7 @@ class Result
foreach ($aResults as $oRes) {
if ($oRes->iResultRank < $iMinRank) {
$aTail = array_merge($aTail, $aHead);
$aTail += $aHead;
$aHead = array($oRes->iId => $oRes);
$iMinRank = $oRes->iResultRank;
} elseif ($oRes->iResultRank == $iMinRank) {

View File

@@ -1,8 +1,16 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/Result.php');
class ReverseGeocode
{
@@ -32,10 +40,10 @@ class ReverseGeocode
9 => 12,
10 => 17, // City
11 => 17,
12 => 18, // Town / Village
13 => 18,
14 => 22, // Suburb
15 => 22,
12 => 18, // Town
13 => 19, // Village
14 => 22, // Neighbourhood
15 => 25, // Locality
16 => 26, // major street
17 => 27, // minor street
18 => 30, // or >, Building
@@ -56,12 +64,15 @@ class ReverseGeocode
{
Debug::newFunction('lookupInterpolation');
$sSQL = 'SELECT place_id, parent_place_id, 30 as rank_search,';
$sSQL .= ' ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
$sSQL .= ' startnumber, endnumber, interpolationtype,';
$sSQL .= ' (CASE WHEN endnumber != startnumber';
$sSQL .= ' THEN (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.')';
$sSQL .= ' ELSE startnumber END) as fhnr,';
$sSQL .= ' startnumber, endnumber, step,';
$sSQL .= ' ST_Distance(linegeo,'.$sPointSQL.') as distance';
$sSQL .= ' FROM location_property_osmline';
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')';
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' and parent_place_id != 0';
$sSQL .= ' ORDER BY distance ASC limit 1';
Debug::printSQL($sSQL);
@@ -74,7 +85,10 @@ class ReverseGeocode
protected function lookupLargeArea($sPointSQL, $iMaxRank)
{
$oResult = null;
$sCountryCode = $this->getCountryCode($sPointSQL);
if (CONST_Search_WithinCountries and $sCountryCode == null) {
return null;
}
if ($iMaxRank > 4) {
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
@@ -85,12 +99,12 @@ class ReverseGeocode
// If no polygon which contains the searchpoint is found,
// searches in the country_osm_grid table for a polygon.
return $this->lookupInCountry($sPointSQL, $iMaxRank);
return $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
}
protected function lookupInCountry($sPointSQL, $iMaxRank)
protected function getCountryCode($sPointSQL)
{
Debug::newFunction('lookupInCountry');
Debug::newFunction('getCountryCode');
// searches for polygon in table country_osm_grid which contains the searchpoint
// and searches for the nearest place node to the searchpoint in this polygon
$sSQL = 'SELECT country_code FROM country_osm_grid';
@@ -102,8 +116,12 @@ class ReverseGeocode
null,
'Could not determine country polygon containing the point.'
);
Debug::printVar('Country code', $sCountryCode);
return $sCountryCode;
}
protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
{
Debug::newFunction('lookupInCountry');
if ($sCountryCode) {
if ($iMaxRank > 4) {
// look for place nodes with the given country code
@@ -113,11 +131,13 @@ class ReverseGeocode
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
$sSQL .= ' AND country_code = \''.$sCountryCode.'\'';
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND rank_search between 5 and ' .min(25, $iMaxRank);
$sSQL .= ' AND class = \'place\' AND type != \'postcode\'';
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, 1.8)) p ';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ') as a ';
$sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
@@ -167,22 +187,28 @@ class ReverseGeocode
{
Debug::newFunction('lookupPolygon');
// polygon search begins at suburb-level
if ($iMaxRank > 25) $iMaxRank = 25;
if ($iMaxRank > 25) {
$iMaxRank = 25;
}
// no polygon search over country-level
if ($iMaxRank < 5) $iMaxRank = 5;
if ($iMaxRank < 5) {
$iMaxRank = 5;
}
// search for polygon
$sSQL = 'SELECT place_id, parent_place_id, rank_address, rank_search FROM';
$sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry';
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE ST_GeometryType(geometry) in (\'ST_Polygon\', \'ST_MultiPolygon\')';
$sSQL .= ' AND rank_address Between 5 AND ' .$iMaxRank;
// Ensure that query planner doesn't use the index on rank_search.
$sSQL .= ' AND coalesce(rank_search, 0) between 5 and ' .$iMaxRank;
$sSQL .= ' AND rank_address between 4 and 25'; // needed for index selection
$sSQL .= ' AND geometry && '.$sPointSQL;
$sSQL .= ' AND type != \'postcode\' ';
$sSQL .= ' AND name is not null';
$sSQL .= ' AND indexed_status = 0 and linked_place_id is null';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 50 ) as a';
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 1';
$sSQL .= ' ORDER BY rank_search DESC LIMIT 50 ) as a';
$sSQL .= ' WHERE ST_Contains(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_search DESC LIMIT 1';
Debug::printSQL($sSQL);
$aPoly = $this->oDB->getRow($sSQL, null, 'Could not determine polygon containing the point.');
@@ -190,33 +216,28 @@ class ReverseGeocode
if ($aPoly) {
// if a polygon is found, search for placenodes begins ...
$iParentPlaceID = $aPoly['parent_place_id'];
$iRankAddress = $aPoly['rank_address'];
$iRankSearch = $aPoly['rank_search'];
$iPlaceID = $aPoly['place_id'];
if ($iRankAddress != $iMaxRank) {
if ($iRankSearch != $iMaxRank) {
$sSQL = 'SELECT place_id FROM ';
$sSQL .= '(SELECT place_id, rank_search, country_code, geometry,';
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
// using rank_search because of a better differentiation
// for place nodes at rank_address 16
$sSQL .= ' AND rank_search > '.$iRankSearch;
$sSQL .= ' AND rank_search <= '.$iMaxRank;
$sSQL .= ' AND rank_address > 0';
$sSQL .= ' AND class = \'place\'';
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' AND indexed_status = 0 AND linked_place_id is null';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, reverse_place_diameter('.$iRankSearch.'::smallint))';
$sSQL .= ' ORDER BY distance ASC,';
$sSQL .= ' rank_address DESC';
$sSQL .= ' limit 500) as a';
$sSQL .= ' WHERE ST_CONTAINS((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' limit 100) as a';
$sSQL .= ' WHERE ST_Contains((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY distance ASC, rank_search DESC';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);
@@ -242,26 +263,24 @@ class ReverseGeocode
public function lookupPoint($sPointSQL, $bDoInterpolation = true)
{
Debug::newFunction('lookupPoint');
// starts if the search is on POI or street level,
// searches for the nearest POI or street,
// if a street is found and a POI is searched for,
// the nearest POI which the found street is a parent of is choosen.
$iMaxRank = $this->iMaxRank;
// Find the nearest point
$fSearchDiam = 0.006;
$oResult = null;
$aPlace = null;
// for POI or street level
if ($iMaxRank >= 26) {
if ($this->iMaxRank >= 26) {
// starts if the search is on POI or street level,
// searches for the nearest POI or street,
// if a street is found and a POI is searched for,
// the nearest POI which the found street is a parent of is chosen.
$sSQL = 'select place_id,parent_place_id,rank_address,country_code,';
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
$sSQL .= ' FROM ';
$sSQL .= ' placex';
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')';
$sSQL .= ' AND';
$sSQL .= ' rank_address between 26 and '.$iMaxRank;
$sSQL .= ' rank_address between 26 and '.$this->iMaxRank;
$sSQL .= ' and (name is not null or housenumber is not null';
$sSQL .= ' or rank_address between 26 and 27)';
$sSQL .= ' and (rank_address between 26 and 27';
@@ -280,34 +299,11 @@ class ReverseGeocode
$iPlaceID = $aPlace['place_id'];
$oResult = new Result($iPlaceID);
$iRankAddress = $aPlace['rank_address'];
$iParentPlaceID = $aPlace['parent_place_id'];
}
if ($bDoInterpolation && $iMaxRank >= 30) {
$fDistance = $fSearchDiam;
if ($aPlace) {
// We can't reliably go from the closest street to an
// interpolation line because the closest interpolation
// may have a different street segments as a parent.
// Therefore allow an interpolation line to take precendence
// even when the street is closer.
$fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
}
$aHouse = $this->lookupInterpolation($sPointSQL, $fDistance);
Debug::printVar('Interpolation result', $aPlace);
if ($aHouse) {
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
$oResult->iHouseNumber = closestHouseNumber($aHouse);
$aPlace = $aHouse;
$iRankAddress = 30;
}
}
if ($aPlace) {
// if street and maxrank > streetlevel
if ($iRankAddress <= 27 && $iMaxRank > 27) {
if ($iRankAddress <= 27 && $this->iMaxRank > 27) {
// find the closest object (up to a certain radius) of which the street is a parent of
$sSQL = ' select place_id,';
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
@@ -328,7 +324,9 @@ class ReverseGeocode
Debug::printVar('Closest POI result', $aStreet);
if ($aStreet) {
$aPlace = $aStreet;
$oResult = new Result($aStreet['place_id']);
$iRankAddress = 30;
}
}
@@ -339,9 +337,9 @@ class ReverseGeocode
&& $this->iMaxRank >= 28
) {
$sSQL = 'SELECT place_id,parent_place_id,30 as rank_search,';
$sSQL .= 'ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
$sSQL .= 'ST_distance('.$sPointSQL.', linegeo) as distance,';
$sSQL .= 'startnumber,endnumber,interpolationtype';
$sSQL .= ' (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fhnr,';
$sSQL .= ' startnumber, endnumber, step,';
$sSQL .= ' ST_Distance('.$sPointSQL.', linegeo) as distance';
$sSQL .= ' FROM location_property_tiger WHERE parent_place_id = '.$oResult->iId;
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, 0.001)';
$sSQL .= ' ORDER BY distance ASC limit 1';
@@ -351,17 +349,50 @@ class ReverseGeocode
Debug::printVar('Tiger house number result', $aPlaceTiger);
if ($aPlaceTiger) {
$aPlace = $aPlaceTiger;
$oResult = new Result($aPlaceTiger['place_id'], Result::TABLE_TIGER);
$oResult->iHouseNumber = closestHouseNumber($aPlaceTiger);
$iRndNum = max(0, round($aPlaceTiger['fhnr'] / $aPlaceTiger['step']) * $aPlaceTiger['step']);
$oResult->iHouseNumber = $aPlaceTiger['startnumber'] + $iRndNum;
if ($oResult->iHouseNumber > $aPlaceTiger['endnumber']) {
$oResult->iHouseNumber = $aPlaceTiger['endnumber'];
}
$iRankAddress = 30;
}
}
} else {
}
if ($bDoInterpolation && $this->iMaxRank >= 30) {
$fDistance = $fSearchDiam;
if ($aPlace) {
// We can't reliably go from the closest street to an
// interpolation line because the closest interpolation
// may have a different street segments as a parent.
// Therefore allow an interpolation line to take precedence
// even when the street is closer.
$fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
}
$aHouse = $this->lookupInterpolation($sPointSQL, $fDistance);
Debug::printVar('Interpolation result', $aPlace);
if ($aHouse) {
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
$iRndNum = max(0, round($aHouse['fhnr'] / $aHouse['step']) * $aHouse['step']);
$oResult->iHouseNumber = $aHouse['startnumber'] + $iRndNum;
if ($oResult->iHouseNumber > $aHouse['endnumber']) {
$oResult->iHouseNumber = $aHouse['endnumber'];
}
$aPlace = $aHouse;
}
}
if (!$aPlace) {
// if no POI or street is found ...
$oResult = $this->lookupLargeArea($sPointSQL, 25);
}
} else {
// lower than street level ($iMaxRank < 26 )
$oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank);
$oResult = $this->lookupLargeArea($sPointSQL, $this->iMaxRank);
}
Debug::printVar('Final result', $oResult);

View File

@@ -1,8 +1,16 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/lib.php');
require_once(CONST_LibDir.'/lib.php');
/**
@@ -28,6 +36,8 @@ class SearchContext
public $sqlViewboxLarge = '';
/// Reference along a route (as SQL).
public $sqlViewboxCentre = '';
/// List of countries to restrict search to (as array).
public $aCountryList = null;
/// List of countries to restrict search to (as SQL).
public $sqlCountryList = '';
/// List of place IDs to exclude (as SQL).
@@ -187,6 +197,7 @@ class SearchContext
public function setCountryList($aCountries)
{
$this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')';
$this->aCountryList = $aCountries;
}
/**
@@ -279,6 +290,19 @@ class SearchContext
return '';
}
/**
* Check if the given country is covered by the search context.
*
* @param string $sCountryCode Country code of the country to check.
*
* @return True, if no country code restrictions are set or the
* country is included in the country list.
*/
public function isCountryApplicable($sCountryCode)
{
return $this->aCountryList === null || in_array($sCountryCode, $this->aCountryList);
}
public function debugInfo()
{
return array(

View File

@@ -1,10 +1,18 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
require_once(CONST_BasePath.'/lib/SearchContext.php');
require_once(CONST_BasePath.'/lib/Result.php');
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
require_once(CONST_LibDir.'/SearchContext.php');
require_once(CONST_LibDir.'/Result.php');
/**
* Description of a single interpretation of a search query.
@@ -19,6 +27,8 @@ class SearchDescription
private $aName = array();
/// True if the name is rare enough to force index use on name.
private $bRareName = false;
/// True if the name requires to be accompanied by address terms.
private $bNameNeedsAddress = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
/// List of word ids that appear in the name but should be ignored.
@@ -67,47 +77,6 @@ class SearchDescription
return $this->iSearchRank;
}
/**
* Make this search a POI search.
*
* In a POI search, objects are not (only) searched by their name
* but also by the primary OSM key/value pair (class and type in Nominatim).
*
* @param integer $iOperator Type of POI search
* @param string $sClass Class (or OSM tag key) of POI.
* @param string $sType Type (or OSM tag value) of POI.
*
* @return void
*/
public function setPoiSearch($iOperator, $sClass, $sType)
{
$this->iOperator = $iOperator;
$this->sClass = $sClass;
$this->sType = $sType;
}
/**
* Check if this might be a full address search.
*
* @return bool True if the search contains name, address and housenumber.
*/
public function looksLikeFullAddress()
{
return (!empty($this->aName))
&& (!empty($this->aAddress) || $this->sCountryCode)
&& preg_match('/[0-9]+/', $this->sHouseNumber);
}
/**
* Check if any operator is set.
*
* @return bool True, if this is a special search operation.
*/
public function hasOperator()
{
return $this->iOperator != Operator::NONE;
}
/**
* Extract key/value pairs from a query.
*
@@ -154,258 +123,255 @@ class SearchDescription
return false;
}
}
if ($this->bNameNeedsAddress && empty($this->aAddress)) {
return false;
}
return true;
}
/////////// Search building functions
/**
* Derive new searches by adding a full term to the existing search.
* Create a copy of this search description adding to search rank.
*
* @param object $oSearchTerm Description of the token.
* @param bool $bHasPartial True if there are also tokens of partial terms
* with the same name.
* @param string $sPhraseType Type of phrase the token is contained in.
* @param bool $bFirstToken True if the token is at the beginning of the
* query.
* @param bool $bFirstPhrase True if the token is in the first phrase of
* the query.
* @param bool $bLastToken True if the token is at the end of the query.
* @param integer $iTermCost Cost to add to the current search rank.
*
* @return SearchDescription[] List of derived search descriptions.
* @return object Cloned search description.
*/
public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
public function clone($iTermCost)
{
$aNewSearches = array();
$oSearch = clone $this;
$oSearch->iSearchRank += $iTermCost;
if (($sPhraseType == '' || $sPhraseType == 'country')
&& is_a($oSearchTerm, '\Nominatim\Token\Country')
) {
if (!$this->sCountryCode) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->sCountryCode = $oSearchTerm->sCountryCode;
// Country is almost always at the end of the string
// - increase score for finding it anywhere else (optimisation)
if (!$bLastToken) {
$oSearch->iSearchRank += 5;
}
$aNewSearches[] = $oSearch;
}
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
) {
if (!$this->sPostcode) {
// If we have structured search or this is the first term,
// make the postcode the primary search element.
if ($this->iOperator == Operator::NONE && $bFirstToken) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->iOperator = Operator::POSTCODE;
$oSearch->aAddress = array_merge($this->aAddress, $this->aName);
$oSearch->aName =
array($oSearchTerm->iId => $oSearchTerm->sPostcode);
$aNewSearches[] = $oSearch;
}
// If we have a structured search or this is not the first term,
// add the postcode as an addendum.
if ($this->iOperator != Operator::POSTCODE
&& ($sPhraseType == 'postalcode' || !empty($this->aName))
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
if (strlen($oSearchTerm->sPostcode) < 4) {
$oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
}
$oSearch->sPostcode = $oSearchTerm->sPostcode;
$aNewSearches[] = $oSearch;
}
}
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
&& is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
) {
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->sHouseNumber = $oSearchTerm->sToken;
// sanity check: if the housenumber is not mainly made
// up of numbers, add a penalty
if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0
|| preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
$oSearch->iSearchRank++;
}
if (empty($oSearchTerm->iId)) {
$oSearch->iSearchRank++;
}
// also must not appear in the middle of the address
if (!empty($this->aAddress)
|| (!empty($this->aAddressNonSearch))
|| $this->sPostcode
) {
$oSearch->iSearchRank++;
}
$aNewSearches[] = $oSearch;
// Housenumbers may appear in the name when the place has its own
// address terms.
if ($oSearchTerm->iId !== null
&& ($this->iNamePhrase >= 0 || empty($this->aName))
&& empty($this->aAddress)
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddress = $this->aName;
$oSearch->bRareName = false;
$oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
$aNewSearches[] = $oSearch;
}
}
} elseif ($sPhraseType == ''
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
) {
if ($this->iOperator == Operator::NONE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$iOp = $oSearchTerm->iOperator;
if ($iOp == Operator::NONE) {
if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
$iOp = Operator::NAME;
} else {
$iOp = Operator::NEAR;
}
$oSearch->iSearchRank += 2;
}
$oSearch->setPoiSearch(
$iOp,
$oSearchTerm->sClass,
$oSearchTerm->sType
);
$aNewSearches[] = $oSearch;
}
} elseif ($sPhraseType != 'country'
&& is_a($oSearchTerm, '\Nominatim\Token\Word')
) {
$iWordID = $oSearchTerm->iId;
// Full words can only be a name if they appear at the beginning
// of the phrase. In structured search the name must forcably in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
$oSearch = clone $this;
$oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
}
} else {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aName = array($iWordID => $iWordID);
if (CONST_Search_NameOnlySearchFrequencyThreshold) {
$oSearch->bRareName =
$oSearchTerm->iSearchNameCount
< CONST_Search_NameOnlySearchFrequencyThreshold;
}
$aNewSearches[] = $oSearch;
}
}
return $aNewSearches;
return $oSearch;
}
/**
* Derive new searches by adding a partial term to the existing search.
* Check if the search currently includes a name.
*
* @param string $sToken Term for the token.
* @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
* @param array[] $aFullTokens List of full term tokens with the
* same name.
* @param bool bIncludeNonNames If true stop-word tokens are taken into
* account, too.
*
* @return SearchDescription[] List of derived search descriptions.
* @return bool True, if search has a name.
*/
public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
public function hasName($bIncludeNonNames = false)
{
// Only allow name terms.
if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
return array();
return !empty($this->aName)
|| (!empty($this->aNameNonSearch) && $bIncludeNonNames);
}
/**
* Check if the search currently includes an address term.
*
* @return bool True, if any address term is included, including stop-word
* terms.
*/
public function hasAddress()
{
return !empty($this->aAddress) || !empty($this->aAddressNonSearch);
}
/**
* Check if a country restriction is currently included in the search.
*
* @return bool True, if a country restriction is set.
*/
public function hasCountry()
{
return $this->sCountryCode !== '';
}
/**
* Check if a postcode is currently included in the search.
*
* @return bool True, if a postcode is set.
*/
public function hasPostcode()
{
return $this->sPostcode !== '';
}
/**
* Check if a house number is set for the search.
*
* @return bool True, if a house number is set.
*/
public function hasHousenumber()
{
return $this->sHouseNumber !== '';
}
/**
* Check if a special type of place is requested.
*
* param integer iOperator When set, check for the particular
* operator used for the special type.
*
* @return bool True, if speial type is requested or, if requested,
* a special type with the given operator.
*/
public function hasOperator($iOperator = null)
{
return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator;
}
/**
* Add the given token to the list of terms to search for in the address.
*
* @param integer iID ID of term to add.
* @param bool bSearchable Term should be used to search for result
* (i.e. term is not a stop word).
*/
public function addAddressToken($iId, $bSearchable = true)
{
if ($bSearchable) {
$this->aAddress[$iId] = $iId;
} else {
$this->aAddressNonSearch[$iId] = $iId;
}
}
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
/**
* Add the given full-word token to the list of terms to search for in the
* name.
*
* @param integer iId ID of term to add.
* @param bool bRareName True if the term is infrequent enough to not
* require other constraints for efficient search.
*/
public function addNameToken($iId, $bRareName)
{
$this->aName[$iId] = $iId;
$this->bRareName = $bRareName;
$this->bNameNeedsAddress = false;
}
if ((!$bStructuredPhrases || $iPhrase > 0)
&& (!empty($this->aName))
&& strpos($sToken, ' ') === false
) {
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch = clone $this;
$oSearch->iSearchRank += $oSearchTerm->iTermCount + 1;
if (empty($this->aName)) {
$oSearch->iSearchRank++;
}
if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank++;
}
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
} else {
$oSearch = clone $this;
$oSearch->iSearchRank += $oSearchTerm->iTermCount + 1;
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
$aNewSearches[] = $oSearch;
// revert to the token version?
foreach ($aFullTokens as $oSearchTermToken) {
if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
$oSearch = clone $this;
$oSearch->iSearchRank += 3;
$oSearch->aAddress[$oSearchTermToken->iId]
= $oSearchTermToken->iId;
$aNewSearches[] = $oSearch;
}
}
}
/**
* Add the given partial token to the list of terms to search for in
* the name.
*
* @param integer iID ID of term to add.
* @param bool bSearchable Term should be used to search for result
* (i.e. term is not a stop word).
* @param bool bNeedsAddress True if the term is too unspecific to be used
* in a stand-alone search without an address
* to narrow down the search.
* @param integer iPhraseNumber Index of phrase, where the partial term
* appears.
*/
public function addPartialNameToken($iId, $bSearchable, $bNeedsAddress, $iPhraseNumber)
{
if (empty($this->aName)) {
$this->bNameNeedsAddress = $bNeedsAddress;
} elseif ($bSearchable && count($this->aName) >= 2) {
$this->bNameNeedsAddress = false;
} else {
$this->bNameNeedsAddress &= $bNeedsAddress;
}
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
&& (empty($this->aName) || $this->iNamePhrase == $iPhrase)
) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
if (empty($this->aName)) {
$oSearch->iSearchRank += 1;
}
if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank += 2;
}
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
if (empty($this->aName)
&& CONST_Search_NameOnlySearchFrequencyThreshold
) {
$oSearch->bRareName =
$oSearchTerm->iSearchNameCount
< CONST_Search_NameOnlySearchFrequencyThreshold;
} else {
$oSearch->bRareName = false;
}
$oSearch->aName[$iWordID] = $iWordID;
} else {
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
$aNewSearches[] = $oSearch;
if ($bSearchable) {
$this->aName[$iId] = $iId;
} else {
$this->aNameNonSearch[$iId] = $iId;
}
$this->iNamePhrase = $iPhraseNumber;
}
return $aNewSearches;
/**
* Set country restriction for the search.
*
* @param string sCountryCode Country code of country to restrict search to.
*/
public function setCountry($sCountryCode)
{
$this->sCountryCode = $sCountryCode;
$this->iNamePhrase = -1;
}
/**
* Set postcode search constraint.
*
* @param string sPostcode Postcode the result should have.
*/
public function setPostcode($sPostcode)
{
$this->sPostcode = $sPostcode;
$this->iNamePhrase = -1;
}
/**
* Make this search a search for a postcode object.
*
* @param integer iId Token Id for the postcode.
* @param string sPostcode Postcode to look for.
*/
public function setPostcodeAsName($iId, $sPostcode)
{
$this->iOperator = Operator::POSTCODE;
$this->aAddress = array_merge($this->aAddress, $this->aName);
$this->aName = array($iId => $sPostcode);
$this->bRareName = true;
$this->iNamePhrase = -1;
}
/**
* Set house number search cnstraint.
*
* @param string sNumber House number the result should have.
*/
public function setHousenumber($sNumber)
{
$this->sHouseNumber = $sNumber;
$this->iNamePhrase = -1;
}
/**
* Make this search a search for a house number.
*
* @param integer iId Token Id for the house number.
*/
public function setHousenumberAsName($iId)
{
$this->aAddress = array_merge($this->aAddress, $this->aName);
$this->bRareName = false;
$this->bNameNeedsAddress = true;
$this->aName = array($iId => $iId);
$this->iNamePhrase = -1;
}
/**
* Make this search a POI search.
*
* In a POI search, objects are not (only) searched by their name
* but also by the primary OSM key/value pair (class and type in Nominatim).
*
* @param integer $iOperator Type of POI search
* @param string $sClass Class (or OSM tag key) of POI.
* @param string $sType Type (or OSM tag value) of POI.
*
* @return void
*/
public function setPoiSearch($iOperator, $sClass, $sType)
{
$this->iOperator = $iOperator;
$this->sClass = $sClass;
$this->sType = $sType;
$this->iNamePhrase = -1;
}
public function getNamePhrase()
{
return $this->iNamePhrase;
}
/**
* Get the global search context.
*
* @return object Objects of global search constraints.
*/
public function getContext()
{
return $this->oContext;
}
/////////// Query functions
@@ -421,12 +387,11 @@ class SearchDescription
*
* @return mixed[] An array with two fields: IDs contains the list of
* matching place IDs and houseNumber the houseNumber
* if appicable or -1 if not.
* if applicable or -1 if not.
*/
public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
{
$aResults = array();
$iHousenumber = -1;
if ($this->sCountryCode
&& empty($this->aName)
@@ -457,23 +422,6 @@ class SearchDescription
$iLimit
);
// Now search for housenumber, if housenumber provided. Can be zero.
if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) {
// Downgrade the rank of the street results, they are missing
// the housenumber.
foreach ($aResults as $oRes) {
$oRes->iResultRank++;
}
$aHnResults = $this->queryHouseNumber($oDB, $aResults);
if (!empty($aHnResults)) {
foreach ($aHnResults as $oRes) {
$aResults[$oRes->iId] = $oRes;
}
}
}
// finally get POIs if requested
if ($this->sClass && !empty($aResults)) {
$aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit);
@@ -619,36 +567,6 @@ class SearchDescription
$aTerms = array();
$aOrder = array();
// Sort by existence of the requested house number but only if not
// too many results are expected for the street, i.e. if the result
// will be narrowed down by an address. Remeber that with ordering
// every single result has to be checked.
if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$aOrder[] = ' (';
$aOrder[0] .= 'EXISTS(';
$aOrder[0] .= ' SELECT place_id';
$aOrder[0] .= ' FROM placex';
$aOrder[0] .= ' WHERE parent_place_id = search_name.place_id';
$aOrder[0] .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$aOrder[0] .= ' LIMIT 1';
$aOrder[0] .= ') ';
// also housenumbers from interpolation lines table are needed
if (preg_match('/[0-9]+/', $this->sHouseNumber)) {
$iHouseNumber = intval($this->sHouseNumber);
$aOrder[0] .= 'OR EXISTS(';
$aOrder[0] .= ' SELECT place_id ';
$aOrder[0] .= ' FROM location_property_osmline ';
$aOrder[0] .= ' WHERE parent_place_id = search_name.place_id';
$aOrder[0] .= ' AND startnumber is not NULL';
$aOrder[0] .= ' AND '.$iHouseNumber.'>=startnumber ';
$aOrder[0] .= ' AND '.$iHouseNumber.'<=endnumber ';
$aOrder[0] .= ' LIMIT 1';
$aOrder[0] .= ')';
}
$aOrder[0] .= ') DESC';
}
if (!empty($this->aName)) {
$aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName);
}
@@ -679,7 +597,7 @@ class SearchDescription
$aOrder[] = $this->oContext->distanceSQL('centroid');
} elseif ($this->sPostcode) {
if (empty($this->aAddress)) {
$aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))";
$aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.12))";
} else {
$aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')";
}
@@ -694,10 +612,6 @@ class SearchDescription
$aTerms[] = 'centroid && '.$this->oContext->sqlViewboxSmall;
}
if ($this->oContext->hasNearPoint()) {
$aOrder[] = $this->oContext->distanceSQL('centroid');
}
if ($this->sHouseNumber) {
$sImportanceSQL = '- abs(26 - address_rank) + 3';
} else {
@@ -720,118 +634,128 @@ class SearchDescription
$sExactMatchSQL = '0::int as exactmatch';
}
if ($this->sHouseNumber || $this->sClass) {
$iLimit = 40;
if (empty($aTerms)) {
return array();
}
$aResults = array();
if ($this->hasHousenumber()) {
$sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
if (!empty($aTerms)) {
$sSQL = 'SELECT place_id,'.$sExactMatchSQL;
// Housenumbers on streets and places.
$sPlacexSql = 'SELECT array_agg(place_id) FROM placex';
$sPlacexSql .= ' WHERE parent_place_id = sin.place_id AND sin.address_rank < 30';
$sPlacexSql .= $this->oContext->excludeSQL(' AND place_id');
$sPlacexSql .= ' and housenumber ~* E'.$sHouseNumberRegex;
// Interpolations on streets and places.
$sInterpolSql = 'null';
$sTigerSql = 'null';
if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
$sIpolHnr = 'WHERE parent_place_id = sin.place_id ';
$sIpolHnr .= ' AND startnumber is not NULL AND sin.address_rank < 30';
$sIpolHnr .= ' AND '.$this->sHouseNumber.' between startnumber and endnumber';
$sIpolHnr .= ' AND ('.$this->sHouseNumber.' - startnumber) % step = 0';
$sInterpolSql = 'SELECT array_agg(place_id) FROM location_property_osmline '.$sIpolHnr;
if (CONST_Use_US_Tiger_Data) {
$sTigerSql = 'SELECT array_agg(place_id) FROM location_property_tiger '.$sIpolHnr;
$sTigerSql .= " and sin.country_code = 'us'";
}
}
if ($this->sClass) {
$iLimit = 40;
}
$sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
$sSelfHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex;
$aTerms[] = '(address_rank < 30 or exists('.$sSelfHnr.'))';
$sSQL = 'SELECT sin.*, ';
$sSQL .= '('.$sPlacexSql.') as placex_hnr, ';
$sSQL .= '('.$sInterpolSql.') as interpol_hnr, ';
$sSQL .= '('.$sTigerSql.') as tiger_hnr ';
$sSQL .= ' FROM (';
$sSQL .= ' SELECT place_id, address_rank, country_code,'.$sExactMatchSQL.',';
$sSQL .= ' CASE WHEN importance = 0 OR importance IS NULL';
$sSQL .= ' THEN 0.75001-(search_rank::float/40) ELSE importance END as importance';
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT 40000';
$sSQL .= ') as sin';
$sSQL .= ' ORDER BY address_rank = 30 desc, placex_hnr, interpol_hnr, tiger_hnr,';
$sSQL .= ' importance';
$sSQL .= ' LIMIT '.$iLimit;
} else {
if ($this->sClass) {
$iLimit = 40;
}
$sSQL = 'SELECT place_id, address_rank, '.$sExactMatchSQL;
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT '.$iLimit;
Debug::printSQL($sSQL);
$aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
$oResult->iExactMatches = $aResult['exactmatch'];
$aResults[$aResult['place_id']] = $oResult;
}
}
return $aResults;
}
private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
{
$aResults = array();
$sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX);
if (!$sPlaceIDs) {
return $aResults;
}
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sSQL = 'SELECT place_id FROM placex ';
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
// XXX should inherit the exactMatches from its parent
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
$aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
$bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber);
$iHousenumber = intval($this->sHouseNumber);
if ($bIsIntHouseNumber && empty($aResults)) {
// if nothing found, search in the interpolation line table
$sSQL = 'SELECT distinct place_id FROM location_property_osmline';
$sSQL .= ' WHERE startnumber is not NULL';
$sSQL .= ' AND parent_place_id in ('.$sPlaceIDs.') AND (';
if ($iHousenumber % 2 == 0) {
// If housenumber is even, look for housenumber in streets
// with interpolationtype even or all.
$sSQL .= "interpolationtype='even'";
} else {
// Else look for housenumber with interpolationtype odd or all.
$sSQL .= "interpolationtype='odd'";
$aResults = array();
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
$oResult->iExactMatches = $aResult['exactmatch'];
$oResult->iAddressRank = $aResult['address_rank'];
$bNeedResult = true;
if ($this->hasHousenumber() && $aResult['address_rank'] < 30) {
if ($aResult['placex_hnr']) {
foreach (explode(',', substr($aResult['placex_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['interpol_hnr']) {
foreach (explode(',', substr($aResult['interpol_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID, Result::TABLE_OSMLINE);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$oHnrResult->iHouseNumber = intval($this->sHouseNumber);
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['tiger_hnr']) {
foreach (explode(',', substr($aResult['tiger_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID, Result::TABLE_TIGER);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$oHnrResult->iHouseNumber = intval($this->sHouseNumber);
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['address_rank'] < 26) {
$oResult->iResultRank += 2;
} else {
$oResult->iResultRank++;
}
}
$sSQL .= " or interpolationtype='all') and ";
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
}
}
// If nothing found try the aux fallback table
if (CONST_Use_Aux_Location_data && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_aux';
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
}
}
// If nothing found then search in Tiger data (location_property_tiger)
if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_tiger';
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and (';
if ($iHousenumber % 2 == 0) {
$sSQL .= "interpolationtype='even'";
} else {
$sSQL .= "interpolationtype='odd'";
}
$sSQL .= " or interpolationtype='all') and ";
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_TIGER);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
if ($bNeedResult) {
$aResults[$aResult['place_id']] = $oResult;
}
}
@@ -883,6 +807,7 @@ class SearchDescription
$sSQL = 'SELECT geometry FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs)";
$sSQL .= " AND rank_search < $iMaxRank + 5";
$sSQL .= ' AND ST_Area(Box2d(geometry)) < 20';
$sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')";
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= ' LIMIT 1';
@@ -1027,7 +952,7 @@ class SearchDescription
'Name terms (stop words)' => $this->aNameNonSearch,
'Address terms' => $this->aAddress,
'Address terms (stop words)' => $this->aAddressNonSearch,
'Address terms (full words)' => $this->aFullNameAddress,
'Address terms (full words)' => $this->aFullNameAddress ?? '',
'Special search' => $this->iOperator,
'Class' => $this->sClass,
'Type' => $this->sType,
@@ -1039,7 +964,7 @@ class SearchDescription
public function dumpAsHtmlTableRow(&$aWordIDs)
{
$kf = function ($k) use (&$aWordIDs) {
return $aWordIDs[$k];
return $aWordIDs[$k] ?? '['.$k.']';
};
echo '<tr>';

View File

@@ -0,0 +1,95 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
/**
* Description of the position of a token within a query.
*/
class SearchPosition
{
private $sPhraseType;
private $iPhrase;
private $iNumPhrases;
private $iToken;
private $iNumTokens;
public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
{
$this->sPhraseType = $sPhraseType;
$this->iPhrase = $iPhrase;
$this->iNumPhrases = $iNumPhrases;
}
public function setTokenPosition($iToken, $iNumTokens)
{
$this->iToken = $iToken;
$this->iNumTokens = $iNumTokens;
}
/**
* Check if the phrase can be of the given type.
*
* @param string $sType Type of phrse requested.
*
* @return True if the phrase is untyped or of the given type.
*/
public function maybePhrase($sType)
{
return $this->sPhraseType == '' || $this->sPhraseType == $sType;
}
/**
* Check if the phrase is exactly of the given type.
*
* @param string $sType Type of phrse requested.
*
* @return True if the phrase of the given type.
*/
public function isPhrase($sType)
{
return $this->sPhraseType == $sType;
}
/**
* Return true if the token is the very first in the query.
*/
public function isFirstToken()
{
return $this->iPhrase == 0 && $this->iToken == 0;
}
/**
* Check if the token is the final one in the query.
*/
public function isLastToken()
{
return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
}
/**
* Check if the current token is part of the first phrase in the query.
*/
public function isFirstPhrase()
{
return $this->iPhrase == 0;
}
/**
* Get the phrase position in the query.
*/
public function getPhrase()
{
return $this->iPhrase;
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -7,7 +15,7 @@ class Shell
public function __construct($sBaseCmd, ...$aParams)
{
if (!$sBaseCmd) {
throw new Exception('Command missing in new() call');
throw new \Exception('Command missing in new() call');
}
$this->baseCmd = $sBaseCmd;
$this->aParams = array();
@@ -33,7 +41,9 @@ class Shell
public function addEnvPair($sKey, $sVal)
{
if (isset($sKey) && $sKey && isset($sVal)) {
if (!isset($this->aEnv)) $this->aEnv = $_ENV;
if (!isset($this->aEnv)) {
$this->aEnv = $_ENV;
}
$this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV);
}
return $this;
@@ -48,7 +58,7 @@ class Shell
return join(' ', $aEscaped);
}
public function run()
public function run($bExitOnFail = false)
{
$sCmd = $this->escapedCmd();
// $aEnv does not need escaping, proc_open seems to handle it fine
@@ -67,14 +77,16 @@ class Shell
fclose($aPipes[0]); // no stdin
$iStat = proc_close($hProc);
if ($iStat != 0 && $bExitOnFail) {
exit($iStat);
}
return $iStat;
}
private function escapeParam($sParam)
{
if (preg_match('/^-*\w+$/', $sParam)) return $sParam;
return escapeshellarg($sParam);
return (preg_match('/^-*\w+$/', $sParam)) ? $sParam : escapeshellarg($sParam);
}
}

View File

@@ -1,107 +1,80 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
/**
* Segment of a query string.
* A word list creator based on simple splitting by space.
*
* The parts of a query strings are usually separated by commas.
* Creates possible permutations of split phrases by finding all combination
* of splitting the phrase on space boundaries.
*/
class Phrase
class SimpleWordList
{
const MAX_WORDSET_LEN = 20;
const MAX_WORDSETS = 100;
// Complete phrase as a string.
private $sPhrase;
// Element type for structured searches.
private $sPhraseType;
// Space-separated words of the phrase.
// The phrase as a list of simple terms (without spaces).
private $aWords;
// Possible segmentations of the phrase.
private $aWordSets;
public static function cmpByArraylen($aA, $aB)
/**
* Create a new word list
*
* @param string sPhrase Phrase to create the word list from. The phrase is
* expected to be normalised, so that there are no
* subsequent spaces.
*/
public function __construct($sPhrase)
{
$iALen = count($aA);
$iBLen = count($aB);
if ($iALen == $iBLen) {
return 0;
if (strlen($sPhrase) > 0) {
$this->aWords = explode(' ', $sPhrase);
} else {
$this->aWords = array();
}
return ($iALen < $iBLen) ? -1 : 1;
}
public function __construct($sPhrase, $sPhraseType)
{
$this->sPhrase = trim($sPhrase);
$this->sPhraseType = $sPhraseType;
$this->aWords = explode(' ', $this->sPhrase);
}
/**
* Return the element type of the phrase.
* Get all possible tokens that are present in this word list.
*
* @return string Pharse type if the phrase comes from a structured query
* or empty string otherwise.
* @return array The list of string tokens in the word list.
*/
public function getPhraseType()
{
return $this->sPhraseType;
}
/**
* Return the array of possible segmentations of the phrase.
*
* @return string[][] Array of segmentations, each consisting of an
* array of terms.
*/
public function getWordSets()
{
return $this->aWordSets;
}
/**
* Add the tokens from this phrase to the given list of tokens.
*
* @param string[] $aTokens List of tokens to append.
*
* @return void
*/
public function addTokens(&$aTokens)
public function getTokens()
{
$aTokens = array();
$iNumWords = count($this->aWords);
for ($i = 0; $i < $iNumWords; $i++) {
$sPhrase = $this->aWords[$i];
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
$aTokens[$sPhrase] = $sPhrase;
for ($j = $i + 1; $j < $iNumWords; $j++) {
$sPhrase .= ' '.$this->aWords[$j];
$aTokens[' '.$sPhrase] = ' '.$sPhrase;
$aTokens[$sPhrase] = $sPhrase;
}
}
return $aTokens;
}
/**
* Invert the set of possible segmentations.
*
* @return void
* Compute all possible permutations of phrase splits that result in
* words which are in the token list.
*/
public function invertWordSets()
{
foreach ($this->aWordSets as $i => $aSet) {
$this->aWordSets[$i] = array_reverse($aSet);
}
}
public function computeWordSets($oTokens)
public function getWordSets($oTokens)
{
$iNumWords = count($this->aWords);
if ($iNumWords == 0) {
return null;
}
// Caches the word set for the partial phrase up to word i.
$aSetCache = array_fill(0, $iNumWords, array());
@@ -117,19 +90,19 @@ class Phrase
if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
$aPartial = array($sPartial);
foreach ($aSetCache[$j - 1] as $aSet) {
if (count($aSet) < Phrase::MAX_WORDSET_LEN) {
if (count($aSet) < SimpleWordList::MAX_WORDSET_LEN) {
$aSetCache[$i][] = array_merge($aSet, $aPartial);
}
}
if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) {
if (count($aSetCache[$i]) > 2 * SimpleWordList::MAX_WORDSETS) {
usort(
$aSetCache[$i],
array('\Nominatim\Phrase', 'cmpByArraylen')
array('\Nominatim\SimpleWordList', 'cmpByArraylen')
);
$aSetCache[$i] = array_slice(
$aSetCache[$i],
0,
Phrase::MAX_WORDSETS
SimpleWordList::MAX_WORDSETS
);
}
}
@@ -142,19 +115,30 @@ class Phrase
}
}
$this->aWordSets = $aSetCache[$iNumWords - 1];
usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
$this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS);
$aWordSets = $aSetCache[$iNumWords - 1];
usort($aWordSets, array('\Nominatim\SimpleWordList', 'cmpByArraylen'));
return array_slice($aWordSets, 0, SimpleWordList::MAX_WORDSETS);
}
/**
* Custom search routine which takes two arrays. The array with the fewest
* items wins. If same number of items then the one with the longest first
* element wins.
*/
public static function cmpByArraylen($aA, $aB)
{
$iALen = count($aA);
$iBLen = count($aB);
if ($iALen == $iBLen) {
return strlen($aB[0]) <=> strlen($aA[0]);
}
return ($iALen < $iBLen) ? -1 : 1;
}
public function debugInfo()
{
return array(
'Type' => $this->sPhraseType,
'Phrase' => $this->sPhrase,
'Words' => $this->aWords,
'WordSets' => $this->aWordSets
);
return $this->aWords;
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

Some files were not shown because too many files have changed in this diff Show More