Compare commits

..

1227 Commits

Author SHA1 Message Date
Sarah Hoffmann
896a40b7d5 prepare release 4.4.1 2024-08-20 13:54:37 +02:00
Sarah Hoffmann
1e71085004 update CI scripts 2024-08-20 13:54:37 +02:00
Sarah Hoffmann
3c05d98b80 make sure SQLAlchemy can handle the loaded dialect
The psycopg dialect was only added in SQLAlchemy 2.0. To avoid loading
errors when SQLAlchemy 1.4 is installed together with psycopg3,
check that the dialect is really available.
2024-08-20 10:10:09 +02:00
Sarah Hoffmann
7bbdf57b08 restrict interpolation housenumbers to 0-999999
Ensures that the numbers fit into integers.
2024-08-20 09:45:01 +02:00
Markus Döring
5e91b78ff4 update Search.md, fix typo (#3454) 2024-08-20 09:43:43 +02:00
Sarah Hoffmann
ad72641424 restrict invalidation of child objects on large street features
When streets become very large, it is more likely a mapping error.
So ignore such changes.
2024-08-20 09:43:20 +02:00
marc tobias
75130e4332 docs: use nominatim-project everywhere 2024-08-20 09:41:42 +02:00
marc tobias
47b41ed510 Import documentation: reverse-only only a little faster import [skip ci] 2024-08-20 09:40:52 +02:00
marc tobias
b4e2e7de16 geocodejson: admin level output should only print boundaries 2024-08-20 09:40:21 +02:00
marc tobias
1c3ed66ca1 docs: correct URL of an internal link 2024-08-20 09:39:06 +02:00
Sarah Hoffmann
e5a5f02666 prepare release 4.4.0 2024-03-07 11:43:01 +01:00
Sarah Hoffmann
11ced26025 Merge pull request #3358 from lonvia/pg-module-for-pg16
Fix compilation of legacy module for PostgreSQL 16
2024-03-07 11:39:24 +01:00
Sarah Hoffmann
edb1eec46d actions: run legacy test against newest postgresql 16 2024-03-05 19:38:06 +01:00
Sarah Hoffmann
63eacc5589 fix compilation of PG module for PostgreSQL 16
This version requires an additional include for the macros used.
2024-03-05 16:31:02 +01:00
Sarah Hoffmann
e929693cae Merge pull request #3356 from lonvia/use-date-from-osm2pgsql-prop
Use import date from osm2pgsql property table if available
2024-03-05 15:32:16 +01:00
Sarah Hoffmann
ae7c584e28 use import date from osm2pgsql property table if available 2024-03-05 11:33:32 +01:00
Sarah Hoffmann
4d5faf9423 Merge pull request #3353 from mtmail/add-codespell
Github Actions: add codespell linter, warn only
2024-03-04 14:02:00 +01:00
marc tobias
b7eea4d53a Github Actions: add codespell linter, warn only 2024-03-04 00:22:24 +01:00
Sarah Hoffmann
dd2c794de5 Merge pull request #3350 from lonvia/improve-postcode-handling
Improve handling of postcode areas
2024-02-28 18:45:31 +01:00
Sarah Hoffmann
3b6d35fc12 Merge pull request #3349 from lonvia/remove-way-geometry-table-after-import
Drop lower-rank-ways index after import
2024-02-28 17:47:34 +01:00
Sarah Hoffmann
9fa73cfb15 improve display name for postcodes
Don't add the postcode again in the list of address details and
make sure that the result proper always comes before anything else
independently of the address rank.
2024-02-28 16:50:40 +01:00
Sarah Hoffmann
62b7670e0c for postcodes use rank_search as base rank for finding addresses
The rank_address reflects the position in the address which is
usually lower than what one would expect for a postcode area.
2024-02-28 14:40:36 +01:00
Sarah Hoffmann
d7bb449e74 drop lower-rank-ways index after import
The index becomes quite big and is only needed during import
because the full geometry import does not exist yet.
2024-02-28 14:35:56 +01:00
Sarah Hoffmann
247065ff6f Merge pull request #3342 from mtmail/tyops
Correct some typos
2024-02-28 14:25:16 +01:00
Sarah Hoffmann
9a84adef59 Merge pull request #3347 from lonvia/tweak-boundary-imports
Assorted style tweaks
2024-02-28 14:22:08 +01:00
Sarah Hoffmann
1879cf902c Merge pull request #3346 from lonvia/reduce-artificial-importance
Reduce default importance
2024-02-28 14:21:46 +01:00
Sarah Hoffmann
019a68a4bb Merge pull request #3345 from lonvia/simplify-large-geometries
Simplify very large polygons that are not used in addresses
2024-02-28 12:06:49 +01:00
Sarah Hoffmann
110491011f Merge pull request #3344 from lonvia/osm2pgsql-new-middle
Update osm2pgsql to latest 1.11.0 and add support for new middle format
2024-02-28 12:06:33 +01:00
Sarah Hoffmann
36b1660121 add support for new middle table format of osm2pgsql
Functions are adapted according to the format detected from the
osm2pgsql property table.
2024-02-27 18:18:19 +01:00
Sarah Hoffmann
56201feb28 simplify very large polygons not used in addresses
Polygons with rank_address = 0 are only used in search and (rarely)
for reverse lookup. Geometries do not need to be precise for that
because topology does not matter. OSM has some very large polygons
of natural features with sizes of more than 10MB. Simplify these
polygons to keep the database and indexes smaller.
2024-02-27 10:16:18 +01:00
Sarah Hoffmann
c6d40d4bf4 reduce importance when computed from search rank 2024-02-27 10:15:54 +01:00
Sarah Hoffmann
a4f2e6a893 do not send outdated parameters to osm2pgsql flex 2024-02-27 10:15:36 +01:00
Sarah Hoffmann
b427fc7965 update osm2pgsql to 1.11.0 2024-02-27 10:15:36 +01:00
Sarah Hoffmann
e264604894 drop more railway tags 2024-02-27 10:15:08 +01:00
Sarah Hoffmann
3a5d9f0377 drop amenity=parking_space/entrance objects
Parking is sufficiently covered with amenity=parking.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
8be27015b2 drop boundary=land_area
Usually a version of administrative boundaries without ocean area.
Resulting polygons are pretty large and having the boundaries
should be enough.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
100391fb8e import leisure=natural_reserve as fallback only
About half of the natural reserves have a double tagging with
boundary=protected_area. Avoid importing these objects twice.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
dc1baaa0af prefer min() function over if construct
Fixes a linter complaint.
2024-02-27 09:26:50 +01:00
marc tobias
7205491b84 Correct some typos 2024-02-26 18:13:30 +01:00
Sarah Hoffmann
918fec73c6 Merge pull request #3341 from mtmail/remove-php-faq-entries
PHP related FAQ entries are no longer needed
2024-02-21 08:56:18 +01:00
marc tobias
b6df486525 PHP related FAQ entries are no longer needed 2024-02-20 18:55:02 +01:00
Sarah Hoffmann
8bd8a040e0 Merge pull request #3340 from lonvia/fix-lua-liniting-issues
Fix some issues in the style files found by luacheck
2024-02-20 12:00:01 +01:00
Sarah Hoffmann
781e83ddc3 fix issues found by luacheck
The variable shadowing causes bad results when used with LuaJIT.
2024-02-20 10:43:51 +01:00
Sarah Hoffmann
5afd96d210 Merge pull request #3339 from lonvia/python-frontend-as-default
Switch to Python frontend as the default
2024-02-20 10:17:21 +01:00
Sarah Hoffmann
cf49a070fd switch Ubuntu installation scripts to Python frontend 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
4aba36c5ac API debug: properly escape non-highlighted code 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
ca6e65fff1 bdd: be more verbose on HTML parsing error 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
1e0025b095 also switch unit tests for cli 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
173e85c9e6 actions: make php the legacy tests 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
ffb467028e update documentation to recommend Python frontend 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
05fad607ff make Python frontend default and PHP optional 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
19360a9552 Merge pull request #3338 from lonvia/remove-nested-cli-groups
Add documentation for importing without superuser rights
2024-02-16 19:27:25 +01:00
Sarah Hoffmann
b087f3ab7b actions: switch no-superuser test to PostgreSQL 16
The new permission restrictions on the public schema only took
effect in PG15, so the previously used PG14 does not show any issues.
2024-02-16 17:14:47 +01:00
Sarah Hoffmann
2c8fb31381 add documentation for non-superuser import 2024-02-16 17:14:47 +01:00
Sarah Hoffmann
b2d3f0a8b3 remove unnecessary nested group in CLI import command 2024-02-16 11:32:50 +01:00
Sarah Hoffmann
bd8025feab Merge pull request #3333 from lonvia/set-empty-extratags-to-null
Extratags should become null when empty
2024-02-08 14:26:49 +01:00
Sarah Hoffmann
4c19762e33 extratags should become null when empty
Removing the artificial entries in the extratags may lead to
an empty hstore. Set it to null in that case.

Fixes #3055.
2024-02-08 10:21:48 +01:00
Sarah Hoffmann
1015ac40ae Merge pull request #3332 from lonvia/improve-cutting-of-result-list
Prefilter bad results before adding details and reranking
2024-02-07 10:50:32 +01:00
Sarah Hoffmann
4ce13f5c1f prefilter bad results before adding details and reranking
Move the first cutting of the result list before reranking
by result match. This means that results with significantly
less importance are removed early and independently of the
fact how well they match the original query.

Fixes #3266.
2024-02-06 20:29:48 +01:00
Sarah Hoffmann
2833362cf6 Merge pull request #3331 from lonvia/fix-word-table-rights
Properly grant rights to read-only user when switching out word table
2024-02-06 20:21:25 +01:00
Sarah Hoffmann
bc51378aee properly grant rights to read-only user when switching out word table 2024-02-06 17:30:01 +01:00
Sarah Hoffmann
39039e2a55 docs: mark layer parameter as python-only 2024-02-06 15:59:04 +01:00
Sarah Hoffmann
f523c01571 Merge pull request #3328 from lonvia/word-count-into-new-table
Recreate word table when refreshing counts
2024-02-05 11:58:11 +01:00
Sarah Hoffmann
81eed0680c recreate word table when refreshing counts
The counting touches a large part of the word table, leaving
bloated tables and indexes. Thus recreate the table instead and
swap it in.
2024-02-04 21:35:10 +01:00
Sarah Hoffmann
33c0f249b1 avoid LookupAny with address and too many name tokens
The index for nameaddress_vector has grown so large that PostgreSQL
will resort to a sequential scan if there are too many items
in the LookupAny list.
2024-01-29 16:52:14 +01:00
Sarah Hoffmann
76eadc562c print any collected debug output when returning a timeout error 2024-01-28 22:30:34 +01:00
Sarah Hoffmann
3cc3e3b2e3 Merge pull request #3321 from lonvia/remove-duplicate-partials
Improvements to query parsing
2024-01-28 20:32:58 +01:00
Sarah Hoffmann
f07f8530a8 housenumber-only searches cannot be combined with qualifiers 2024-01-28 19:03:11 +01:00
Sarah Hoffmann
103800a732 adjust rankings for housenumber-only searches
A normal address search with housenumber will use name rankings for
the street name. This is slightly different than weighing for
address parts. Use the same ranking for the first part of the
address for housenumber-only searches to make sure that penalties
remain comparable.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
f9ba7a465a always add a penalty for name + address search fallback
If there already was a search by full names, the search is likely
a repetition that yields the same results, only running slower.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
fed46240d5 disallow category tokens in the middle of a query string
This already worked for left-to-right readings and now is also
implemented for right-to-left reading. A qualifier must always be
before or after the name.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
2703442fd2 protect against very frequent bad partials 2024-01-28 19:03:11 +01:00
Sarah Hoffmann
2813bf18e6 avoid duplicates in the list of partial tokens for a query
This messes with the estimates for expected results.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
dcebea376d Merge pull request #3320 from lonvia/fix-timeout-return-code
Fix returned HTTP error when query runs too long
2024-01-28 10:37:37 +01:00
Sarah Hoffmann
b3a2b3d484 catch special async timeout error in servers
In Python <= 3.10 this is not yet the same as TimeoutError.

Fixes #3303.
2024-01-27 20:57:23 +01:00
Sarah Hoffmann
7321e66d08 Merge pull request #3317 from lonvia/postcodes-for-highway-areas
Search postcodes for highway areas around the area
2024-01-26 19:51:02 +01:00
Sarah Hoffmann
9627352ee4 search postcodes for highway areas around the area
So far the code would only accept postcodes that are inside the area.

Fixes #3304.
2024-01-26 18:14:11 +01:00
Sarah Hoffmann
bfc7acbb18 Merge pull request #3301 from lonvia/fix-class-search-regression
Interpret stand-alone special terms always as near term
2024-01-17 10:47:35 +01:00
Sarah Hoffmann
e0ca2ce6ec interpret stand-alone special terms always as near term
Fixes #3298.
2024-01-16 17:19:21 +01:00
Sarah Hoffmann
b969c5a62f Merge pull request #3293 from lonvia/rematch-against-country-code
Add country code to words to be rematched
2024-01-08 18:48:32 +01:00
Sarah Hoffmann
28f7e51279 add country code to words to be rematched 2024-01-08 12:23:23 +01:00
Sarah Hoffmann
d35eb4105e Merge pull request #3292 from lonvia/faster-country-search
Speed up country search
2024-01-07 20:42:20 +01:00
Sarah Hoffmann
b2afe3ce3e when a country is in the results, restrict further searches to places
A country search result usually comes with a very high importance.
As a result only other very well known places will show up together
with country results and that means only places with lower address
ranks. Name searches for country names tend to yield a lot of POI
results because the country name is part of the name
(think "embassy of Sweden"). By excluding POIs from further searches,
the search is sped up quite a bit.
2024-01-07 17:29:12 +01:00
Sarah Hoffmann
7337898b84 dump params in log view 2024-01-07 15:37:53 +01:00
Sarah Hoffmann
4305160c91 prioritize country searches when penalty is equal 2024-01-07 15:28:37 +01:00
Sarah Hoffmann
dc52d0954e Merge pull request #3238 from mtmail/check-database-for-version-match
admin --check-database also checks database vs nominatim version
2024-01-07 15:24:00 +01:00
Sarah Hoffmann
d3a575319f Merge pull request #3289 from lonvia/viewbox-and-housenumbers
Do not restrict by viewbox when housenumber or postcode is available
2024-01-07 15:23:14 +01:00
Sarah Hoffmann
2592bf1954 Merge pull request #3290 from lonvia/near-vs-quaifier-words
Do not run near queries on qualifier words
2024-01-07 15:23:00 +01:00
Sarah Hoffmann
88d7ffa274 Merge pull request #3291 from lonvia/fix-timezone-handling
Fix timezone handling for timestamps from the database
2024-01-07 15:22:42 +01:00
Sarah Hoffmann
474d4230b8 fix timezone handling for timestamps from the database
SQLite is not timezone-aware, so make sure to convert to UTC
before inserting any data.
2024-01-07 11:37:40 +01:00
Sarah Hoffmann
10a5424a71 do not run near queries on qualifier words
There is too much potential for confusion (e.g. 'Rio Grande' read
as 'river near Grande') for too little gain. Use near phrases
instead.
2024-01-07 11:33:11 +01:00
Sarah Hoffmann
7eb04f67e2 do not restrict by viewbox when housenumber or postcode is available
Fixes #3274.
2024-01-07 11:29:26 +01:00
Marc Tobias
1d7e078a2c check-database also checks database vs nominatim version 2024-01-06 20:56:56 +01:00
Sarah Hoffmann
f03ec3ea12 Merge pull request #3286 from lonvia/avoid-bind-parameters-in-lambdas
Avoid closure variables in lambda statements
2024-01-05 21:24:48 +01:00
Sarah Hoffmann
8e90fa3395 avoid closure variables in lambda statements
There is a bug in SQLAlchemy that assigns the wrong value to bind
parameters from closure variables when reusing lambda statements
that are later extended with other non-lambda expressions.

Thus either avoid lambda statements with closure variables or extending
them with non-lambda expressions.
2024-01-05 17:49:28 +01:00
Sarah Hoffmann
02af0a2c87 use correct SQLAlchemy pool for asynchronous connections
See https://github.com/sqlalchemy/sqlalchemy/issues/8771
2024-01-02 16:15:44 +01:00
Sarah Hoffmann
fa4e5513d1 API: avoid engine disposal on startup 2024-01-02 16:10:30 +01:00
Sarah Hoffmann
93afe5a7c3 update typing for latest changes in SQLAlchemy 2023-12-29 20:55:33 +01:00
Sarah Hoffmann
af85ad390f Merge pull request #3273 from lonvia/search-with-sqlite
Add forward search capability for SQLite databases
2023-12-12 12:15:22 +01:00
Sarah Hoffmann
ab45db5360 add minimal documentation for the SQLite usage 2023-12-09 16:30:31 +01:00
Sarah Hoffmann
89094cf92e error out when a SQLite database does not exist
Requires to mark the database r/w when it is newly created in the
convert function.
2023-12-07 10:24:53 +01:00
Sarah Hoffmann
3f5484f48f enable search for sqlite conversion by default 2023-12-07 09:33:42 +01:00
Sarah Hoffmann
ff06b64329 enable all BDD API tests for sqlite 2023-12-07 09:32:02 +01:00
Sarah Hoffmann
6d39563b87 enable all API tests for sqlite and port missing features 2023-12-07 09:32:02 +01:00
Sarah Hoffmann
0d840c8d4e extend sqlite converter for search tables 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
381bd0b576 remove unused function 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b5c61e0b5b improve typing for @compiles constructs
The first parameter is in fact the self parameter referring to
the function class.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
df6eddebcd avoid unnecessary aliases 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b6c8c0e72b factor out SQL for filtering by location
Also improves on the decision if an index is used or not.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b06f5fddcb simplify handling of SQL lookup code for search_name
Use function classes which can be instantiated directly.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
8791c6cb69 correctly close API objects during testing 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
615b166c68 clean up ST_DWithin and intersects() functions
A non-index version of ST_DWithin is not necessary. ST_Distance
can be used for that purpose. Index use for intersects can be
covered with a simple parameter.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
c41f2fed21 simplify weigh_search() function
Use JSON arrays which can have mixed types and therefore have
a more logical structure than separate arrays. Avoid JSON dicts
because of their verboseness.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
05e47fbb28 fix parameter formatting in sqlite debug output 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
1b7c8240ba enable connection pools for sqlite
Connecting is reasonably expensive because the spatialite extension
needs to be loaded. Disable pooling for tests because there is some
memory leak when quickly opening and closing QueuePools with sqlite
connections.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
c4fd3ab97f hide type differences between Postgres and Sqlite in custom types
Also define a custom set of operators in preparation of differences
in implementation.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
8c7140d92b Merge pull request #3108 from mtmail/remove-legacy-wikipedia-tag-syntax
These days the OSM wikipedia tag no longer contains URLs
2023-12-07 09:24:32 +01:00
Sarah Hoffmann
3969ce0f55 Merge pull request #3268 from mtmail/wikipedia-file-path-warning
Improve error message when Wikipedia importance file is not found
2023-12-07 09:21:59 +01:00
Robbe Haesendonck
4f5f5ea8fc Removed unnecessary check for --prepare-database flag 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
5f7cc91cf9 Connect using localhost instead of socket 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
424c1f0d41 Added changing permissions of nominatim-project dir 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
cff05394a1 Added osm-import to sudoers file for tokenizer setup 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
638b40c3ec Fixing CI tests for install-no-superuser 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
53d2050dc5 Fixed typechecking error 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
97ac036df5 Added missing return types to functions 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
482f7fe3ba CI tests: Removed creation of user www-data.
Removed creation of user www-data as it already exists
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
567c31ab6a Fixed legacy import command
Check whether prepare-database is true, if so exit early
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
7d28fc35d1 Disabled pylint too-many-branches 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
c06f902398 Fixed setting tokenizer property 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
59ae63e6f5 Updated no-superuser install ci test
Added nominatim_database_webuser (www-data).
Set non-superuser password for importing
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
9c7d947fd1 Updated check to see if osm_file is set 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
58db0ad6d8 Fixed formatting 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
3b09c39dbf Updated ci tests to reflect changes in Nominatim CLI 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
db917cb0d4 Made sure legacy import command still works 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
ba6cdd875d Removed unused variable, fixed connection 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
d231ff60ed Removed _is_complete_import check 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
c74904d075 Improved logic.
Fixed small oversight in mutually exclusiveness of arguments
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
22204050f2 Added new psql user for importing the data 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
667197a47e Changed naming of flags.
Made sure legacy import is also still ok
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
e8b866aa88 Added check to see if hstore is loaded 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
e7b8e1a2c2 Fixed ci-tests, osm-file flag
Preparing database should work without osm-file
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
279b4fd6d2 Renamed flags 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
b7c83d3580 Fixed pylint warnings 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
d4018f2e3b Added check for hstore extension 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
38369ca3cf Fixed typo 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
cc0bdd34e9 Fixed linting errors 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
8e71ff329c Added version check for PostGis and Postgres 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
b4e3d0ea44 Updated CI tests to check new import flags 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
992703b15e Added --prepare-database flag 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
ba5ec80611 Added --no-superuser flag
To allow import into already existing databases.
Refs: #2719
2023-12-07 09:04:33 +01:00
marc tobias
1c1447e709 Improve error message when Wikipedia importance file is not found 2023-12-06 14:31:07 +01:00
Sarah Hoffmann
3c32c0354a actions: pin pytest-asyncio version for Ubuntu 20
Newest versions are no longer compatible with older pytest.
2023-12-04 14:32:03 +01:00
Sarah Hoffmann
8a2c6067a2 skip lookup with full names when there are none 2023-12-01 12:11:58 +01:00
Sarah Hoffmann
d60a45715a Merge pull request #3263 from lonvia/near-search-penalty-inheritance
More tweaks for Python frontend search
2023-11-29 14:02:24 +01:00
Sarah Hoffmann
3c7a28dab0 further restrict stop search criterion 2023-11-29 11:28:54 +01:00
Sarah Hoffmann
0c72a434e0 use restrict for housenumber lookups with few numbers 2023-11-29 11:28:54 +01:00
Sarah Hoffmann
32e7b59b1f NearSearch needs to inherit penalty from inner search 2023-11-29 11:28:52 +01:00
Sarah Hoffmann
f448423727 Merge pull request #3262 from lonvia/fix-category-search
Fix use of category parameter for search() function
2023-11-28 20:02:36 +01:00
Sarah Hoffmann
b2319e52ff correctly exclude streets with housenumber searches
Street results are not subject to the full filtering in the SQL
query, so recheck.
2023-11-28 17:53:37 +01:00
Sarah Hoffmann
25279d009a add tests for interaction of category parameter with category terms 2023-11-28 16:56:08 +01:00
Sarah Hoffmann
3f72ca4bca rename use of category as POI search to near_item
Use the term category only as a short-cut for "tuple of key and value".
2023-11-28 16:27:05 +01:00
Sarah Hoffmann
70dc4957dc the category parameter in search should result in a qualifier 2023-11-28 12:01:49 +01:00
Sarah Hoffmann
d8ed565bce Merge pull request #3260 from lonvia/improve-catgeory-search
Various improvements to search with special phrases for Python frontend
2023-11-27 12:02:11 +01:00
Sarah Hoffmann
a7f5c6c8f5 drop category tokens when they make up a full phrase 2023-11-26 20:58:50 +01:00
Sarah Hoffmann
a8b023e57e restrict base results in near search by rank
This avoids in particular that roads or POIs are used as base
for the near search when a place result is present.
2023-11-26 17:41:29 +01:00
Sarah Hoffmann
47ca56f21b deduplicate categories/qualifiers 2023-11-26 17:11:15 +01:00
Sarah Hoffmann
580a7b032f order near searches by distance instead of importance 2023-11-26 16:48:04 +01:00
Sarah Hoffmann
8fcc2bb7f5 avoid duplicate lines during category search 2023-11-26 14:53:20 +01:00
Sarah Hoffmann
d6fe58f84e fix polygon selection for classtable lookups
Polygons should be used preferably with higher address ranks
where the areas are smaller.
2023-11-25 21:01:27 +01:00
Sarah Hoffmann
2d54de09bb Merge pull request #3257 from lonvia/slow-queries
Performance tweaks for search
2023-11-23 12:05:50 +01:00
Sarah Hoffmann
4e4d29f653 increase penalty for one-letter words 2023-11-23 10:51:58 +01:00
Sarah Hoffmann
195c13ee8a more preference for name-only queries in search 2023-11-22 23:57:23 +01:00
Sarah Hoffmann
ac5ef64701 avoid index use when filtering by layer 2023-11-22 20:54:04 +01:00
Sarah Hoffmann
e7dc24c026 add timestamps to text logging 2023-11-22 17:38:32 +01:00
Sarah Hoffmann
155f26060d avoid index on rank_address in near search 2023-11-22 17:33:17 +01:00
Sarah Hoffmann
a87fe8d8bf exclude country-level searches with non-address layers 2023-11-22 17:01:41 +01:00
Sarah Hoffmann
158df6b2e8 Merge pull request #3252 from kumarUjjawal/patch-1
minor typo fix
2023-11-22 12:51:37 +01:00
Kumar Ujjawal
b8db76c925 minor typo fix
fixed a minor typo, from 'wit' to 'with'.
2023-11-20 17:23:42 +05:30
Sarah Hoffmann
fffdfc9b88 add report on newest vulnerability 2023-11-20 10:44:48 +01:00
Sarah Hoffmann
6478409b05 improve code to collect the PostGIS version
The SQL contained an unchecked string literal, which may in theory be
used to attack the database.
2023-11-16 11:14:29 +01:00
Sarah Hoffmann
ee556fd42e Merge pull request #3248 from lonvia/fix-postcode-area-computation
PHP frontend: fix postcode area computation for address points
2023-11-15 22:10:21 +01:00
Sarah Hoffmann
9a1b8a67d6 adapt typing to newest version of SQLAlchemy 2023-11-15 19:59:26 +01:00
Sarah Hoffmann
383e3ccd25 php frontend: fix on-the-fly lookup of postcode areas 2023-11-15 17:45:12 +01:00
Sarah Hoffmann
b4ce1fb599 remove now unnecessary type ignore comment 2023-11-01 11:57:57 +01:00
Sarah Hoffmann
2bf8e62580 fix assertion on address list, it may be empty
Fixes #3237.
2023-10-31 21:10:54 +01:00
Sarah Hoffmann
afb439b089 Merge pull request #3235 from lonvia/fix-python-deploy
Python frontend deployment: add systemd service for the socket
2023-10-25 14:54:13 +02:00
Sarah Hoffmann
78a87ad16b python deployment: add systemd service for the socket 2023-10-25 11:31:42 +02:00
Sarah Hoffmann
5bf55a69a5 Merge pull request #3234 from lonvia/reduce-admin-style
Streamline admin style
2023-10-25 09:38:17 +02:00
Sarah Hoffmann
ca782e2f20 Merge pull request #3233 from lonvia/support-for-sqlite
Add support for SQLite DBs in frontend: reverse
2023-10-24 11:54:35 +02:00
Sarah Hoffmann
308de35802 further reduce admin style import to admin-only objects 2023-10-24 09:50:43 +02:00
Sarah Hoffmann
a9ac68a729 restrict geometry size for SQLite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
84d6b481ae convert sqlite: add index on parent_place_id 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
613c8635a8 remove type info from SQLAlchemy condition functions
A boolean type makes the SQLite dialect produce a construct like
'func() = 1' in WHERE condition. While syntactically correct, it tends
to confuse the query planner.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
899a04ad26 make code work with Spatialite 4.3
Transfer is_address_point into SQLAlchemy function, so that
json.has_key() can use the older json_extract() function.
And work around broken Distance function.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d8dca2a3a9 enable BDD tests for sqlite databases
The database must currently be created by hand and the name handed
in via -DAPI_TEST_DB='sqlite:...'.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
8216899a9a trim all coordinate output to 7 digits 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
b1d419f458 add indexing support for DWithin and intersects for sqlite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
0417946153 make reverse API work with sqlite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
1149578e8f actions: enable sqlite testing 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
37488ee82b try future annotations to resolve Generics compatibility 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
06bbd501fd make status API work with sqlite incl. unit tests 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
07e6c5cf69 make details API work with sqlite incl. unit tests 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d0c91e4acf make lookup call work with sqlite
Includes porting unit tests.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
114cdafe7e add exporting of SQLite table 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
837bdecde8 add skeleton code for convert function 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d9d0e70e5b streamline SQLAlchemy DB schema
Remove all information not strictly used by the frontend as well as
any index information. This will make it easier to create a SQLite
database from the schema.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
1255efba7f remove unused code 2023-10-23 11:55:18 +02:00
Sarah Hoffmann
6ad397d4a9 Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into lujoh-clean-deleted-relations 2023-10-23 10:47:31 +02:00
Sarah Hoffmann
570ca22d71 Merge pull request #3228 from pawel-wroniszewski/fix/postcode-validation
Properly validate postcodes with country code
2023-10-23 10:35:42 +02:00
lujoh
418f381b49 made age a required argument for the -clean-deleted command 2023-10-20 15:31:55 -04:00
Paweł Wroniszewski
2cae37ccde Revert country settings 2023-10-20 12:50:28 +02:00
lujoh
650fbc2563 added --clean deleted command to the documentation 2023-10-18 02:05:46 -04:00
lujoh
9ec26c60ff adjusted tests for --clean-deleted-relations command 2023-10-17 23:03:37 -04:00
lujoh
06204dfcd8 moved sql function flush_deleted_places() to utils 2023-10-17 18:22:27 -04:00
Paweł Wroniszewski
fbe40e005d Properly validate postcodes with country code
Include postcode pattern in postcode normalisation regex, instead of
removing it from postcode pattern in config.

It properly handles postcode validation and normalization when country code
is part of the postcode, e.g. for Isle of Man, Jersey, Anguilla, Andorra,
Cayman Islands and more.

Fixes #3227.
2023-10-17 01:04:07 +02:00
lujoh
e9efef9095 added subcommand to clean deleted relations for issue # 2444 2023-10-16 11:30:58 -04:00
Sarah Hoffmann
95c3181a35 adapt typing for newer version of mypy 2023-10-16 17:03:48 +02:00
Sarah Hoffmann
12dbfb0777 Merge pull request #3222 from lonvia/fix-river-output
Make sure the place name always comes first in output
2023-10-10 11:20:12 +02:00
Sarah Hoffmann
b62dbd1f92 reduce influence of viewbox
Perfectly matching city names should still get priority.
2023-10-07 22:00:52 +02:00
Sarah Hoffmann
5011fde176 make sure the place name always comes first in output
Also deleted some now unused code.
2023-10-05 16:37:28 +02:00
Sarah Hoffmann
54cb9a33b1 Merge pull request #3212 from lonvia/more-tests
Add more unit tests for search
2023-10-05 16:36:53 +02:00
Sarah Hoffmann
f1fbcd863d switch back meaning of reverse and search in warm
Also don't try to warm up searches on reverse-only databases.

Fixes #3213.
2023-10-04 15:19:24 +02:00
Sarah Hoffmann
b00b16aa3a more unit tests for search 2023-09-27 15:00:05 +02:00
Sarah Hoffmann
0f19695225 Merge pull request #3211 from lonvia/replace-get-addressdata
Move get_addressdata() implementation to Python
2023-09-26 21:29:15 +02:00
Sarah Hoffmann
7fcbe13669 move get_addressdata() implementation to Python
The pgsql function get_addressdata() does a lookup of a lot of data
that is already available in Python.
2023-09-26 11:21:36 +02:00
Sarah Hoffmann
87c91ec5c4 Merge pull request #3206 from lonvia/rerank-by-locale
Reintroduce result reranking
2023-09-21 09:24:18 +02:00
Sarah Hoffmann
0e10916b07 adapt tests to improved search 2023-09-20 14:58:54 +02:00
Sarah Hoffmann
21df87dedc filter duplicate results after DB query 2023-09-20 14:58:54 +02:00
Sarah Hoffmann
fd26310d6a rerank results by query
The algorithm is similar to the PHP reranking and uses the terms from
the display name to check against the query terms. However instead of
exact matching it uses a per-word-edit-distance, so that it is less
strict when it comes to mismatching accents or other one letter
differences.

Country names get a higher penalty because they don't receive a
penalty during token matching right now.

This will work badly with the legacy tokenizer. Given that it is
marked for removal, it is simply not worth optimising for it.
2023-09-20 14:52:05 +02:00
Sarah Hoffmann
5762a5bc80 move localization into add_result_details
This means that the locale now needs to be handed in into the search
functions already. At least search needs them for reranking.
2023-09-19 11:17:04 +02:00
Sarah Hoffmann
8106e67f14 Merge pull request #3201 from lonvia/tweak-expected-count
Reduce expected count for multi-part words
2023-09-17 17:04:27 +02:00
Sarah Hoffmann
f029fb3c65 'fix' issues with recent changes to psycopg2 typing
Some of the changes just make the warnings go away. The typing info
is still incorrect on the stub side, as far as I can determine.
2023-09-17 15:09:34 +02:00
Sarah Hoffmann
44da684d1d reduce expected count for multi-part words
Fixes #3196.
2023-09-11 17:45:34 +02:00
Sarah Hoffmann
64c1a4fc8c add changelog for 4.2.3 2023-09-07 10:07:16 +02:00
Sarah Hoffmann
ec47459410 prepare release 4.3.0 2023-09-06 20:08:28 +02:00
Sarah Hoffmann
c55c3657c3 reenable logging of details calls 2023-09-06 11:35:53 +02:00
Sarah Hoffmann
8b56b55761 restrict deduplication to results from placex
All other sources do not have deduplicatable entries.
2023-09-06 10:43:55 +02:00
Sarah Hoffmann
9056c9276f add bbox output to lookup results
Fixes #3149.
2023-09-06 10:27:03 +02:00
marc tobias
09ae312f09 documentation: fix a broken link, typos 2023-09-05 17:06:32 +02:00
Sarah Hoffmann
d6960c72e4 Merge pull request #3190 from lonvia/fix-internal-server-errors
Fix more failing queries
2023-09-05 17:00:31 +02:00
Sarah Hoffmann
b529e054cf Merge pull request #3189 from lonvia/add-country-area-restriction
Implement NOMINATIM_SEARCH_WITHIN_COUNTRIES for Python frontend
2023-09-05 14:29:44 +02:00
Sarah Hoffmann
b4a4ca81d1 actions: update to checkout v4 2023-09-05 11:44:12 +02:00
Sarah Hoffmann
c284df2dc9 restrict range for interpolated housenumbers
Interpolations are only supported up to 2^32 by the database.
Limit to 8 digits, which is still more than should be needed.
2023-09-05 11:41:41 +02:00
Sarah Hoffmann
18b2a4c204 avoid interpreting integral numbers as coordinates
There are addresses of type '123 W 345 N, Reigh', where 'W 345 N' is
the actual name of the street.
2023-09-05 11:26:47 +02:00
Sarah Hoffmann
e1303fb592 Merge pull request #3188 from lonvia/update-osm2pgsql
Update osm2pgsql to 1.9.2
2023-09-05 10:44:32 +02:00
Sarah Hoffmann
bd25cf04ed update osm2pgsql to 1.9.2 2023-09-04 14:53:46 +02:00
Sarah Hoffmann
ce1f4cbbdc allow lower case OSM types in lookup query 2023-09-04 14:39:35 +02:00
Sarah Hoffmann
e0aea0f27a Merge pull request #3186 from lonvia/tablespaces-for-flex
Make tablespace arguments work again for flex tables
2023-09-04 14:25:45 +02:00
Sarah Hoffmann
9848c4c56c implement NOMINATIM_SEARCH_WITHIN_COUNTRIES setting 2023-09-04 14:10:36 +02:00
Sarah Hoffmann
cb8149f8ea reenable tablespace arguments for flex tables 2023-09-04 10:45:52 +02:00
Sarah Hoffmann
0e74e82a38 add a shutdown handler for falcon server closing DB connections 2023-08-31 09:40:56 +02:00
Sarah Hoffmann
15e09f2b24 remove alias where it does not work with lambdas
Fixes #3177.
2023-08-30 21:55:34 +02:00
Sarah Hoffmann
2de8256863 fix parameter name for layer filter 2023-08-30 09:57:46 +02:00
Sarah Hoffmann
aff43fb1a3 Merge pull request #3178 from lonvia/library-documentation
Update documentation for new Python frontend
2023-08-30 09:17:24 +02:00
Sarah Hoffmann
cafd8e2b1e fix typos and grammar issues 2023-08-29 12:14:44 +02:00
Sarah Hoffmann
3794080327 rename documentation to 'Nominatim Manual' 2023-08-28 10:59:26 +02:00
Sarah Hoffmann
4e2683f068 adapt installation instruction for different frontends 2023-08-28 10:48:34 +02:00
Sarah Hoffmann
6e5f595d48 update API documentation 2023-08-28 10:31:58 +02:00
Sarah Hoffmann
2c24ba6d2d add deployment documentation for Python frontend 2023-08-27 21:27:16 +02:00
Sarah Hoffmann
3bb27fbee6 update documentation for configuration 2023-08-27 16:08:06 +02:00
Sarah Hoffmann
ef1b52eee5 add getting started section for library docs 2023-08-27 14:42:04 +02:00
Sarah Hoffmann
f917fa67aa switch to material theme
Needed for tabbed code blocks.
2023-08-25 22:47:07 +02:00
Sarah Hoffmann
386b4c82da preliminary library reference finished 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
a987f22cfb more library documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
558c42ec83 finish configuration section 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
309ac46b98 restructure library documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
399b04596e initial configuration documentation 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
75513a23a8 docs: documentation of API classes 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
e672de036e more adaptions for mkdocstrings griffin plugin 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
6533af6a91 add base framework for library reference 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
d3372e69ec update to modern mkdocstrings python handler 2023-08-25 21:40:20 +02:00
Sarah Hoffmann
f3809a52e8 Merge pull request #3174 from lonvia/osm2pgsql-update-fix
Be more precise on what is deleted on updates
2023-08-25 21:26:23 +02:00
Sarah Hoffmann
c5f5ab5363 be more strict about removal from place_to_be_deleted
If the type of a place is changed and then the same insert issued
again, the old data will effectively remain in the table.

Fixed #3168.
2023-08-25 14:22:49 +02:00
Sarah Hoffmann
26dfb868e9 add test case for bad update 2023-08-25 14:22:49 +02:00
Sarah Hoffmann
d5b6042118 Merge pull request #3172 from lonvia/query-timeout
Introduce timeouts for queries
2023-08-25 10:00:22 +02:00
Sarah Hoffmann
1115705cbc add additional timeout for entire request 2023-08-25 09:16:53 +02:00
Sarah Hoffmann
161d17d85b use backwards-compatible asyncio timeout implementation 2023-08-25 08:50:03 +02:00
Sarah Hoffmann
5a2ebfcd4a translate query timeouts into proper HTTP responses
Need to use a 503 here because a 408 (Request timeout) will motivate
browsers to immediately resend the request.
2023-08-25 08:50:03 +02:00
Sarah Hoffmann
06a974df36 add a timeout for DB queries 2023-08-25 08:50:03 +02:00
Sarah Hoffmann
2762c45569 apply adjusted counts only to final result 2023-08-24 21:37:02 +02:00
Sarah Hoffmann
fd85483ce3 Merge pull request #3169 from lonvia/tweak-search-with-frequent-names
Further tweak detection of queries that would return a massive amount of results
2023-08-24 14:22:35 +02:00
Sarah Hoffmann
0a2d0c3b5c allow terms with frequent searches together with viewbox 2023-08-24 09:21:09 +02:00
Sarah Hoffmann
de7f9a4bd9 Merge pull request #3167 from lonvia/explicit-encoding
Send explicit charset in content-type when returning json
2023-08-24 08:53:11 +02:00
Sarah Hoffmann
c5836c8090 Merge pull request #3139 from mtmail/update-search-examples
Search.md - update XML and JSON example output
2023-08-23 23:41:30 +02:00
Sarah Hoffmann
dcdda314e2 further tweak search containing very frequent tokens
Excluding non-rare full names is not really possible because it makes
addresses with street names like 'main st' unsearchable. This tries to
leave all names in but refrain from ordering results by accuracy
when too many results are expected. This means that the DB will simply
get the first n results without any particular order.
2023-08-23 23:04:12 +02:00
Sarah Hoffmann
a9edd57fe2 send charset again in content-type when returning json
There are quite a few applications out there that will use some local
encoding when the charset is not explicitly given.
2023-08-23 20:55:57 +02:00
Sarah Hoffmann
cbd9fad94b Merge pull request #3165 from lonvia/update-osm2pgsql
Update osm2pgsql to 1.9.1
2023-08-23 15:57:55 +02:00
Sarah Hoffmann
bc1009f8c2 vagrant scripts: add missing yaml library 2023-08-23 08:34:12 +02:00
Sarah Hoffmann
719b66e5ed add new required json library for osm2pgsql 2023-08-23 00:24:31 +02:00
Sarah Hoffmann
5f09ba4e10 update osm2pgsql to 1.9.1 2023-08-23 00:24:31 +02:00
Sarah Hoffmann
517a0cb673 Merge pull request #3159 from lonvia/fix-name-lookup-for-housenames
Fix name lookup for housenames
2023-08-20 15:52:20 +02:00
Sarah Hoffmann
7c79b07817 fix parameter use for ST_Project
Before postgis 3.4 ST_Project required a geography as input and seemed
to have implicitly converted to geography. Since 3.4 geometry input
is supported but leads to a completely different result.
2023-08-20 13:52:03 +02:00
Sarah Hoffmann
23eed4ff2f fix tag name for housename addresses in layer selection
Fixes #3156.
2023-08-19 15:57:33 +02:00
Sarah Hoffmann
4559886d83 Merge pull request #3155 from lonvia/caching-of-transliterators
Cache ICU transliterators between calls
2023-08-16 22:55:54 +02:00
Sarah Hoffmann
bfc706a596 cache ICU transliterators and reuse them 2023-08-15 23:08:44 +02:00
Sarah Hoffmann
9805a461eb Merge pull request #3150 from alfmarcua/allow_neg_id_details
Allow negative osm_id in details page by setting default parameter to 0
2023-08-14 11:41:42 +02:00
Sarah Hoffmann
bcf8433ba8 fix regression in default setting for details linkedplaces 2023-08-13 16:24:19 +02:00
Sarah Hoffmann
746dd057b9 prefer name-only searches more 2023-08-13 15:24:16 +02:00
Sarah Hoffmann
b710297d05 return bbox of full country for country searches
Fixes #3149.
2023-08-13 14:37:28 +02:00
Sarah Hoffmann
0a8e8cec0f fix application of label to wrong expression 2023-08-13 11:59:01 +02:00
Sarah Hoffmann
96e5a23727 avoid lambda SQL in connection with alias tables 2023-08-13 11:40:49 +02:00
Sarah Hoffmann
611b925368 use coalesce() instead of indexless postgis functions
ST_Intersects is deemed too expensive by the query planner which
leads to odd plans when index use is avoided.
2023-08-12 19:14:13 +02:00
Sarah Hoffmann
cab2a74740 do not use index when searching in large areas
This concerns viewboxes as well as radius search.
2023-08-12 16:12:44 +02:00
Sarah Hoffmann
fa3ac22a8f adapt tests to changes in search 2023-08-12 16:12:31 +02:00
Sarah Hoffmann
95d1048789 take token_assignment penalty into account
Also computes the expected count differently when addresses are
involved. Address token counts do not bear a direct relation to
real counts.
2023-08-12 15:33:50 +02:00
Sarah Hoffmann
38b2b8a143 fix debug output for NearSearch
The search info is in a subsearch and was therefore not taken into
account.
2023-08-12 11:27:55 +02:00
Sarah Hoffmann
3d0bc85b4d improve penalty for token-split words
The rematch penalty for partial words created by the transliteration
needs to take into account that they are rematched against the full word.
That means that missing beginning and end should not get a significant
penalty.
2023-08-12 11:26:02 +02:00
alfmarcua
25a391070b Allow negative osm_id in details page by setting default parameter to 0 2023-08-11 10:46:28 +02:00
Sarah Hoffmann
926c4a7d04 fix lookup polygon output
Fixes #3147.
2023-08-08 21:31:25 +02:00
marc tobias
5683f55646 Search.md - update XML and JSON example output 2023-08-08 17:49:11 +02:00
Sarah Hoffmann
671f4e943e Merge pull request #3146 from lonvia/forbid-mixed-queries
Do not allow to mix structured and unstructured search
2023-08-08 17:34:32 +02:00
Sarah Hoffmann
282c0da941 return an error when q is used together with structured parameters 2023-08-08 15:54:55 +02:00
Sarah Hoffmann
78648f1faf remove lookup by address only
There are too many lookups where the address is very frequent,
even when many address parts are present.
2023-08-06 21:00:10 +02:00
Sarah Hoffmann
8d9b5e4775 allow oversized viewboxes again
This seems to be a rather regular thing when unconditionally sending
the current view and being zoomed out.

Fixes #3137.
2023-08-06 17:50:35 +02:00
Sarah Hoffmann
996026e5ed provide full URL in more field
This is a regression against the PHP version.

Fixes #3138.
2023-08-06 17:50:02 +02:00
Sarah Hoffmann
2c7e1db5f6 remove SQL lambdas with IN expressions
The values of IN expressions are incorrectly cached.
2023-08-02 12:34:07 +02:00
Sarah Hoffmann
2171b38551 only print non-empty search tables 2023-08-02 09:25:47 +02:00
Sarah Hoffmann
afdbdb02a1 do not lookup by address vector when only few tokens are available
Names of countries and states are exceedingly rare in the word count
but are very frequent in the address. A short name has the danger
of producing too many results.
2023-08-02 09:25:47 +02:00
Sarah Hoffmann
8adeaa2c7e fix regression with lat/lon in json output
lat, lon is returned as strings in the PHP output. Reproduce that in the
Python frontend.

See #3115.
2023-08-01 14:27:43 +02:00
Sarah Hoffmann
d15f605129 allow OPTIONS method in starlette CORS middleware
If not allowed, then the middleware will return a 400 on pre-flight
CORS requests.

Fixes #3129.
2023-08-01 11:12:36 +02:00
Sarah Hoffmann
252fe42612 Merge pull request #3122 from miku0/sanitizer-final
Adds sanitizer for Japanese addresses to correspond to block address
2023-08-01 10:38:58 +02:00
miku0
67e1c7dc72 Moved KANJI_MAP to icu-rules 2023-07-31 11:57:49 +00:00
Sarah Hoffmann
c29ffc38e6 Merge pull request #3128 from lonvia/fix-classtype-lookup
Fix query over classtype tables
2023-07-31 10:20:58 +02:00
miku0
4d61cc87cf Add the test of reconbine_place 2023-07-31 02:39:56 +00:00
miku0
2350018106 Fixed cosmetic issues 2023-07-31 02:39:04 +00:00
Sarah Hoffmann
8fc3dd9457 fix query over classtype tables
The case statement prevented the index on the classtype tables
from being used. Move the case statement inside the geometry
function instead.
2023-07-30 23:51:36 +02:00
Sarah Hoffmann
d97ca9fcb2 Merge pull request #3127 from lonvia/file-logging
Add file logging for Python frontend
2023-07-30 21:22:48 +02:00
Sarah Hoffmann
e523da9e12 reintroduce file logging for Python frontend 2023-07-30 19:58:00 +02:00
miku0
67706cec4e add @fail-legacy 2023-07-27 07:33:53 +00:00
miku0
fac8c32cda Moved KANJI_MAP to global variable 2023-07-26 21:43:22 +00:00
Sarah Hoffmann
1c6f426363 Merge pull request #3125 from lonvia/warm-to-python
Port warm and export functions to Python
2023-07-26 22:37:37 +02:00
Sarah Hoffmann
8cba65809c older version of Postgres cannot convert jsonb to int 2023-07-26 17:45:21 +02:00
Sarah Hoffmann
77ed4f98bb older Python versions need a reference to the loop for a lock 2023-07-26 15:17:18 +02:00
miku0
848e5ac5de Correction to PR's comment 2023-07-26 09:50:25 +00:00
Sarah Hoffmann
9448c5e16f add tests for new warm and export Python functions 2023-07-26 11:09:52 +02:00
miku0
0722495434 add japanese sanitizer 2023-07-26 07:54:58 +00:00
Sarah Hoffmann
d545c6d73c mostly remove php-cgi requirement
This is now only needed for BDD tests against the php API.
2023-07-26 00:10:11 +02:00
Sarah Hoffmann
f69fea4210 remove now unused run_api_script function 2023-07-25 22:45:29 +02:00
Sarah Hoffmann
4cd0a4ced4 remove now unused run_legacy_script() 2023-07-25 21:39:23 +02:00
Sarah Hoffmann
0804cc0cff port export function to Python
Some of the parameters have been removed as they don't make sense
anymore.
2023-07-25 21:39:23 +02:00
Sarah Hoffmann
faeee7528f move warm script to python code 2023-07-25 21:39:23 +02:00
Sarah Hoffmann
261e0cfd5a Merge pull request #3121 from lonvia/port-remaining-api-calls
Port remaining API endpoints to Python
2023-07-25 20:56:38 +02:00
Sarah Hoffmann
66ecb56cea add tests for new endpoints 2023-07-25 10:57:19 +02:00
Sarah Hoffmann
79bd54f610 add /polygons endpoint to Python v1 API 2023-07-22 20:59:13 +02:00
Sarah Hoffmann
30cef4d5fd add deletable endpoint 2023-07-22 17:21:24 +02:00
Sarah Hoffmann
8d52032263 remove debug print 2023-07-22 13:54:23 +02:00
Sarah Hoffmann
4a5786334b Merge pull request #3117 from lonvia/fix-assorted-search-errors
More improvements to the Python search algorithm
2023-07-22 11:45:36 +02:00
Sarah Hoffmann
587698a6f3 disallow special housenumber search with a single frequent partial 2023-07-20 18:05:54 +02:00
Sarah Hoffmann
927d2cc824 do not split names from typed phrases
When phrases are typed, they should only contain exactly one term.
2023-07-17 20:09:08 +02:00
Sarah Hoffmann
7f9cb4e68d split up get_assignment function in more readable parts 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
d48ea4f22c disallow address searches that start with a postcode
These are postcode searches and nothing else.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
412bd2ec20 block search queries with too many tokens 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
1c189060c2 simplify yield_lookups() function
Move creation of field lookups in separate functions to make the code
more readable.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
4a00a3c0f5 penalize name token splitting when phrases are used 2023-07-17 16:27:25 +02:00
Sarah Hoffmann
8366e4ca83 penalize search with frequent partials
Avoid search against frequent partials if we have already looked for
the full name equivalents.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
283db76e45 avoid splitting of first token when a housenumber is present
This only covers the case of <poi name> <street name> <housenumber>
which is exceedingly rare.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
8a36ed4f6f increase threshold for full name searches
They still should be preferred over expensive partial name searches.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
d0f45155c8 fix search for housenumber names
The search still included a lookup of housenumbers in children which is
wrong.
2023-07-17 16:27:25 +02:00
Sarah Hoffmann
9fc235d670 Merge pull request #3112 from jenkin/fix-polgyon-polygon-typo
fix polgyon polygon typo
2023-07-17 14:20:14 +02:00
Alessio Cimarelli
42c549274f fix polgyon polygon typo 2023-07-17 10:11:57 +02:00
Sarah Hoffmann
2e56182a7f Merge pull request #3110 from lonvia/sql-lambda-queries
Use SQLAlchemy's lambda statements
2023-07-14 18:57:31 +02:00
Sarah Hoffmann
7932b1849b selected lambdas for search 2023-07-14 15:43:29 +02:00
Sarah Hoffmann
886374d779 add lambdas for layer filters 2023-07-14 15:27:20 +02:00
Sarah Hoffmann
d42e2e391f avoid forwarding variables via SQL 2023-07-14 15:27:16 +02:00
Sarah Hoffmann
f264eaeda2 make SQL statements in reverse lambda functions
Further improves internal caching of statements by SQLAlchemy.
2023-07-14 14:39:14 +02:00
Sarah Hoffmann
35fd74af6d Merge pull request #3096 from alfmarcua/search-within-countries-parameter
Parameterise the search only within countries
2023-07-12 17:45:55 +02:00
alfmarcua
4b53cf1464 Split lookupInCountry in two functions and document NOMINATIM_SEARCH_WITHIN_COUNTRIES parameter 2023-07-12 13:53:23 +02:00
alfmarcua
26e78efbb9 Parameterise the search only within countries 2023-07-12 13:53:23 +02:00
Sarah Hoffmann
157f0b8a83 Merge pull request #3109 from lonvia/prepared-statements
Make prepared statements work with Python API
2023-07-10 11:45:29 +02:00
Sarah Hoffmann
d743cf308e avoid index-use on rank parameters for reverse lookups 2023-07-09 22:49:31 +02:00
Sarah Hoffmann
f4cdcb995c force a fixed pool size and make it configurable 2023-07-09 00:31:53 +02:00
Sarah Hoffmann
75139961a3 use constant expressions to select partial indexes in reverse
When expressions are generated with SQLAlchemy, any constants are
replaced with bind parameters. The bind parameters become parameters of
prepared statements. The result is that the query planner tends to
overlook that the partial indexes can be used.
2023-07-09 00:31:53 +02:00
Sarah Hoffmann
3e2dd59a94 remove disabling of prepared statements
Fixes for query planning with prepared statements to follow in
subsequent commits.
2023-07-08 15:59:15 +02:00
Sarah Hoffmann
c01386b5b4 Merge pull request #3107 from lonvia/performance-search
Assorted fixes to Python search code
2023-07-08 15:53:42 +02:00
Marc Tobias
f59a072aa6 These days the OSM wikipedia tab no longer contains URLs 2023-07-07 17:36:11 +02:00
Sarah Hoffmann
9cb8447673 increase minimum required SQLAlchemy version to 1.4.31 2023-07-06 14:15:51 +02:00
Sarah Hoffmann
e67355ab0e make get_addressdata calls cachable
VALUEs() is not a cachable construct in SQLAlchemy, so use arrays
instead. Also add a special case for single results, the usual result
for reverse queries.
2023-07-06 12:16:57 +02:00
Sarah Hoffmann
9cb9b670d1 add support for postcompile literals in debug output 2023-07-06 12:16:57 +02:00
Sarah Hoffmann
3e725bb2db fix SRID handling in Geometry type 2023-07-06 12:16:57 +02:00
Sarah Hoffmann
cc45930ef9 avoid lookup via partials on frequent words
Drops expensive searches via partials on terms like 'rue de'.

See #2979.
2023-07-06 12:16:57 +02:00
Sarah Hoffmann
3266daa8fd add a small penalty to lookups in address vectors 2023-07-04 16:54:42 +02:00
Sarah Hoffmann
ce17b0eeca Merge pull request #3101 from lonvia/custom-geometry-type
Improve use of SQLAlchemy statement cache with search queries
2023-07-03 11:03:26 +02:00
Sarah Hoffmann
17a65d82bb make types compatible with older Python versions 2023-07-02 23:06:42 +02:00
Sarah Hoffmann
cc7646665c remove GeoAlchemy as dependency 2023-07-02 23:06:42 +02:00
Sarah Hoffmann
82216ebf8b always run function update on migrations
This means that we can have migrations which require nothing but
an update of the functions.
2023-07-01 20:18:59 +02:00
Sarah Hoffmann
49e0d83d5d fix linting issues 2023-07-01 20:18:59 +02:00
Sarah Hoffmann
673c3c7a55 replace regexp_match with generic op() functions
Works around a bug in SQLAlchemy where regexp_match creates an
unstable cache key.
2023-07-01 18:15:22 +02:00
Sarah Hoffmann
5135041405 replace CASE construct with plpgsql function 2023-07-01 18:15:22 +02:00
Sarah Hoffmann
42631b85c7 band-aid for SQLAlchemy 1.4 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
9f6f12cfeb move search to bind parameters 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
6c4c9ec1f2 switch reverse() to new Geometry datatype
Also switches to using bind parameters for recurring parameters.
2023-07-01 18:03:07 +02:00
Sarah Hoffmann
4bb4db0668 introduce slim Geometry database type 2023-07-01 18:03:07 +02:00
Sarah Hoffmann
505fdd02ca Merge pull request #3100 from lonvia/fix-name-merging-in-unnamed-boundaries
Fix merging of linked names into unnamed boundaries
2023-07-01 15:36:56 +02:00
Sarah Hoffmann
a873f260cf fix merging of linked names into unnamed boundaries
The NULL value of the boundaries' name field was erasing all
content when used in SQL operations.
2023-06-30 22:14:11 +02:00
Sarah Hoffmann
b45f761227 Merge pull request #3099 from lonvia/determine-place-address-from-tokenizer
Use information from tokenizer to determine street vs. place address
2023-06-30 21:47:57 +02:00
Sarah Hoffmann
d7a3039c2a also switch legacy tokenizer to new street/place choice behaviour 2023-06-30 17:03:17 +02:00
Sarah Hoffmann
6c5589c9d2 fix optional string representation or repr(PlaceName) 2023-06-30 11:10:44 +02:00
Sarah Hoffmann
645ea5a057 use information from tokenizer to determine street vs. place address
So far the SQL logic used the information from the address field
to determine if an address is attached to a street or place.
This changes the logic to use the information provided in the
token_info. This allows sanitizers to enforce a certain parenting
without changing the visible address information.
2023-06-30 11:08:25 +02:00
Sarah Hoffmann
2755ebe883 Merge pull request #3094 from lonvia/fix-failing-bdd-tests
Add BDD tests against Python frontend to CI
2023-06-22 22:28:31 +02:00
Sarah Hoffmann
4b829b5ff9 Merge pull request #3090 from mtmail/check-database-on-frozen-database
check-database on frozen db shouldnt recommend indexing
2023-06-22 20:11:30 +02:00
Sarah Hoffmann
ed19340af0 add python frontend tests to CI 2023-06-22 17:29:44 +02:00
Sarah Hoffmann
2d05ff0190 slightly adapt postcode tests 2023-06-22 16:51:59 +02:00
Sarah Hoffmann
0d338fa4c0 bdd: fix faking HTTP headers for python web frameworks 2023-06-22 14:00:33 +02:00
mtmail
15a66e7b7d Merge branch 'osm-search:master' into check-database-on-frozen-database 2023-06-22 12:14:55 +02:00
Sarah Hoffmann
3a21999a17 move text normalization into extra function 2023-06-22 10:48:05 +02:00
Sarah Hoffmann
08dcd05d7b Merge pull request #3093 from lonvia/remove-sanic
Remove support for Sanic
2023-06-22 09:55:32 +02:00
Marc Tobias
2337cc653b check-database on frozen db shouldnt recommend indexing 2023-06-21 17:47:57 +02:00
Sarah Hoffmann
0deb9262c9 use in operator for enum tests
Fixes a complaint by mypy.
2023-06-21 15:30:40 +02:00
Sarah Hoffmann
9bc5be837b remove useless check
Found by new mypy version.
2023-06-21 11:56:39 +02:00
Sarah Hoffmann
b79d5494f9 remove support for sanic framework
There is no performance gain over falcon or starlette but the special
structure of sanic makes it hard to have exchangeable code.
2023-06-21 10:53:57 +02:00
Sarah Hoffmann
ded2c5bf68 update project information for taginfo.json 2023-06-21 10:06:25 +02:00
Sarah Hoffmann
bd2c64876f Merge pull request #3045 from biswajit-k/taginfo
Add script to generate taginfo project description
2023-06-21 10:01:40 +02:00
Sarah Hoffmann
7c66fef63f Merge pull request #3091 from lonvia/fix-postcode-search
Assorted smaller fixes for Python-based search code
2023-06-20 16:13:20 +02:00
Sarah Hoffmann
4ad8818809 avoid fallback country lookup when places are excluded 2023-06-20 12:22:08 +02:00
Sarah Hoffmann
2f4342810d allow empty parts in excluded_place_id
This worked before.
2023-06-20 11:56:43 +02:00
Sarah Hoffmann
36df56b093 fix header name for browser languages 2023-06-20 11:56:43 +02:00
Sarah Hoffmann
d0a1e8e311 tweak postcode search
Give a preference to left-right reading, i.e <postcode>,<address>
prefers a postcode search while <address>,<postcode> rather does
an address search.

Also exclude non-addressables, countries and state from results when a
postcode is contained in the query.
2023-06-20 11:56:43 +02:00
Sarah Hoffmann
1b50381852 use string representation when dumping variables 2023-06-20 10:15:51 +02:00
Sarah Hoffmann
3443d2c129 fix debug output for searches
Fields have been renamed.
2023-06-20 10:12:19 +02:00
Sarah Hoffmann
1f83efa8f2 Merge pull request #3086 from lonvia/close-connection-on-replication
Close database connections while waiting for the next update cycle
2023-06-19 15:48:00 +02:00
Sarah Hoffmann
a7bd39b62a Merge pull request #3087 from lonvia/conditional-spgist
Disable SPGist for PostgreSQL < 11
2023-06-19 15:47:37 +02:00
Sarah Hoffmann
1177b30a60 Merge pull request #3085 from lonvia/allow-brackets-in-password
Do not fail php script generation when curly braces are present
2023-06-19 14:44:22 +02:00
Sarah Hoffmann
10e56e0de7 no longer recommend continuous update mode 2023-06-19 12:07:09 +02:00
Sarah Hoffmann
6f3339cc49 close DB connection when waiting for next update cycle 2023-06-19 12:02:51 +02:00
Sarah Hoffmann
771be0e056 do not fail php script generation when curly braces are present
Fixes #3084.
2023-06-19 11:23:30 +02:00
Sarah Hoffmann
71ad4fc406 Add custom config to information required for bug reports 2023-06-18 09:27:55 +02:00
Sarah Hoffmann
6a5695d059 disable SPGist for PostgreSQL < 11
Before that version, an operator needed to be given.
2023-06-13 15:15:43 +02:00
Sarah Hoffmann
aaf0e7db06 add instructions for switching to user for install scripts
Fixes #3057.
2023-06-13 14:12:56 +02:00
Sarah Hoffmann
7aa0aba382 remove Ubuntu 18 installation from TOC
Fixes #3072.
2023-06-13 14:05:54 +02:00
Sarah Hoffmann
9af190a43c Merge pull request #3073 from mtmail/vagrantfile-remove-ubuntu-18
Vagrantfile - remove Ubuntu 18
2023-06-13 12:31:10 +02:00
marc tobias
2e46bc0aea Vagrantfile - remove Ubuntu 18 2023-05-30 00:22:18 +02:00
Sarah Hoffmann
a413aae8a3 Merge pull request #3067 from lonvia/python-search-api
Python implementation of Search API
2023-05-27 08:56:06 +02:00
Sarah Hoffmann
317cc5c544 make code backwards-compatible with older Python versions 2023-05-26 23:33:35 +02:00
Sarah Hoffmann
41bf162306 remove tests for old PHP cli commands 2023-05-26 17:36:05 +02:00
Sarah Hoffmann
43c27dffd2 fix new linting warning 2023-05-26 16:31:43 +02:00
Sarah Hoffmann
8f299838f7 fix various failing BDD tests 2023-05-26 15:08:48 +02:00
Sarah Hoffmann
146a0b29c0 add support for search by housenumber 2023-05-26 14:10:57 +02:00
Sarah Hoffmann
964bc7fbe0 ignore name rankings without variants 2023-05-26 13:53:34 +02:00
Sarah Hoffmann
75aa3cc9bd properly close connections when shutting down starlette 2023-05-26 13:53:34 +02:00
Sarah Hoffmann
0843fefad3 fix geocodejson address assignment
The categories should be assigned the address part with the
highest address rank not lowest.
2023-05-26 11:43:11 +02:00
Sarah Hoffmann
371a780ef4 add server fronting for search endpoint
This also implements some of the quirks of free-text search of the
V1 API, in particular, search for categories and coordinates.
2023-05-26 11:40:45 +02:00
Sarah Hoffmann
c7db69a30c add timestamps to HTML debug output 2023-05-26 09:05:44 +02:00
Sarah Hoffmann
b48cda7173 mingle names from linked places into results 2023-05-24 23:17:15 +02:00
Sarah Hoffmann
0608cf1476 switch CLI search command to python implementation 2023-05-24 22:54:54 +02:00
Sarah Hoffmann
f335e78d1e make localisation of results explicit
Localisation was previously done as part of the formatting but might
also be useful on its own when working with the results directly.
2023-05-24 18:12:34 +02:00
Sarah Hoffmann
dcfb228c9a add API functions for search functions
Search is now split into three functions: for free-text search,
for structured search and for search by category. Note that the
free-text search does not have as many hidden features like
coordinate search. Use the search parameters for that.
2023-05-24 18:05:43 +02:00
Sarah Hoffmann
dc99bbb0af implement actual database searches 2023-05-24 13:52:31 +02:00
Sarah Hoffmann
c42273a4db implement search builder 2023-05-23 11:23:44 +02:00
Sarah Hoffmann
3bf489cd7c implement token assignment 2023-05-22 15:49:03 +02:00
Sarah Hoffmann
d8240f9ee4 add query analyser for legacy tokenizer 2023-05-22 11:07:14 +02:00
Sarah Hoffmann
2448cf2a14 add factory for query analyzer 2023-05-22 09:23:19 +02:00
Sarah Hoffmann
004883bdb1 query analyzer for ICU tokenizer 2023-05-22 08:46:19 +02:00
biswajit-k
11a1191ba0 add ci-test for taginfo file generation 2023-05-21 23:49:04 +05:30
Sarah Hoffmann
ff66595f7a add data structure for tokenized query 2023-05-21 09:30:57 +02:00
biswajit-k
9de2a342e8 docs: add installation instruction for dkjson package 2023-05-19 23:22:42 +05:30
biswajit-k
562f8bc84a Add script to generate taginfo project description 2023-05-19 18:02:06 +05:30
Sarah Hoffmann
d69411f414 Merge pull request #3064 from lonvia/clicmd-debug-output
Enable debug output on command line
2023-05-19 08:55:26 +02:00
Sarah Hoffmann
39ccb15880 enable debug output on command line 2023-05-18 22:37:46 +02:00
Sarah Hoffmann
d2c56f9f96 Merge pull request #3063 from lonvia/variable-parameters
Rework how search parameters are handed to the Python API
2023-05-18 22:27:18 +02:00
Sarah Hoffmann
7f1a0ce94a fix use of subquery in reverse 2023-05-18 20:27:07 +02:00
Sarah Hoffmann
32dbf83747 move zoom_to_rank computation to extra file 2023-05-18 18:29:41 +02:00
Sarah Hoffmann
d9d8b9c526 add tests for parameter converter 2023-05-18 18:09:07 +02:00
Sarah Hoffmann
9036bf3398 Merge pull request #3062 from lonvia/enable-psycopg
Add support for psycopg 3 in the frontend
2023-05-18 18:07:11 +02:00
Sarah Hoffmann
bef5cea48e switch API parameters to keyword arguments
This switches the input parameters for API calls to a generic
keyword argument catch-all which is then loaded into a dataclass
where the parameters are checked and forwarded to internal
function.

The dataclass gives more flexibility with the parameters and makes
it easier to reuse common parameters for the different API calls.
2023-05-18 17:42:23 +02:00
Sarah Hoffmann
84abf7c95a actions: run tests against psycopg 2023-05-18 16:08:32 +02:00
Sarah Hoffmann
1f0e1bec0e enable API use with psycopg 3 2023-05-18 16:08:32 +02:00
Sarah Hoffmann
8f88613a6b Merge pull request #3050 from mtmail/tiger-check-if-database-frozen
when adding Tiger data, check first if database is in frozen state
2023-05-08 16:35:31 +02:00
Marc Tobias
e5f332bd71 when adding Tiger data, check first if database is in frozen state 2023-05-08 14:35:30 +02:00
Sarah Hoffmann
07589cfc34 Merge pull request #3054 from lonvia/add-amenity-to-documentation
Docs: reintroduce amenity parameter for structured search
2023-05-08 11:12:17 +02:00
Sarah Hoffmann
68e0306e62 docs: reintroduce amenity parameter for structured search 2023-05-08 10:24:12 +02:00
Sarah Hoffmann
5751686fdc Merge pull request #3006 from biswajit-k/generalize-filter
generalize filter function for sanitizers
2023-04-11 19:20:08 +02:00
Sarah Hoffmann
2af20f8df8 Merge pull request #3030 from lonvia/interpolation-corner-cases
Fix a number of corner cases with interpolation splitting
2023-04-07 13:59:52 +02:00
Sarah Hoffmann
60c1301fca fix a number of corner cases with interpolation splitting
Snapping a line to a point before splitting was meant to ensure
that the split point is really on the line. However, ST_Snap() does
not always behave well for this case. It may shorten the interpolation
line in some cases with the result that two points housenumbers
suddenly fall on the same point. It might also shorten the line down
to a single point which then makes ST_Split() crash.

Switch to a combination of ST_LineLocatePoint and ST_LineSubString
instead, which guarantees to keep the original geometry. Explicitly
handle the corner cases, where the split point falls on the beginning
or end of the line.
2023-04-06 16:54:00 +02:00
Sarah Hoffmann
b8a7319212 use place_to_be_deleted when force deleting objects 2023-04-04 11:09:17 +02:00
Sarah Hoffmann
6ef4d04b46 Merge pull request #3023 from lonvia/lookup-api
Python implementation of Lookup API
2023-04-03 16:20:47 +02:00
Sarah Hoffmann
1dce2b98b4 switch CLI lookup command to Python implementation 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
86c4897c9b add lookup call to server glue 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
2237603677 add tests for new lookup API 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
6e81596609 rename lookup() API to details and add lookup call
The initial plan to serve /details and /lookup endpoints from
the same API call turned out to be impractical, so the API now
also has separate functions for both.
2023-04-03 14:40:41 +02:00
Sarah Hoffmann
4607c7ed04 python lookup: add function for simple lookups 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
63638eb447 python lookup: factor out finding in tables into own function 2023-04-03 14:40:41 +02:00
Sarah Hoffmann
c92ac84679 Merge pull request #3021 from lonvia/readd-postalcode-relations
Flex style: reinstate postalcode relations
2023-04-03 12:11:49 +02:00
Sarah Hoffmann
ed9cd9f0e5 bdd: disable detail tests searching by place ID
Place IDs are not stable and cannot be used in tests.
2023-04-03 10:07:06 +02:00
Sarah Hoffmann
7d30dbebc5 flex style: reinstate postcode boundaries
Postcode boundaries don't have a name, so need to be imported
unconditionally.
2023-04-03 09:17:50 +02:00
biswajit-k
8f03c80ce8 generalize filter for sanitizers 2023-04-01 19:24:09 +05:30
Sarah Hoffmann
ee0366af88 Merge pull request #3019 from lonvia/add-data-postprocessing
Add postprocessing to add-data function
2023-03-31 22:19:46 +02:00
Sarah Hoffmann
683a3cb3ec call osm2pgsql postprocessing flush_deleted_places() when adding data 2023-03-31 18:05:07 +02:00
Sarah Hoffmann
f8bca4fbcb Merge pull request #3020 from lonvia/reverse-api
Python implementation of reverse
2023-03-31 18:01:50 +02:00
Sarah Hoffmann
1e2a1d9ce5 limit results for country lookup 2023-03-30 10:00:19 +02:00
Sarah Hoffmann
1feac2069b add BDD tests for new layers parameter 2023-03-30 09:54:55 +02:00
Sarah Hoffmann
26ee6b6dde python reverse: add support for point geometries in interpolations 2023-03-29 17:21:33 +02:00
Sarah Hoffmann
c150ca4889 add wsgi entry point for falcon server 2023-03-28 15:05:52 +02:00
Sarah Hoffmann
e717e349d0 add wsgi entry point for starlette
uvicorn needs a parameter-free function to start.
2023-03-28 15:03:00 +02:00
Sarah Hoffmann
e158017086 ignore broken data in interpolation table 2023-03-28 14:57:39 +02:00
Sarah Hoffmann
36d068871d disable prepared statements
Prepared statements do not work well with the partial indexes that
Nominatim uses because all Python constants are replaced with
parameters. A query like:

  placex.select().where(placex.c.rank_address.between(4, 25)

gets translated into a prepared query with two parameters:

  SELECT * FROM placex WHERE rank_address BETWEEN %s and %s

And this does not work with a partial index of:

  CREATE INDEX on placex(geometry) WHERE rank_address between 4 and 25
2023-03-28 14:53:45 +02:00
Sarah Hoffmann
6c67a4b500 switch reverse CLI command to Python implementation 2023-03-26 18:09:33 +02:00
Sarah Hoffmann
86b43dc605 make sure PHP and Python reverse code does the same
The only allowable difference is precision of coordinates. Python uses
a precision of 7 digits where possible, which corresponds to the
precision of OSM data.

Also fixes some smaller bugs found by the BDD tests.
2023-03-26 16:21:43 +02:00
Sarah Hoffmann
300921a93e add server glue for reverse API call 2023-03-25 17:04:02 +01:00
Sarah Hoffmann
35b52c4656 add output formatters for ReverseResults
These formatters are written in a way that they can be reused for
search results later.
2023-03-25 15:45:03 +01:00
Sarah Hoffmann
878302a622 ignore NotImplementedErrors when compiling SQL 2023-03-24 11:16:02 +01:00
Sarah Hoffmann
55277738d4 factor out layer checks in reverse function 2023-03-24 10:08:01 +01:00
Sarah Hoffmann
2f54732500 python: implement reverse lookup function
The implementation follows for most part the PHP code but introduces an
additional layer parameter with which the kind of places to be returned
can be restricted. This replaces the hard-coded exclusion lists.
2023-03-23 22:38:37 +01:00
Sarah Hoffmann
41da298b18 add python implementation of reverse
This adds an additional layer parameter and slightly changes the
queries to do more efficient lookups for large area features.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
ebcf8c2b6b api: make details parameter optional 2023-03-23 10:16:50 +01:00
Sarah Hoffmann
1facfd019b api: generalize error handling
Return a consistent error response which takes into account the chosen
content type. Also adds tests for V1 server glue.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
00e3a752c9 split SearchResult type
Use adapted types for the different result types. This makes it
easier to have adapted output formatting and means there are only
result fields that are filled.
2023-03-23 10:16:50 +01:00
Sarah Hoffmann
d03fd3f883 Merge pull request #3013 from mtmail/changelog-4-2-x
update ChangeLog to version 4.2.2
2023-03-23 09:20:25 +01:00
marc tobias
fa3d13ac7e update ChangeLog to version 4.2.2 2023-03-22 23:45:59 +01:00
Sarah Hoffmann
434bd5a5bb Merge pull request #3011 from lonvia/fix-flex-scripts
Fix extra tag handling in some flex scripts
2023-03-21 09:58:53 +01:00
Sarah Hoffmann
9aca389bda Merge pull request #3010 from lonvia/adapt-zoom-for-reverse
Minor adjustments to reverse zoom translation
2023-03-21 08:56:25 +01:00
Sarah Hoffmann
69ce42b22c remove more tags from full style
The full style should only save the necessary tags needed for
processing.
2023-03-20 21:42:24 +01:00
Sarah Hoffmann
114cc776be fix handling of unused extra tags
The tags can only be moved to extra tags after the main tags have been
handled.
2023-03-20 21:20:27 +01:00
Sarah Hoffmann
5e5cff897f minor adjustment to reverse zoom translation
Add a 'village' zoom level at 13 between town and neighbourhood
and all locality-like objects for zoom 15. These zoom levels had
the same behaviour as the lower level so far. However, the distinction
for village and locality may be useful at times.
2023-03-20 20:47:42 +01:00
Sarah Hoffmann
a8bedb6ab9 Merge pull request #3003 from lonvia/rework-bdd-api-tests
Reorganize code around BDD API tests and extend reverse API tests
2023-03-10 10:01:24 +01:00
Sarah Hoffmann
81430bd3bd bdd: be more fuzzy with coordinate comparisons 2023-03-09 22:37:45 +01:00
Sarah Hoffmann
93203f355a avoid recent Python dialect 2023-03-09 20:57:43 +01:00
Sarah Hoffmann
b730d286ad fix polygon simplification in reverse results
polygon_threshold has never really worked for reverse.
2023-03-09 20:24:07 +01:00
Sarah Hoffmann
3f2296e3ea bdd: extend reverse API tests for format checks
Reorganise the API reverse tests and extend the checks for the
output format, testing for all expected fields.
2023-03-09 20:20:50 +01:00
Sarah Hoffmann
2b7eb4906a bdd: add tests for valid debug output 2023-03-09 20:10:51 +01:00
Sarah Hoffmann
db1aa4d02e bdd: replace old formatting strings 2023-03-09 19:49:55 +01:00
Sarah Hoffmann
ad88d7a3e0 bdd: more format checks for reverse XML 2023-03-09 19:40:24 +01:00
Sarah Hoffmann
e42c1c9c7a bdd: new step variant 'result contains in field'
This replaces the + notation for recursing into result dictionaries.
2023-03-09 19:31:21 +01:00
Sarah Hoffmann
556bb2386d bdd: factor out computation of result to-check lists 2023-03-09 18:01:45 +01:00
Sarah Hoffmann
1e58cef174 bdd: replace property_list construct with standard check functions 2023-03-09 17:56:28 +01:00
Sarah Hoffmann
01010e443f bdd: remove special case for osm_type field
The fuzzy field check hides formatting errors. Use 'osm' when
only caring about the content.
2023-03-09 17:44:34 +01:00
Sarah Hoffmann
da0a7a765e bdd: reorganise field comparisons
Move comparison on Field values from assert_field() into a
comparator class. Replace BadRowValueAssert with a simpler
check_row() function.
2023-03-09 17:05:05 +01:00
Sarah Hoffmann
9769a0dcdb bdd: use new check_for_attributes() function also in steps 2023-03-09 16:44:07 +01:00
Sarah Hoffmann
fbff4fa218 bdd: fully check correctness of geojson and geocodejson
Parse code now checks presence of all required fields and exports
all fields for inspection.
2023-03-09 16:36:46 +01:00
Sarah Hoffmann
d17ec56e54 bdd: remove OrderedDict
dicts are guaranteed to keep insertion order since Python 3.7, making
use of OrderedDict moot.
2023-03-09 16:08:39 +01:00
Sarah Hoffmann
9a5f75dba7 Merge pull request #2993 from biswajit-k/delete-tags
Adds sanitizer for preventing certain tags to enter search index based on parameters
2023-03-09 14:31:45 +01:00
biswajit-k
ca149fb796 Adds sanitizer for preventing certain tags to enter search index based on parameters
fix: pylint error

added docs for delete tags sanitizer

fixed typos in docs and code comments

fix: python typechecking error

fixed rank address type

Revert "fixed typos in docs and code comments"

This reverts commit 6839eea755a87f557895f30524fb5c03dd983d60.

added default parameters and refactored code

added test for all parameters
2023-03-09 14:18:39 +05:30
Sarah Hoffmann
08f19e074b Merge pull request #2999 from biswajit-k/fix-typos
fixed typos in docs and code comments
2023-03-08 08:55:27 +01:00
biswajit-k
36388cafe9 fixed typos in docs and code comments 2023-03-06 17:09:38 +05:30
Sarah Hoffmann
8191c747b9 add latest security incident
Also removes 3.6 which is no longer supported.
2023-02-22 11:24:04 +01:00
Sarah Hoffmann
d078763fa1 harmonize flags for PHP's htmlspecialchars 2023-02-20 16:46:53 +01:00
Sarah Hoffmann
412ead5f2d adapt PHP tests for debug output 2023-02-20 16:23:28 +01:00
Sarah Hoffmann
513175ce23 properly encode special HTML characters in debug mode 2023-02-20 15:43:03 +01:00
Sarah Hoffmann
8db6dd995a Merge pull request #2986 from mtmail/add-more-languages-from-osm-wiki
sp_wiki_loader: add more default languages
2023-02-20 11:12:08 +01:00
marc tobias
4be6970bd4 sp_wiki_loader: add more default languages 2023-02-18 13:54:48 +01:00
Sarah Hoffmann
fa681ce246 add timestamp to debug SQL output 2023-02-18 09:49:30 +01:00
Sarah Hoffmann
dd5cd97713 add warning for reverse index migration 2023-02-18 09:43:37 +01:00
Sarah Hoffmann
89d47d26f0 Merge pull request #2985 from lonvia/fix-place-rank-inheritance
Restrict place rank inheritance to address items
2023-02-18 09:35:19 +01:00
Sarah Hoffmann
d574ceb598 restrict place rank inheritance to address items
Place tags must have no influence on street- or POI-level
objects.
2023-02-17 16:25:26 +01:00
Sarah Hoffmann
92e2f5ca8e Merge pull request #2983 from lonvia/improve-reverse-place-node-lookup
Improve reverse place node lookup
2023-02-17 15:51:55 +01:00
Sarah Hoffmann
f2bc792178 use reverse buffered index for search within country 2023-02-17 14:10:01 +01:00
Sarah Hoffmann
8ed096f938 speed up reverse lookup of place nodes
Add a special index that contains the place nodes buffered by their
respective area according to their search rank. This replaces the
maximum area search for place nodes and reduces drastically the number
of place nodes that need to be retrieved.
2023-02-17 14:10:01 +01:00
Sarah Hoffmann
3405dbf90e Merge pull request #2981 from lonvia/add-point-wkb-decoder
Python frontend: add a WKB decoder for the Point class
2023-02-17 08:40:14 +01:00
Sarah Hoffmann
ee0c5e24bb add a WKB decoder for the Point class
This allows to return point geometries from the database and makes
the SQL a bit simpler.
2023-02-16 17:29:56 +01:00
Sarah Hoffmann
b320f1c7e3 Merge pull request #2978 from lonvia/add-debug-view
Add debug view to Python API
2023-02-14 14:08:42 +01:00
Sarah Hoffmann
6c6b1c0606 add typing information for pygments to requirements 2023-02-14 11:58:08 +01:00
Sarah Hoffmann
a5f5add630 actions: use token to avoid rate limiting 2023-02-14 11:57:37 +01:00
Sarah Hoffmann
8557105c40 add debug output for unit tests
This uses the debug output facility meant for pretty HTML output
to give us debugging output for the unit tests.
2023-02-14 11:57:37 +01:00
Sarah Hoffmann
24e7ffb289 add HTML-formatted debug output to lookup 2023-02-14 11:57:37 +01:00
Sarah Hoffmann
0b9bcfe01d Merge pull request #2974 from bloom256/main-tag-in-reverse-lookup
Main tag information added to geocodejson in reverse geocoding
2023-02-11 11:01:11 +01:00
Sarah Hoffmann
1a0e8f810b Merge pull request #2975 from bloom256/typo-fix
typo fixed
2023-02-11 11:00:15 +01:00
danil
7698f0672d typo fixed 2023-02-08 00:21:58 +04:00
danil
da064ea702 Main tag information added to geocodejson in reverse geocoding 2023-02-08 00:19:49 +04:00
Sarah Hoffmann
0c65289a80 Merge pull request #2970 from lonvia/add-details-endpoint
Python implementation of details endpoint
2023-02-06 16:32:35 +01:00
Sarah Hoffmann
95c90a785f actions: force PHPUnit 9
PHPUnit 10 is incompatible with our tests. Not worth adapting anymore.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
42c3754dcd add tests for details result formatting and trim results
Values that are None are no longer included in the output to save
a bit of bandwidth.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
b742200442 expand details BDD tests
There are now minor differences in the output between PHP and
Python versions, so introduce specific tests.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
3ac70f7cc2 implement details endpoint in Python servers 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
104722a56a switch details cli command to new Python implementation 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
1924beeb20 add lookup of postcode data 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
70f6f9a711 add lookup of tiger data 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
f1ceefe9a6 add lookup of address interpolations 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
e1fc1566f3 fix new linting issues from pylint 2.16 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
189f74a40d add unit tests for lookup function 2023-02-04 21:22:22 +01:00
Sarah Hoffmann
370c9b38c0 improve scaffolding for API unit tests
Use the static table definition to create the test database.
Add helper function to simplify filling the tables.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
df65c10360 add lookup() call to the library API
Currently only looks places up in placex.
2023-02-04 21:22:22 +01:00
Sarah Hoffmann
4573389da7 docs: fix internal links
Fixes #2968.
2023-02-04 21:21:23 +01:00
Sarah Hoffmann
5c55c1d8a1 Merge pull request #2963 from lonvia/add-sqlalchemy-schema
Add table definitions for SQLAlchemy
2023-01-30 11:17:22 +01:00
Sarah Hoffmann
a1d4e53eb8 add pytest-asyncio as a requirement for testing 2023-01-30 09:36:17 +01:00
Sarah Hoffmann
16b6484c65 add property cache for API
This caches results from querying nominatim_properties.
2023-01-30 09:36:17 +01:00
Sarah Hoffmann
2156fd4909 adapt annotations for SQLAlchemy 2.x
It is not possible to produce type annotations that work with both
versions 1.4 and 2.0. So keep to the principle of only supporting
newest versions when it comes to mypy. This means that some types
may have to be string quoted to not cause issues when running with
SQLAlchemy 1.4.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
7f5fbe1dc7 add new geoalchemy requirement 2023-01-28 11:51:26 +01:00
Sarah Hoffmann
2e9090d121 adapt status to use SQLAlchemy tables 2023-01-28 11:51:26 +01:00
Sarah Hoffmann
23f2690c54 convert StatusResult to a dataclass
This gives us nice str() functions.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
5226cd2a0b add SQLAlchemy table schema for Nominatim tables
Also defines an extended connection object that includes access to
the table definitions. Makes it easier to access the tables from
code that has been split off into separate modules.
2023-01-28 11:51:26 +01:00
Sarah Hoffmann
c7e8a82d68 Merge pull request #2958 from lonvia/streaming-json-writer
Introduce streaming json writer for JSON output
2023-01-25 17:36:23 +01:00
Sarah Hoffmann
77bec1261e add streaming json writer for JSON output 2023-01-25 15:05:33 +01:00
Sarah Hoffmann
dfcb24061e Merge pull request #2957 from lonvia/reorganise-api-module
Assorted improvements to the new api library module
2023-01-25 09:29:31 +01:00
Sarah Hoffmann
f85b0c6208 allow to add php-compatible endpoints
If the new setting NOMINATIM_SERVE_LEGACY_URLS is set, the servers
expose the endpoints also with the .php suffix to ensure backwards
compatibility.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
e490a30a4a add support for CORS headers
Adds the additional dependency to sanic-cors for the Sanic server.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
654b652530 factor out common server implementation code
Most of the server implementation of V1 API now resides in
api.v1.server_glue. The webframeworks only supply some glue code
which is independent to changes in the API code.
2023-01-24 21:39:19 +01:00
Sarah Hoffmann
8f4426fbc8 reorganize code around result formatting
Code is now organized by api version. So formatting has moved to
the api.v1 module. Instead of holding a separate ResultFormatter
object per result format, simply move the functions to the
formatter collector and hand in the requested format as a parameter.
Thus reorganized, the api.v1 module can export three simple functions
for result formatting which in turn makes the code that uses
the formatters much simpler.
2023-01-24 17:20:51 +01:00
Sarah Hoffmann
32c1e59622 reorganize api submodule
Use a directory for the submodule where the __init__ file contains
the public API. This makes it easier to separate public interface
from the internal implementation.
2023-01-24 13:28:04 +01:00
Sarah Hoffmann
e56957f047 api: delay setup of initial database connection
Defer database setup until the first call to a function. Needs an
additional lock because the setup still needs to be done sequentially.
2023-01-24 10:56:22 +01:00
Sarah Hoffmann
3cc357bffa Merge pull request #2955 from lonvia/fix-importance-refresh
Fix importance recalculation
2023-01-23 09:07:43 +01:00
Sarah Hoffmann
388faa2c54 Merge pull request #2954 from lonvia/remove-comma-as-separator
Remove comma as name separator
2023-01-23 09:06:23 +01:00
Sarah Hoffmann
ce9ed993c8 fix importance recalculation
The signature of the compute_importance() function has changed.
2023-01-22 22:32:16 +01:00
Sarah Hoffmann
929a13d4cd remove comma as name separator
Commas are most of the time used as a part of a name, not to
separate multiple names.

See also #2950.
2023-01-22 22:29:36 +01:00
Sarah Hoffmann
56f0d678e3 exclude names ending in :wikipedia from indexing
The wikipedia prefix is used for referencing a wikipedia article
for the given tag, not the object, so not useful to search.
2023-01-21 11:16:08 +01:00
Sarah Hoffmann
02645277c8 fix typo in argument to details CLI command
Fixes #2951.
2023-01-21 10:44:10 +01:00
Sarah Hoffmann
5f4e98e0d9 update Makefile in test directory 2023-01-09 20:49:33 +01:00
Sarah Hoffmann
e9e14834bc Merge pull request #2940 from lonvia/remove-ubuntu-18-from-actions
Actions: use Ubuntu 20 image for testing old versions of dependent software
2023-01-05 20:16:19 +01:00
Sarah Hoffmann
5a57d6308e increase minimum required version of PHP to 7.3
Other versions are not tested anymore.
2023-01-05 16:58:46 +01:00
Sarah Hoffmann
cb73d562d5 actions: move tests from Ubuntu 18 to 20
These are the tests that try to make sure old library version really
still work as expected. Increases PHP to 7.3 because 7.2 has errors.
2023-01-05 16:55:32 +01:00
Sarah Hoffmann
6c61690ef3 do not run osm2pgsql append with multiple threads
As the updates modify the placex table, there may be deadlocks
when different objects want to forward modifications to the same
place (for example because they are both linked to it).
2023-01-05 11:34:56 +01:00
Sarah Hoffmann
bf1f6a997c Merge pull request #2937 from lonvia/python-server-stub
Scaffolding for new Python-based search frontend
2023-01-03 14:26:33 +01:00
Sarah Hoffmann
90b29aa808 Merge pull request #2856 from t-tomek/patch-1
Update DB.php
2023-01-03 10:58:50 +01:00
Sarah Hoffmann
31d0468cd2 no type checking on old Ubuntu 18 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
38f467bae3 add event handler for default connection settings 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
5c7c4bb9a8 update versions for Python packages 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
9646ec4edd drop official support for Ubuntu 18 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
c665796c52 drop support for Python 3.6 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
a72e2ecb3f update dependencies for Actions 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
0c47558729 convert version to named tuple
Also return the new NominatimVersion rather than a string in the
status result.
2023-01-03 10:03:00 +01:00
Sarah Hoffmann
93b9288c30 fix error message for non-existing database 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
9d31a67116 add unit tests for new Python API 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
cf19036ce6 docs: extend dependency documentation 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
7219ee6532 extend BDD API tests to query via Python frameworks
A new config option ENGINE allows to choose between php and any of the
supported Python engines.
2023-01-03 10:03:00 +01:00
Sarah Hoffmann
d7bc846c3c fix linting issues and add type annotations 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
1adb0a9886 add support for starlette framework 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
8aa01c9c8e add support for falcon as server framework 2023-01-03 10:03:00 +01:00
Sarah Hoffmann
23dabad0b0 add sanic development server implementation 2023-01-03 10:02:53 +01:00
Sarah Hoffmann
45c675bd78 implement command line status call in Python 2023-01-03 10:02:35 +01:00
Sarah Hoffmann
860c6ecbcc Merge pull request #2936 from lonvia/fix-query-for-index-use
Split query that deletes old objects from placex
2023-01-03 09:56:03 +01:00
Sarah Hoffmann
3f38091421 split query that deletes old objects from placex
placex only has partial indexes over OSM types, so the OSM type
needs to be hardcoded to ensure these indexes are used.
2023-01-02 17:25:38 +01:00
Sarah Hoffmann
7704b3fc7b Merge pull request #2932 from lonvia/rework-flex-framework
Switch to osm2pgsql flex output
2022-12-29 17:23:38 +01:00
t-tomek
151b3c4021 Update DB.php 2022-12-27 08:34:20 +01:00
Sarah Hoffmann
610af95ed1 remove old import styles 2022-12-23 19:29:07 +01:00
Sarah Hoffmann
018ef5bd53 bdd: recreate project directory for every run 2022-12-23 18:36:41 +01:00
Sarah Hoffmann
200eae3bc0 add tests for examples in lua style documentation
And fix all the errors the tests have found.
2022-12-23 17:35:28 +01:00
Sarah Hoffmann
9321e425a4 add documentation for flex style
Includes minor adaptions to bring the code in line with the
documentation.
2022-12-23 11:10:40 +01:00
Sarah Hoffmann
9395c0dadc Merge pull request #2931 from mtmail/vagrant-md-use-dotenv
VAGRANT.md - replace local.php settings with .env
2022-12-22 10:20:15 +01:00
marc tobias
a40065878b VAGRANT.md - replace local.php settings with .env 2022-12-22 03:12:30 +01:00
t-tomek
e8d3c0a99a Update DB.php 2022-12-21 19:56:24 +01:00
t-tomek
2e6ff1b750 Update DB.php 2022-12-21 16:32:33 +01:00
t-tomek
15f9e397dd Update DB.php 2022-12-21 16:31:39 +01:00
t-tomek
87a109d0e9 Update DB.php 2022-12-21 16:30:55 +01:00
Sarah Hoffmann
f3ffbe167d use canonical url for nominatim.org 2022-12-20 16:55:47 +01:00
Sarah Hoffmann
89a34e7508 adapt tests for new lua styles 2022-12-19 17:32:28 +01:00
Sarah Hoffmann
f6fc750f08 install new lua import scripts 2022-12-19 17:27:40 +01:00
Sarah Hoffmann
c32f686c62 switch to osm2pgsql flex style by default 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
2ca83efc36 flex: add other default styles 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
06796745ff flex: hide compiled matchers 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
093d531509 flex: switch to functions for substyles
This gives us a bit more flexibility about the implementation
in the future.
2022-12-18 10:10:58 +01:00
Sarah Hoffmann
a915815e4d explicit export for functions in flex-base 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
de3c28104c flex: add combining clean function 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
d9d13a6204 flex: simplify name handling 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
d1f5820711 flex: simplify address configuration 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
7592f8f189 update osm2pgsql (flex not building index) 2022-12-18 10:10:58 +01:00
Sarah Hoffmann
ffbb61713a Merge pull request #2927 from lonvia/remove-forward-dependencies
Remove shortcut disabling forward dependencies
2022-12-17 15:59:09 +01:00
Sarah Hoffmann
6f51c1ba33 remove code that disables processing of forward dependencies 2022-12-11 19:35:58 +01:00
Sarah Hoffmann
cd861345b7 Merge pull request #2921 from lonvia/case-insensitive-utf8
Use grapheme_stripos instead of stripos in PHP code
2022-12-11 15:57:01 +01:00
Sarah Hoffmann
823502a40a use grapheme_stripos instead of stripos in PHP code
The stripos() does not handle non-ASCII correctly.
2022-12-11 13:55:27 +01:00
Sarah Hoffmann
4efad0bb95 Merge pull request #2920 from lonvia/no-postcode-for-rivers
Do not assign postcodes to long linear features
2022-12-10 17:30:32 +01:00
Sarah Hoffmann
922352e215 do not assign postcodes to long linear features
This avoids a postcode in particular for waterway features and
long natural features like ridges and valleys.

Fixes #2915.
2022-12-10 14:53:08 +01:00
Frederik Ramm
8a0b2dc0be Fix typo in NOMINATIM_LOG_FILE (#2919)
* fix typo in docs (NOMINATIM_LOG_FILE uses s not ms)
2022-12-08 17:34:54 +01:00
Sarah Hoffmann
7eced34e20 Merge pull request #2916 from lonvia/faq-postcodes
Add FAQ about finding bad postcodes
2022-12-08 08:42:10 +01:00
Sarah Hoffmann
64d00c1c8e add FAQ about finding bad postcodes 2022-12-07 21:38:53 +01:00
Sarah Hoffmann
6d48798d45 Merge pull request #2913 from lonvia/remove-duplicate-spaces-in-transliteration
contract duplicate spaces in transliteration string
2022-12-02 19:43:07 +01:00
Sarah Hoffmann
0e186835b9 contract duplicate spaces in transliteration string
There are some pathological cases where an isolated letter may
be deleted because it is in itself meaningless. If this happens in
the middle of a sentence, then the transliteration contains two
consecutive spaces. Add a final rule to fix this.

See #2909.
2022-12-02 10:15:02 +01:00
Sarah Hoffmann
b607eb9678 Merge pull request #2908 from n-timofeev/update-vagrantfile
Update Vagrantfile
2022-11-29 10:33:16 +01:00
n-timofeev
85a68f1e56 Update Vagrantfile 2022-11-29 11:12:10 +03:00
Sarah Hoffmann
45d13bc295 Merge pull request #2906 from lonvia/move-data-dirs-into-module
Reorganize how Nominatim finds its extra data and libraries
2022-11-28 08:44:29 +01:00
Sarah Hoffmann
2231401483 clean up uses of cli.nominatim()
They should not hand in data paths anymore.
2022-11-27 15:27:04 +01:00
Sarah Hoffmann
2abe9e6fd9 use data paths from new nominatim.paths 2022-11-27 12:15:41 +01:00
Sarah Hoffmann
20f56dfc77 introduce paths module which contains the data paths
This means that nominatim can find its data even when not called
from the command line tool.
2022-11-27 12:02:37 +01:00
Sarah Hoffmann
67664406da Merge pull request #2905 from lonvia/remove-nominatim-tool-var
Remove NOMINATIM_NOMINATIM_TOOL variable
2022-11-26 20:24:58 +01:00
Sarah Hoffmann
0ed60d29cb remove NOMINATIM_NOMINATIM_TOOL variable
This was used by the old PHP scripts to call the Python tool.
With the scripts now gone, the variable can be removed.
2022-11-26 16:40:20 +01:00
Sarah Hoffmann
04ee39467a actions: install keys for postgres repo 2022-11-24 14:04:05 +01:00
Sarah Hoffmann
1f3edf6eba prepare release 4.2.0 2022-11-24 10:43:29 +01:00
Sarah Hoffmann
a15c197547 add checklist for releases 2022-11-24 10:43:25 +01:00
Sarah Hoffmann
13dbeb75c7 Merge pull request #2903 from lonvia/migration-for-index-reorganization
Add migration for reorganization of pending indexes
2022-11-24 10:13:38 +01:00
Sarah Hoffmann
6aded60045 add migration for reorganization of pending indexes
Fixes #2900.
2022-11-24 08:48:05 +01:00
Sarah Hoffmann
8dfdf64dd5 Merge pull request #2902 from lonvia/tiger-county-sanitizer
Tiger county sanitizer
2022-11-23 17:58:42 +01:00
Sarah Hoffmann
41e8bddaa9 remove BDD test for tiger:county
We no longer rely on the import to strip the tag.
2022-11-23 10:37:27 +01:00
Sarah Hoffmann
fd3dec8efe add sanitizer for TIGER tags
Currently only takes over cleaning the tiger:county data. This was
done by the import until now.
2022-11-23 10:37:27 +01:00
Sarah Hoffmann
55ee08f42b Merge pull request #2892 from lonvia/optional-forward-dependecies
Add experimental configuration switch for enabling forward dependencies
2022-11-21 16:57:45 +01:00
Sarah Hoffmann
b6ff697ff0 add experimental option for enabling forward dependencies 2022-11-21 14:48:00 +01:00
Sarah Hoffmann
925ac1e1b9 Merge pull request #2890 from lonvia/use-rank-search-for-reverse-polygon-match
Use rank search for reverse polygon match
2022-11-20 22:11:35 +01:00
Sarah Hoffmann
77acc1c2be force use of geometry index for reverse polygon lookup 2022-11-20 20:22:44 +01:00
Sarah Hoffmann
ebe489c227 use rank_search for reverse polygon match 2022-11-20 20:22:23 +01:00
Sarah Hoffmann
9c152a030a fix condition under which place_to_be_deleted is created
It is needed for updates, independently if reverse-only is set.
2022-11-19 21:53:14 +01:00
Sarah Hoffmann
b310c86c55 Merge pull request #2889 from lonvia/fix-interpolation-updates
Drop illegal values for addr:interpolation on update
2022-11-18 18:51:11 +01:00
Sarah Hoffmann
c9ff7d2130 drop illegal values for addr:interpolation on update 2022-11-18 17:26:56 +01:00
Sarah Hoffmann
52456230cc Merge pull request #2887 from lonvia/lookup-linked-places
Add support for lookup of linked places
2022-11-17 13:35:53 +01:00
Sarah Hoffmann
4422533adb Merge pull request #2886 from lonvia/closest-street-in-associated
Handle associatedStreet relations with multiple streets correctly
2022-11-17 07:29:25 +01:00
Sarah Hoffmann
c4b13f2b7f add support for lookup of linked places 2022-11-16 21:34:45 +01:00
Sarah Hoffmann
4f05a03d13 handle associatedStreet relations with multiple streets
When an associatedStreet relation has multiple street members
always take the closest one. Avoid geometry operations for
the frequent case that there is only one street.
2022-11-16 17:25:51 +01:00
Sarah Hoffmann
7a2e586cce Merge pull request #2884 from lonvia/tweak-special-term-penalties
Correctly handle special term + name combination
2022-11-15 19:29:55 +01:00
Sarah Hoffmann
98ce424650 Merge pull request #2885 from lonvia/remove-unused-countries
Remove dependent territories from country list
2022-11-15 19:29:39 +01:00
Sarah Hoffmann
3059a3da4e correctly handle special term + name combination
Special terms with operator name usually appear in combination with the
name. The current penalties only took name + special term into account
not special term + name.

Fixes #2876.
2022-11-15 11:55:40 +01:00
Sarah Hoffmann
d63d7cb9a8 remove dependent territories from country list
Removes territories of US, France, Australia and Netherlands from the
country list. These territories have their own country code (which is
why they are in the list in the first place) but are mapped as part of
the admin_level 2 relations for the respective parent countries.
Therefore they never had any places attached. In practical terms, the
change only affects the number of tables created.
2022-11-15 11:37:30 +01:00
Sarah Hoffmann
f3f542e864 Merge pull request #2881 from lonvia/more-update-tests-for-osm2pgsql
Experimental support for osm2pgsql flex output
2022-11-15 09:39:46 +01:00
Sarah Hoffmann
93ada250f7 bdd: add tests for osm2pgsql update of postcode nodes 2022-11-14 17:27:04 +01:00
Sarah Hoffmann
d8e3ba3b54 bdd: add osm2pgsql tests for updating interpolations 2022-11-14 16:57:31 +01:00
Sarah Hoffmann
a46348da38 bdd: test placex content when updating with osm2pgsql 2022-11-14 14:48:44 +01:00
Sarah Hoffmann
36cf0eb922 reorganize handling of place type changes
Always replace existing entries in place, never delete them because
a direct delete will cause conflicts.
2022-11-14 13:57:26 +01:00
Sarah Hoffmann
63a9bc94f7 fix country handling in flex style
If the country tag does not match a 2-letter code, it needs to
be dropped.
2022-11-10 15:52:13 +01:00
Sarah Hoffmann
2dafc4cf4f remove tests that differ between lua and gazetteer versions 2022-11-10 15:51:55 +01:00
Sarah Hoffmann
68d09f9cad node locations must be stable for osm2pgsql update tests 2022-11-10 11:11:45 +01:00
Sarah Hoffmann
b98d3d3f00 bdd: extend osm2pgsql update tests
Now also checks for correct indexing state of placex table.
2022-11-10 09:38:25 +01:00
Sarah Hoffmann
3683cf7ddc optimise tag match function 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
84e5e601e1 add lua requirements for vagrant scripts 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
a1da149211 CI: require lua libraries 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
74405e9684 add migration for place_to_be_deleted table 2022-11-10 09:38:25 +01:00
Sarah Hoffmann
2fac507453 change updates to handle delete/insert workflow
This makes Nominatim compatible with osm2pgsql's default update
modus operandi of deleting and reinserting data. Deletes are diverted
into a TODO table instead of executing them. When data is reinserted,
the corresponding entry in the TODO table is deleted. After updates are
finished, the remaining entries in the TODO table are executed, doing
the same work as the delete trigger did before.

The new behaviour also works against the gazetteer output with its
insert-only mechanism.
2022-11-10 09:38:23 +01:00
Sarah Hoffmann
51ed55cc32 initial flex import scripts
Only implements the extratags style for the moment. Tests pass
for the same behaviour as the gazetteer output. Updates still need
to be done.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
de2a3bd5f8 bdd tests: make import style configurable
The switch is for development. Tests are not guaranteed to still
work when run with anything but the 'extratags' style.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
981e9700be add osm2pgsql gazetteer tests
This ports the gazetteer tests from osm2pgsql to BDD tests.
2022-11-10 09:37:38 +01:00
Sarah Hoffmann
b52ce4f9f2 Merge pull request #2869 from mtmail/improve-tiger-install-doc
Tiger install doc: add -refresh website- step
2022-11-09 20:48:39 +01:00
Sarah Hoffmann
64c591da7f fix type issues with calls to pyosmium 2022-11-09 20:46:33 +01:00
Marc Tobias
2387648a85 Tiger install doc: add -refresh website- step 2022-11-09 17:33:31 +01:00
Sarah Hoffmann
846ecff0c5 Merge pull request #2871 from lonvia/fix-timeout-for-updates
Fix timeout for updates
2022-11-09 14:26:39 +01:00
Sarah Hoffmann
26a5b59c28 add types-requests dependency 2022-11-09 09:12:37 +01:00
Sarah Hoffmann
6ddb39fda3 respect socket timeout also in other replication functions 2022-11-09 09:12:37 +01:00
Sarah Hoffmann
1fdcec985a fix timeout use for replication timeout
The timeout parameter is no longer taken into account since
pyosmium switched to the requests library. This adds the parameter
back.
2022-11-09 09:12:37 +01:00
Sarah Hoffmann
30f526c943 Merge pull request #2870 from mtmail/update-github-actions-to-node-16
update those github action packages still using node12
2022-11-08 17:24:53 +01:00
Marc Tobias
253127cb9f update those github action packages still using node12 2022-11-08 15:16:55 +01:00
Sarah Hoffmann
3237ca587f Merge pull request #2866 from lonvia/reverse-ignore-interpolations-without-parent
Ignore interpolations without parent on reverse search
2022-11-07 09:00:59 +01:00
Sarah Hoffmann
0dbc0ae6d5 ignore interpolations without parent on reverse search
If no parent can be found for an interpolation, there is most
likely a data error involved. So don't show these interpolations
in reverse search results.
2022-11-05 22:16:09 +01:00
Sarah Hoffmann
7461ff4680 Merge pull request #2865 from Romeo-PHILLIPS/fix/documentation_status_code
Fix: documentation status code
2022-11-05 22:14:44 +01:00
Romeo
afc714e1d3 fix: format 2022-11-04 18:05:40 +01:00
Romeo
3bc0db8d91 fix: markup 2022-11-04 18:04:28 +01:00
Romeo
d573da5b2c fix: 705 Status Code Documentation 2022-11-04 18:03:49 +01:00
Romeo
ecd5a3fdf9 fix: 705 Status Code Documenation 2022-11-04 17:59:36 +01:00
Sarah Hoffmann
543d63e7a9 Merge pull request #2862 from mtmail/remove-version-from-fpm-sock-file
Install scripts: remove version from /var/run/php-fpm filenames
2022-11-04 17:32:50 +01:00
Sarah Hoffmann
7a22ae6bf9 Merge pull request #2863 from lonvia/add-support-for-postgresql-15
Update CI tests to postgresql 15
2022-11-04 17:32:06 +01:00
Sarah Hoffmann
ebe23d6882 update CI tests to postgresql 15 2022-11-04 16:21:15 +01:00
marc tobias
33c805aee0 Install scripts: remove version from /var/run/php-fpm filenames 2022-11-04 14:22:11 +01:00
t-tomek
244b6fcef6 Update DB.php
Use SET instead of UPDATE queries to support read only databases
2022-11-02 14:11:52 +01:00
Sarah Hoffmann
616ff4ae25 actions: pin pyicu to 2.9 2022-10-24 14:21:44 +02:00
Sarah Hoffmann
e221eaa977 Merge pull request #2836 from mtmail/tiger2022
Documentation: remove year from TIGER filename, new 2022 data
2022-10-24 11:21:55 +02:00
Sarah Hoffmann
eed7abb839 Merge pull request #2838 from lonvia/update-osm2pgsql
Update osm2pgsql to latest 1.7.1 release
2022-10-05 18:59:13 +02:00
Sarah Hoffmann
5f6dcd36ed fix flaky API test
The search 'landstr' produces many duplicates so that with
some bad luck 4 or less results may appear. Disable deduplication
to make it more predictable.
2022-10-05 15:16:14 +02:00
Sarah Hoffmann
f395054536 update osm2pgsql to 1.7.1 2022-10-04 21:16:57 +02:00
Sarah Hoffmann
afeafc8aa7 Merge pull request #2835 from lonvia/secondary-importance
Secondary importance
2022-10-04 16:25:47 +02:00
marc tobias
f1ece658f8 Documentation: remove year from TIGER filename 2022-10-04 14:19:36 +02:00
Sarah Hoffmann
b3abb355eb docs: add customization hints for secondary importance
Removing the download links for now as the tile importance
is still too experimental.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
5877b69d51 do not run unit test when postgis_raster is not available 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
5ec2c1b712 adapt unit tests to changed function names 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
0a73ed7d64 add secondary importance to API BDD tests
Also fixes a path issue during API test DB creation that could
never possibly have worked.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
abf349fb0d simplify use of secondary importance
The values in the raster are already normalized between 0 and 2**16,
so a simple conversion to [0, 1] will do.

Check for existence of secondary_importance table statically when
creating the SQL function. For that to work importance tables need
to be created before the functions.
2022-10-01 11:01:49 +02:00
Sarah Hoffmann
3185fad918 load views as a SQL file and rename to 'secondary importance'
The only requirement for secondary importance is that a raster table
comes out of it. The generic name leaves open where the data comes
from.
2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
0ab0f0ea44 Integrated OSM views into importance computation 2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
ac467c7a2d Enhanced the implementation of OSM views GeoTIFF import functionality 2022-10-01 11:01:49 +02:00
Tareq Al-Ahdal
c85b74497b Initial implementation of GeoTIFF import functionality 2022-10-01 11:01:49 +02:00
Sarah Hoffmann
3381a92d92 Merge pull request #2832 from lonvia/conditional-analyze-on-indexing
Only run analyze on indexing when work was done
2022-09-28 15:17:40 +02:00
Sarah Hoffmann
a2ee58d8a1 only run analyze on indexing when work was done
This speeds up processing when continuing indexing after it was
interrupted.
2022-09-28 10:22:54 +02:00
Sarah Hoffmann
051f3720ce Merge pull request #2829 from lonvia/optimize-indexes
Further optimize indexes
2022-09-26 10:02:51 +02:00
Sarah Hoffmann
f017e1e9a1 make sure indexes are used 2022-09-25 14:09:45 +02:00
Sarah Hoffmann
33ba6896a8 further split up the big geometry index
Adds partial indexes for all geometry queries used during import.
A full index is not necessary anymore at that point. Still create
the index afterwards for use in queries.

Also adds documentation for all indexes on where they are used.
2022-09-21 16:21:41 +02:00
Sarah Hoffmann
f4d3ae6f70 consolidate indexes over geometry_sectors
The indexes over geometry_sectors are mainly used for ordering
the places which need indexing. That means they function effectively
as a TODO list. Consolidate them so that they always only contain
the places which are still to do. Also add the appropriate index
for the boundary indexing phase.
2022-09-21 10:38:58 +02:00
Sarah Hoffmann
860f3559a1 split up large osmid index on placex
This doesn't do anything in terms of lookup speeds but the resulting
indexes are quite a bit smaller.
2022-09-21 09:24:57 +02:00
Sarah Hoffmann
d8be8a7293 fix funding link 2022-09-19 15:39:58 +02:00
Sarah Hoffmann
9750a361c9 add Github Sponsoring to funding page 2022-09-19 15:38:56 +02:00
Sarah Hoffmann
ed3dd81d04 run final index creation in parallel 2022-09-19 11:55:25 +02:00
Sarah Hoffmann
bef1aebf1c add function for parallel execution of SQL scripts 2022-09-19 11:52:17 +02:00
Sarah Hoffmann
26688ba35d add link to funding page 2022-09-19 10:30:58 +02:00
Sarah Hoffmann
a1158feeb8 Merge pull request #2818 from lonvia/better-geometry-index
Add index for lookup of addressable areas
2022-09-19 10:18:43 +02:00
Sarah Hoffmann
aef014a47d add indexes for lookup of addressable areas
The generic geometry index has become too slow for that purpose.
2022-09-18 16:57:12 +02:00
Sarah Hoffmann
d6a0947e5a update security policy for 4.1 version 2022-09-13 08:58:31 +02:00
Sarah Hoffmann
bc94318d83 mypy: fix new warnings due to external type updates 2022-09-05 17:39:35 +02:00
Sarah Hoffmann
d4c6e58b57 Merge pull request #2812 from mausch/patch-1
docs: fix links to rank docs
2022-09-05 17:27:09 +02:00
Mauricio Scheffer
66832cf0a5 docs: fix links to rank docs 2022-09-05 11:11:13 +01:00
Sarah Hoffmann
bcfe817212 Merge pull request #2799 from lonvia/fix-inclusions-with-extratags
Ignore irrelevant extra tags on address interpolations
2022-08-13 19:02:27 +02:00
Sarah Hoffmann
07d72f950b Merge pull request #2739 from tareqpi/collect_os_info.sh
integration of host system information script into Nominatim CLI tool
2022-08-13 19:02:14 +02:00
Sarah Hoffmann
dddfa3a075 ignore irrelevant extra tags on address interpolations
When deciding if an address interpolation has address information, only
look for addr:street and addr:place. If they are not there go looking
for the address on the address nodes. Ignores irrelevant tags like
addr:inclusion.

Fixes #2797.
2022-08-13 14:07:06 +02:00
Tareq Al-Ahdal
74019877a4 Added the feature of collecting host system information to the CI tests 2022-08-13 06:22:13 +08:00
Tareq Al-Ahdal
465d82a92f Integrated 'collect_os_info.py' into Nominatim's CLI tool 2022-08-13 06:18:10 +08:00
Tareq Al-Ahdal
49f889bf09 Enhanced and refactored 'collect_os_info.py'
Changed the script to functional programming paradigm to remove the big number of local attributes to decrease memory usage when running it. Additional OS info are now included.
2022-08-13 06:13:05 +08:00
Tareq Al-Ahdal
5e477e3b5b Merge remote-tracking branch 'upstream/master' into collect_os_info.sh 2022-08-13 05:53:39 +08:00
Sarah Hoffmann
67cfad6a2c Merge pull request #2798 from lonvia/more-rank-change-fixes
Invalidations when boundaries and places change their rank
2022-08-12 11:42:03 +02:00
Sarah Hoffmann
487e81fe3c more invalidations when boundary changes rank
When a boundary or place changes its address rank, all places where
it participates as address need to be potentially reindexed.
Also use the computed rank when testing place nodes against
boundaries. Boundaries are computed earlier.

Fixes #2794.
2022-08-12 09:48:46 +02:00
Sarah Hoffmann
18f525ac54 Merge pull request #2793 from lonvia/increase-minimum-results
Fix minimum number of results that are searched for
2022-08-09 20:08:45 +02:00
Sarah Hoffmann
e0c184e097 fix base number of returned results
The intent was to always search for at least 10 results.

Improves on #882.
2022-08-09 13:53:20 +02:00
Sarah Hoffmann
78716ab8b9 Merge pull request #2792 from lonvia/new-type-annotations
Adapt to new type annotations from typeshed
2022-08-09 13:52:20 +02:00
Sarah Hoffmann
8d082c13e0 adapt to new type annotations from typeshed
Some more functions from psycopg are now properly annotated.
No ignoring necessary anymore.
2022-08-09 11:06:54 +02:00
Sarah Hoffmann
196dc2a659 docs: add types-psutil requirement 2022-08-08 09:46:25 +02:00
Sarah Hoffmann
4fe797d704 remove mypy ignore for psutil.virtual_memory()
Now available in typeshed.
2022-08-08 09:44:45 +02:00
Sarah Hoffmann
3c188164ab Merge pull request #2789 from lonvia/update-osm2pgsql
Update osm2pgsql (fixes admin_level parsing)
2022-08-08 09:15:58 +02:00
Sarah Hoffmann
5330370076 update osm2pgsql (fix admin_level parsing) 2022-08-07 18:34:47 +02:00
Sarah Hoffmann
eecc73ea1a docs: fix dangling links 2022-08-05 15:29:43 +02:00
Sarah Hoffmann
8c73c0795e docs: update links to vagrant instructions 2022-08-05 15:27:11 +02:00
Sarah Hoffmann
7d68aa8f04 prepare release 4.1.0 2022-08-05 14:33:11 +02:00
Sarah Hoffmann
a0cd96e05e Merge pull request #2786 from lonvia/export-centroid-for-tokenizer
Export centroid to tokenizer
2022-08-01 11:38:24 +02:00
Sarah Hoffmann
b19c90b9a6 export centroid to tokenizer
May come in handy when developing sanitizers for an area smaller
than country size.
2022-07-31 22:10:58 +02:00
Sarah Hoffmann
e427712cb0 Merge pull request #2784 from lonvia/doscs-customizing-icu-tokenizer
Document the public API of sanitizers and token analysis modules
2022-07-31 19:15:50 +02:00
Sarah Hoffmann
9864b191b1 fix various typos 2022-07-31 17:10:35 +02:00
Sarah Hoffmann
e7574f119e add simple examples of sanitizers and token analysis 2022-07-29 17:15:25 +02:00
Sarah Hoffmann
51b6d16dc6 overhaul the token analysis interface
The functional split between the two functions is now that the
first one creates the ID that is used in the word table and
the second one creates the variants. There no longer is a
requirement that the ID is the normalized version. We might
later reintroduce the requirement that a normalized version be available
but it doesn't necessarily need to be through the ID.

The function that creates the ID now gets the full PlaceName. That way
it might take into account attributes that were set by the sanitizers.

Finally rename both functions to something more sane.
2022-07-29 15:14:11 +02:00
Sarah Hoffmann
34d27ed45c move PlaceName into the generic data module 2022-07-29 11:42:20 +02:00
Sarah Hoffmann
094100bbf6 harmonize spelling
Stick with the American spelling of Analyze.
2022-07-29 10:52:01 +02:00
Sarah Hoffmann
c8873d34af harmonize interface of token analysis module
The configure() function now receives a Transliterator object instead
of the ICU rules. This harmonizes the parameters with the create
function.
2022-07-29 10:43:07 +02:00
Sarah Hoffmann
f0d640961a add documentation for custom token analysis 2022-07-29 09:41:28 +02:00
Sarah Hoffmann
3746befd88 add documentation for sanitizer interface
Also switches mkdocstrings to 0.18 with the rather unfortunate
consequence that now mkdocstrings-python-legacy is needed as well.
2022-07-28 22:00:29 +02:00
Sarah Hoffmann
a8b037669a Merge pull request #2780 from lonvia/python-modules-in-project-directory
Support for external sanitizer and token analysis modules
2022-07-28 21:58:04 +02:00
Sarah Hoffmann
d819036daa add support for external token analysis modules 2022-07-25 16:27:22 +02:00
Sarah Hoffmann
6d41046b15 add support for external sanitizer modules 2022-07-25 16:10:19 +02:00
Sarah Hoffmann
7b7203c149 add function for loading plugin modules
Loads modules for configurable code like tokenizers, sanitizers, etc.
Supports internal modules, external libraries and code from the
project directory.
2022-07-25 16:10:10 +02:00
Sarah Hoffmann
95d4061b2a Merge pull request #2775 from lonvia/remove-centos-instructions
Remove vagrant scripts for CentOS
2022-07-25 10:29:32 +02:00
Sarah Hoffmann
375b57a96a vagrant: remove proj dependency and only require php-cli 2022-07-24 10:24:18 +02:00
Sarah Hoffmann
12ace4329d remove CentOS installation instructions
Fixes #2601.
2022-07-24 10:22:22 +02:00
Sarah Hoffmann
09e0be0e39 Merge pull request #2774 from lonvia/parameter-arrays
Ignore URL parameters in array notation
2022-07-23 23:56:32 +02:00
Sarah Hoffmann
cd4bcea894 ignore API parameters in array notation
PHP automatically parses parameters in an array notation(foo[]) into
array types. Ignore these parameters as 'unknown'.

Fixes #2763.
2022-07-23 10:51:44 +02:00
Sarah Hoffmann
1bee151fe3 Merge pull request #2772 from kianmeng/fix-typos
docs: fix typos
2022-07-20 17:13:30 +02:00
Kian-Meng Ang
f5e52e748f docs: fix typos 2022-07-20 22:05:31 +08:00
Sarah Hoffmann
b7f6c7c76a docs: slightly increase recommended hardware requirements 2022-07-20 10:16:23 +02:00
Sarah Hoffmann
bc7f6209d8 Merge pull request #2770 from lonvia/typed-python
Type annotations for Python code
2022-07-19 09:03:30 +02:00
Sarah Hoffmann
372a548c28 CI: remove installation of pip on Ubuntu 20 2022-07-18 12:19:04 +02:00
Sarah Hoffmann
5aad105c73 add explicit cast for fetchone 2022-07-18 10:18:51 +02:00
Sarah Hoffmann
f40c83d025 CI: use psutil type stubs 2022-07-18 09:55:58 +02:00
Sarah Hoffmann
83054af46f remove typing_extensions requirement
The typing_extensions package is only necessary now when running mypy.
It won't be used at runtime anymore.
2022-07-18 09:55:58 +02:00
Sarah Hoffmann
cb81f11422 CI: make type checking strict 2022-07-18 09:55:58 +02:00
Sarah Hoffmann
a849f3c9ec add type annotations for command line functions 2022-07-18 09:55:54 +02:00
Sarah Hoffmann
25d854dc5c add type annotations for Tiger import function 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
9963261d8d add type annotations to special phrase importer 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
459ab3bbdc add type annotations to database check functions 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
a21d4d3ac4 add type annotations for database import functions 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
4da1f0da6f add type annotations for migrations 2022-07-18 09:54:29 +02:00
Sarah Hoffmann
17bbe2637a add type annotations to tool functions 2022-07-18 09:54:27 +02:00
Sarah Hoffmann
6c6bbe5747 add type annotations for ICU tokenizer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
18b16e06ca add type annotations for legacy tokenizer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e37cfc64d2 add type annotations to ICU tokenizer helper modules 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
77510f4a3b add typing extensions for Ubuntu22.04 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
d35e3c25b6 add type annotations for token analysis
No annotations for ICU types yet.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
62eedbb8f6 add type hints for sanitizers 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
5617bffe2f add type annotations for indexer 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
8adab2c6ca add typing information for postcode formatter 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
d0c44431d0 add typing information for place_info and country_info 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
282a61ce51 add typing information for utils submodule 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
7a1d22ff15 type annotations for non-blocking DB connection 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
0dff71a410 add type annotations for SQL preprocessor 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
26f30bff28 add type annotation to DB utils
As a cursor is needed as type, make this a public type.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e6775e713c add typing information to DB properties 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
69f9122bef add typing annotations for DB status module
Requires TypedDict which is only available from Python 3.8. Require
therefore typing_extensions to make the functions available for
earlier Python versions.
2022-07-18 09:47:57 +02:00
Sarah Hoffmann
fc254fc744 adapt use of Connection in bdd tests to name change 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
845c43137a add type annotations to freeze functions 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
aaf2b6032e fix uses of config.get_path() to expect None 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
c4928c646d define type for environment dictionaries 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
f12fe54d2b restrict return type more 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
fc03c0266a add type annotations to exec_utils 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
7b042de300 CI: install type info for psycopg2 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
681aad7e0d avoid issues with Python < 3.9 and linting 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
f22fa992f7 move complex typing annotations to extra file 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
992e6f72cf type annotations for DB utils 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
e6ee3c772c type annotations for DB connection 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
9d716f0f7d mypy: add psycopg2 typing info from typeshed 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
95ed95c616 add type annotations to config module 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
bf36f33e79 add type annotations for version.py 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
58ab8319b9 mypy: ignore dotenv library 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
a87cb77ce8 document use of mypy 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
2be45a35b4 CI: add mypy to tests 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
9b636fdc10 mypy: minimal annotations to enable a clean run 2022-07-18 09:47:57 +02:00
Sarah Hoffmann
b1903f0fbf Merge pull request #2761 from lonvia/repair-index-analysis
Repair `admin --analyse-indexing`
2022-07-18 09:38:08 +02:00
Sarah Hoffmann
00f5b78160 Merge pull request #2764 from otbutz/patch-4
Remove legacy Postgres options
2022-07-13 15:51:47 +02:00
otbutz
d58061473e Remove legacy Postgres options 2022-07-12 09:49:10 +02:00
Sarah Hoffmann
33cb925f2e Merge pull request #2691 from mtmail/ubuntu-22
Vagrant and CI tests for Ubuntu 22.04
2022-07-11 15:37:51 +02:00
marc tobias
c70ca7f57b In tests for PHP 8 disable Just-in-time, it conflicts with tools that determine coverage 2022-07-09 22:03:48 +02:00
Marc Tobias
a6dab5e300 Vagrant and CI tests for Ubuntu 22.04 2022-07-09 22:03:48 +02:00
Sarah Hoffmann
7cafec0750 decode_json() always create arrays instead of objects 2022-07-09 09:10:21 +02:00
Sarah Hoffmann
4b12d52ef5 convert admin --analyse-indexing to new indexing method
A proper run of indexing requires the place information from the
analyzer. Add the pre-processing of place data, so the right
information is handed into the update function.
2022-07-07 16:20:08 +02:00
Sarah Hoffmann
300612c5a8 Merge pull request #2760 from lonvia/reorganize-data-classes
Code cleanup: move some common code into the data submodule
2022-07-07 16:12:11 +02:00
Sarah Hoffmann
856925d19b remove analyze() from PlaceInfo class
The function creates circular dependencies.
2022-07-07 12:06:58 +02:00
Sarah Hoffmann
cbbcbb1fd7 move country_info into data submodule 2022-07-06 11:08:36 +02:00
Sarah Hoffmann
bce93d60bd move PlaceInfo into data submodule
This data structure is shared between indexer and tokenizer.
2022-07-06 10:54:47 +02:00
Sarah Hoffmann
69e51aebab test: avoid column names with upper-case letters
This may cause problems when the column names get quoted.
2022-07-05 09:12:55 +02:00
Sarah Hoffmann
8ac133f2ee CI: remove unneeded stuff to make space for DB 2022-07-03 16:42:57 +02:00
Sarah Hoffmann
67996929e0 Merge pull request #2706 from mtmail/php-fixes-php7-vs-php8
PHP 8 behaves slightly different with in_array and usort
2022-07-03 11:28:52 +02:00
Marc Tobias
ccf119206d PHP 8 behaves slightly different with in_array and usort 2022-07-03 10:55:34 +02:00
Sarah Hoffmann
bc63f10057 fix syntax error with tablespaces 2022-06-30 09:19:16 +02:00
Sarah Hoffmann
6f15306766 docs: replace deprecated pages option
Fixes #2661.
2022-06-29 20:30:28 +02:00
Sarah Hoffmann
161d83af5b fix handling of zero importance
To avoid importance becoming zero and cancelling out other weights,
df008d99f5 introduced a minimum value
for importance. That broke importances for interpolated addresses,
which are less than zero.

Instead of setting a minimum, set zero importances to a very small
value.

Fixes #2753.
2022-06-29 17:54:30 +02:00
Sarah Hoffmann
3bf3b894ea Merge pull request #2757 from lonvia/filter-postcodes
Add filtering, normalisation and variants for postcodes
2022-06-24 21:09:41 +02:00
Sarah Hoffmann
536f08f33a ignore 5+ postcodes in the US for now
Hierarchical postcodes need a different treatment.
2022-06-24 19:24:22 +02:00
Sarah Hoffmann
3dd7410bb7 bdd: correctly skip postcode tests for legacy 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
93d5be097a bdd: do not expect legacy word table to be without empty tokens
It can happen for bogus names and this will not get fixed anymore.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
6eb9044353 adapt search algorithm to new postcode format in word 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
612d34930b handle postcodes properly on word table updates
update_postcodes_from_db() needs to do the full postcode treatment
in order to derive the correct word table entries.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
5be320368c add documentation for postcode customization 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
7f2ad4ac7e fix linting issue 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
0f00f4968c fix up BDD tests for postcode changes
Includes smaller code fixes found by the tests.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
37b2c6a830 port legacy tokenizer to new postcode handling
Also documents the changes to the SQL functions of the tokenizer.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
e86db3001f fix postcode pattern for Mozambique
Optional groups are not implemented yet.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
7b6ec4fc6c add tests for discarding bad postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
67dfa38e60 fix linting problems 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
2eca9fc8af cache postcode normalization 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
b5e5efc131 only add well-formatted postcodes to location table 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
80ea13437d move postcode matcher in a separate file 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
bf86b45178 move postcode centroid computation to Python 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
4885fdf0f9 add class for online centroid computation 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
b7704833e4 icu: switch postcodes to using the pre-formatted one 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
ca7b46511d introduce and use analyzer for postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
18864afa8a postcodes: introduce a default pattern for countries without postcodes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
5ba75df507 postcode: generate a generic form 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
9cf700e85d add postcodes for most of the remaining countries
Now includes all postcodes that have optional parts.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
9172696324 postcodes: add support for optional spaces 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
49626ba709 add postcode formats with optional country code
If the country code is not part of the mandatory output, the
country code filter will do the correct handling.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
baee6f3de0 postcodes: strip leading country codes 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
28ab2f6048 add postcodes patterns without optional spaces 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
90d4d339db initial postcode cleaner for simple patterns
Moves postcodes that are either in countries without a postcode
system or don't correspond to the local pattern for postcodes into
a field for a normal address part. Makes them searchable but not as
a special address. This has two consequences: they are no longer a
skippable part of the address and the postcodes cannot be searched
on their own.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
6e0014e138 add postcode patterns for numeric postcodes
Adds patterns for countries that have simple numeric-only postcodes.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
8080625747 remove postcodes from countries that don't have them
The postcodes will only be removed as a 'computed postcode' they
are still searchable for the given object.
2022-06-23 23:42:31 +02:00
Sarah Hoffmann
21fb501699 add info about countries without a postcode 2022-06-23 23:42:31 +02:00
Sarah Hoffmann
0cd3a1b9bd avoid near searches in very large areas
At some point the contains call becomes too expensive.
2022-06-23 23:42:09 +02:00
Sarah Hoffmann
8de483a45b Merge pull request #2755 from Luflosi/fix-typo
Fix typo
2022-06-20 22:23:36 +02:00
Luflosi
3ea87169ac Fix typo 2022-06-20 20:41:00 +02:00
Sarah Hoffmann
42d16d8296 Merge pull request #2751 from mtmail/issue-2750
Documentation fix: should be "nominatim refresh"
2022-06-20 10:21:06 +02:00
marc tobias
adf3ae004f Documentation fix: should be "nominatim refresh" 2022-06-20 02:32:23 +02:00
Sarah Hoffmann
fced1172c4 Merge pull request #2746 from bgo-eiu/patch-2
Added additional languages for Pakistan in country settings
2022-06-18 09:40:47 +02:00
Sarah Hoffmann
299e98776e Merge pull request #2749 from stefkiourk/patch-1
Typos and syntax on Reverse.md
2022-06-17 22:11:55 +02:00
Stef Ki
b803505402 Typos and syntax on Reverse.md 2022-06-17 21:01:38 +02:00
Sarah Hoffmann
8fb9795d04 Merge pull request #2748 from lonvia/bdd-grid-origin
BDD tests: remove support for scenes
2022-06-17 15:25:29 +02:00
Sarah Hoffmann
d8623d6818 bdd: remove support for scenes
Only keep support for the special point geometry 'country:xx'.
2022-06-17 11:54:18 +02:00
Sarah Hoffmann
6c58a4c46c bdd: move query tests from scene to grid description 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
19f67e167c bdd: remove step for scene setup 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
00d8df6fc3 bdd: move update tests from scenes to grid descriptions 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
02068aec7f bdd: move import tests from scenes to grid descriptions 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
3493d317e4 bdd: clear log buffer after a successful import run 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
a2b486a5b0 bdd: allow to set an origin of the grid 2022-06-17 11:54:18 +02:00
Sarah Hoffmann
3d0f8bdc39 Merge pull request #2745 from lonvia/city-in-city-fix
Improve hierarchy computation for place areas
2022-06-16 15:36:39 +02:00
bgo-eiu
04644102f2 added additional languages for pakistan in country settings 2022-06-16 06:26:44 -04:00
Sarah Hoffmann
f833cc80df use default ranks when reorganising rank_address
When shifting address ranks, the evaluation is always done against
unshifted address ranks on import because the objects we compare against
have not been indexed yet. This changes for updates when the objects have
been touched in the meantime. To ensure consistent behaviour across
imports and updates, always use the  unshifted address ranks.
2022-06-16 11:20:23 +02:00
Sarah Hoffmann
df0142678a improve address ordering with mixes of place and admin areas
Resolves a couple of situations where a mixed use of places areas and
administrative boundaries would result in a hierarchy that did not
properly respect the contains relation.
2022-06-16 10:44:16 +02:00
Sarah Hoffmann
800240550b Merge pull request #2737 from lonvia/reset-linking-ranks
Fix rank inheritance from linked places
2022-06-06 09:29:32 +02:00
Sarah Hoffmann
15cf7dd416 add testcase for #2551
This test proves that places that are linked need to be reindexed.
2022-06-05 21:39:17 +02:00
Sarah Hoffmann
2c05fc858a fix rank inheritance from linked places
When taking over the address rank from a linked place, it needs
to be the originally computed rank, not the one that might have
been adjusted in the meantime. The adjustment was made under the
assumption that the node is not linked.
2022-06-05 19:38:14 +02:00
Sarah Hoffmann
a024c7665c Merge pull request #2736 from lonvia/reverse-interpolation-index-order
Change indexing order for interpolations and non-addressable objects
2022-06-03 10:42:54 +02:00
Sarah Hoffmann
cbb4749996 change indexing order for interpolations
Interpolations are now indexed after rank 30 objects. The housenumber
nodes no longer need information from the interpolations while the
interpolations can make use of precomputed postcodes.
2022-06-02 15:16:46 +02:00
Sarah Hoffmann
4b0d9f71e8 Merge pull request #2735 from lonvia/geocodejson-type-reverse
Also fix type output in geocodejson for reverse
2022-06-01 22:14:06 +02:00
Sarah Hoffmann
218c56f9a6 use getattr() instead of __getattr__
Makes the linter happy.
2022-06-01 21:26:13 +02:00
Sarah Hoffmann
a35eda3d2a also fix type output in geocodejson for reverse 2022-06-01 20:46:08 +02:00
Sarah Hoffmann
8a0e3e2f3d Merge pull request #2732 from lonvia/fix-ordering-address-parts
Fix order when searching for addr:* components
2022-05-31 20:26:05 +02:00
Sarah Hoffmann
12a3d51bcc Merge pull request #2731 from lonvia/cleanup-special-phrases
Minor code reorganisation around special phrase parsing
2022-05-31 17:13:56 +02:00
Sarah Hoffmann
60367d95dd Merge pull request #2730 from lonvia/exclude-inclusion-tag
Exclude addr:inclusion from search
2022-05-31 17:13:37 +02:00
Sarah Hoffmann
bd0e157b91 fix order when searching for addr:* components
When matching addr:* components the preference was given to
matches that do not intersect with the place.
2022-05-31 16:57:37 +02:00
Sarah Hoffmann
1821f68ca0 exclude addr:inclusion from search 2022-05-31 14:19:19 +02:00
Sarah Hoffmann
b5ac546275 CI: always use the latest version of pylint
This makes it easier to reproduce issues locally.
2022-05-31 09:12:26 +02:00
Sarah Hoffmann
46689df668 custom comparison for SpecialPhrase
Duplicate elimination only works when a custom hash/equal function
is implemented that is based on the members.
2022-05-30 16:30:41 +02:00
Sarah Hoffmann
e828d0d3f7 move quoting hack to wiki loader
The bad quotes around the type for special phrases
specifically occur in the Wiki pages, so it should be
removed by the loader and not in the generic SpecialPhrase
object.
2022-05-30 14:40:33 +02:00
Sarah Hoffmann
cce0e5ea38 convert special phrase loaders to generators
Generators simplify the code quite a bit compared to the previous
Iterator approach.
2022-05-30 14:12:46 +02:00
Sarah Hoffmann
042e314589 remove the language parameter in the SPWikiLoader
Languages must always be configured through config or environment.
Also use monkeypatched environment in tests.
2022-05-30 10:26:20 +02:00
Sarah Hoffmann
61d813bfef add get_str_list() for config
Converts a config value written as a comma-separated list into
a Python list of strings.
2022-05-29 13:53:50 +02:00
Sarah Hoffmann
ecee5cf801 Merge pull request #2728 from lonvia/allow-more-partials
Allow search for partials consisting of 3 or more words
2022-05-27 18:09:11 +02:00
Sarah Hoffmann
9e4e913bf7 allow search for partials consisting of 3 or more words
The search query builder currently rejects searches for partial
names only, when the partial terms are all very frequent to avoid
queries that return too many results.

This change slightly relaxes the condition to allow the search when
there are 3 or more partial terms. With so many terms the number
of matches should be manageable.
2022-05-27 16:49:14 +02:00
Sarah Hoffmann
98fc528d8e Merge pull request #2715 from otbutz/patch-2
Simplify apache rewrite rules
2022-05-24 14:40:28 +02:00
otbutz
d1cd2d1674 Change to regular regex group 2022-05-24 11:32:59 +02:00
Sarah Hoffmann
b593fe9c3e Merge pull request #2718 from nslxndr/fix-log-endtime
Undefined offset in error log
2022-05-23 16:25:41 +02:00
Sarah Hoffmann
6ca6725f6e Merge pull request #2722 from lonvia/fix-relinking-on-updates
Fix bug with keeping linking on updates
2022-05-23 11:36:20 +02:00
Sarah Hoffmann
1d203fdb3c fix bug with keeping linking on updates
When moving the finding of linked places to the precomputation stage,
it was also moved before the statement where the linked_place_id was
removed from the linkee. The result was that the current linkee was
excluded when looking for a linked place on updates because it was
still linked to the boundary to be updated.

Fixed by allowing to either keep the linkage or change to an unlinked
place.
2022-05-23 10:55:10 +02:00
Sandor Nagy
3f30699131 correct end time computation 2022-05-20 23:11:00 +02:00
otbutz
22bd9c4993 Simplify apache rewrite rules 2022-05-20 10:15:28 +02:00
Sarah Hoffmann
4654701c10 Merge pull request #2713 from lonvia/remove-county-nodes-in-canada
Remove county nodes in Canada from addresses
2022-05-19 10:21:09 +02:00
Sarah Hoffmann
8a67ddcb2b remove county nodes in Canada from addresses
Canada has complete coverage for administrative boundaries on
county level. Removing the county nodes from the addresses avoids error
due to a wide-spread doubling of place nodes for city counties.
2022-05-18 10:19:05 +02:00
Sarah Hoffmann
ab71f17c47 Merge pull request #2710 from lonvia/offline-import-mode
Assorted performance improvements for BDD tests
2022-05-12 11:08:29 +02:00
Sarah Hoffmann
f314abcfe1 bdd: restrict imports to four languages
This mainly restricts the number of country names that are loaded.
2022-05-11 16:40:53 +02:00
Sarah Hoffmann
2d1a22705f Merge pull request #2709 from lonvia/less-strict-country-assignment
Be more strict with country assignments
2022-05-11 16:24:47 +02:00
Sarah Hoffmann
e74e577029 bdd: recreate functions on template DB
Avoids calling function refresh on every scenario. The content won't
change between runs.
2022-05-11 15:50:22 +02:00
Sarah Hoffmann
aa0ae610c6 avoid calling OSM servers during bdd tests 2022-05-11 15:33:01 +02:00
Sarah Hoffmann
dc6c4bf22e add offline import mode
In offline mode no attempts are made to download data from the internet.
At the moment that only concerns the computation of the database date.
It contacts the main API to get the date.
2022-05-11 15:03:02 +02:00
Sarah Hoffmann
a7a5f0161f Merge pull request #2708 from lonvia/use-format-literals
Assorted fixes for new pylint warnings
2022-05-11 14:29:56 +02:00
Sarah Hoffmann
739fe1c2c4 no longer allow fuzzy assignment of country
The fallback country boundaries already contain a sufficiently large
part of the water area, so there is no need to extend the country
assignment even more. Features outside countries should not show a
country in their address.
2022-05-11 11:54:25 +02:00
Sarah Hoffmann
3ba975466c fix spacing
Some versions of pylint are oddly picky.
2022-05-11 10:36:09 +02:00
Sarah Hoffmann
d14a585cc9 pylint: disable no-self-use check
This checker encourages bad behaviour (namely changing the static
status of a function during inheritance) and will be made optional
in upcoming versions of pylint.
2022-05-11 10:25:00 +02:00
Sarah Hoffmann
7f7a7df3a2 solve assorted issue with newer pylint versions
Includes more use of 'with', adding encodings to open statements
and a couple of issues with parameter renaming.
2022-05-11 10:22:14 +02:00
Sarah Hoffmann
5d5f40a82f use context management when processing Tiger data 2022-05-11 09:48:56 +02:00
Sarah Hoffmann
ae6b029543 remove redundant 'u' prefixes for unicode strings 2022-05-11 09:48:56 +02:00
Sarah Hoffmann
bb2bd76f91 pylint: avoid explicit use of format() function
Use psycopg2 SQL formatters for SQL and formatted string literals
everywhere else.
2022-05-11 09:48:56 +02:00
Sarah Hoffmann
4e1e166c6a add a function to return a formatted version
Replaces the various repeated format strings throughout the code.
2022-05-11 09:01:24 +02:00
Sarah Hoffmann
5ff35d9984 Merge pull request #2707 from lonvia/make-icu-tokenizer-the-default
Make ICU tokenizer the default
2022-05-11 08:52:49 +02:00
Sarah Hoffmann
c6a426a885 no longer need postgresql-server-dev packages 2022-05-10 18:33:51 +02:00
Sarah Hoffmann
11103268e9 make legacy tokenizer tests the extra on CI 2022-05-10 18:33:34 +02:00
Sarah Hoffmann
b332b1ae23 Merge pull request #2704 from mtmail/migrate-phpunit-xml-schema
PHPUnit 9 changed configuration schema slightly
2022-05-10 17:44:34 +02:00
Sarah Hoffmann
7e70e5f503 always state encoding when opening files in text mode
Also applies to Path.write_text().
2022-05-10 15:36:29 +02:00
Marc Tobias
99fa23040a PHPUnit 9 changed configuration schema slightly 2022-05-10 15:20:43 +02:00
Sarah Hoffmann
adeebec32a switch tests to ICU tokenizer as default 2022-05-10 14:54:50 +02:00
Sarah Hoffmann
b93ef23d3f add migration hint for the new tokenizer default 2022-05-10 12:07:21 +02:00
Sarah Hoffmann
4002bee0c1 make ICU the default tokenizer 2022-05-10 12:02:50 +02:00
Sarah Hoffmann
ed6fda6968 Merge pull request #2702 from lonvia/move-country-names-into-includes
Clean up country name settings
2022-05-10 09:21:16 +02:00
Sarah Hoffmann
2ae13c5583 Merge pull request #2695 from mtmail/git-commit-hash-to-version
add git commit hash to 'nominatim --version' output
2022-05-10 09:14:15 +02:00
Marc Tobias
821dabb138 add git commit hash to --version output 2022-05-09 23:56:13 +02:00
Sarah Hoffmann
9d468f6da0 support arbitrary prefixes in country name list
This means we can now get rid of the last special cases for names.
2022-05-09 11:55:26 +02:00
Sarah Hoffmann
3a8ddf736e move country names into separate include files 2022-05-09 11:55:26 +02:00
Sarah Hoffmann
720c7b7519 Merge pull request #2696 from mtmail/norminatyn-typos
fix typos of name Nominatim
2022-05-05 10:04:55 +02:00
Marc Tobias
0de83c4a51 fix typos of name Nominatim 2022-05-05 01:04:47 +02:00
Sarah Hoffmann
8c073993ef Merge pull request #2693 from mtmail/nominatim-cli-version
new "nominatim --version" global CLI argument
2022-05-04 09:14:35 +02:00
Marc Tobias
a79ab41782 new nominatim --version CLI argument 2022-05-04 01:33:25 +02:00
Sarah Hoffmann
f509526e5c Merge pull request #2681 from lonvia/improve-geocodejson
Fix 'type' field in the geocodejson response
2022-05-02 16:05:02 +02:00
Sarah Hoffmann
896199c9d4 Merge pull request #2687 from lonvia/check-for-wikipedia
Add check for presence of wikipedia importance
2022-05-02 16:04:32 +02:00
Sarah Hoffmann
08672cdf0a explicit cast for osm_type parameter in SQL needed
Otherwise PostgreSQL won't correctly pick up the index
condition.
2022-05-02 14:12:17 +02:00
Sarah Hoffmann
8163723e22 respect exclude_place_ids for housenumber search 2022-05-02 11:44:10 +02:00
Sarah Hoffmann
32a5f812a9 Merge pull request #2689 from lonvia/relations-in-associated-street
Accept any OSM type in street member of associatedStreet
2022-05-02 11:42:34 +02:00
Sarah Hoffmann
372874e89a accept any OSM type in street member of associatedStreet
This is needed for pedestrian areas mapped as multipolygons
and consequently as relations. The lookup in placex guarantees
that the referenced OSM object is indeed a street.

Fixes #2669.
2022-05-02 09:48:51 +02:00
Sarah Hoffmann
8ebb8ee304 Merge pull request #2686 from mtmail/ubuntu20-php-fpm-version
Install-on-Ubuntu-20.sh - correct php version
2022-04-29 14:16:44 +02:00
Sarah Hoffmann
3d58254462 skip wikipedia table test on reverse-only installations
Wikipedia importances are not imported on reverse-only imports.
2022-04-29 14:12:55 +02:00
Marc Tobias
95de411a81 Install-on-Ubuntu-20.sh - correct php-fpm version 2022-04-29 13:24:15 +02:00
Sarah Hoffmann
439d17569d Merge pull request #2685 from lonvia/show-inherited-housenumber
Keep address parts inherited from surrounding buildings after indexing
2022-04-29 12:15:33 +02:00
Sarah Hoffmann
8bcdba1a14 add check for wikipedia importance data
Adds a new check level WARNING because missing wikipedia importances
are not necessarily an error. If the database is run for reverse
requests only, then it is fine to go without them.
2022-04-29 12:14:53 +02:00
Sarah Hoffmann
37e5f07d83 Merge pull request #2684 from lonvia/translit-keep-spacing-marks
ICU: better letter identification in normalization
2022-04-29 10:38:28 +02:00
Sarah Hoffmann
3c68b12176 keep inherited address parts after indexing
The inherited housenumber is needed for display output. We can't
take the one from the housenumber field because it is already
normalized. Remove the inherited address only when reindexing.

Fixes #2683.
2022-04-28 21:38:00 +02:00
Sarah Hoffmann
63dc4b39bc ICU: better letter identification in normalization
The Letter class does not include non-spacing marks that can also
have a consonant or vowel meaning, especially in Indian languages.
Use the alnum property instead which includes them all. Also
include the vowel-canceling Virama, which is not a letter by itself
but changes the transliteration.
2022-04-28 18:23:17 +02:00
Sarah Hoffmann
0ea099bfd5 mention the breaking API change in the migration docs 2022-04-27 11:52:53 +02:00
Sarah Hoffmann
310776671b adapt docs to geocodejson changes 2022-04-27 11:50:12 +02:00
Sarah Hoffmann
4b84de400b geocodejson: add osm_key and osm_value fields
Return OSM main tag information in geocodejson. This is not part
of the official spec but can be useful to get more detailed information
of the object type. Brings the Nominatim output closer to what
Photon produces.
2022-04-27 10:58:25 +02:00
Sarah Hoffmann
8677da2a72 geocodejson: type should contain the general feature class
'type' so far contained the value of the OSM tag. That is rarely
helpful because it is not a restricted class of values. Change
this to contain the types as defined in the geocodejson spec,
which correspond to the address layer names.
2022-04-27 10:53:12 +02:00
Sarah Hoffmann
de828b723e Merge pull request #2678 from lonvia/address-part-order
Change selection of primary address part for ways that cross boundaries
2022-04-22 20:32:10 +02:00
Sarah Hoffmann
a515761193 further tweaking of address distance
For point features, keep using the distance to centroid.
For area features, add a tie breaker for the case where the
center point falls on the boundary.
2022-04-22 14:32:19 +02:00
Sarah Hoffmann
784dad866f change distance computation between place and address part
Instead of computing the distance to the centroid of the area
compute the distance of the area to the centroid of the feature.
This means we give preference to the area that covers the centroid.
It's still a heuristics but one that is a bit less random.
2022-04-22 14:32:09 +02:00
Sarah Hoffmann
403e6f7e5c Merge pull request #2666 from lonvia/admin-command-for-forced-indexing
Admin command for forced indexing
2022-04-14 21:44:08 +02:00
Sarah Hoffmann
27f7c7fd88 add documentation for new refresh command 2022-04-14 15:10:24 +02:00
Sarah Hoffmann
4f59644cc2 add tests for new data invalidation functions 2022-04-14 14:52:13 +02:00
Sarah Hoffmann
c3f1d34b71 add new commands for forced invalidation before indexing 2022-04-14 11:05:43 +02:00
Sarah Hoffmann
f8f20899a3 recommend PostgreSQL 13+
See https://github.com/osm-search/Nominatim/discussions/2659.
2022-04-14 09:21:25 +02:00
Sarah Hoffmann
a319b0a0b4 docs: different default for format on osm.org
Add a note that the format parameter is needed for
nominatim.openstreetmap.org for historical reasons.
2022-04-08 17:13:42 +02:00
Sarah Hoffmann
604ddc0f9d Merge pull request #2660 from lonvia/pyosmium-contextmanager
Support using ReplicationServer as contextmanager
2022-04-08 17:07:33 +02:00
Sarah Hoffmann
126cabacb8 support new ReplicationServer as contextmanager 2022-04-07 17:58:04 +02:00
Sarah Hoffmann
f78ae969e9 Merge pull request #2466 from I70l0teN4ik/state-code
add ISO3166-2-lvl<admin_level> field to response address details
2022-04-07 16:39:50 +02:00
Artem Ziablytskyi
d1479072ae fix bdd tests and docs 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
9a56e53d50 use ISO3166-2-lvl<admin_level> instead of typeLabel prefix 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
7899654675 proper instruction to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
a79c1bda9b Fix API docs and Vagrant instructions to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
665fae8343 Fix API docs and Vagrant instructions to import data 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
6bee188f24 Change the key to <addresspart_type>-ISO3166-2 to support xml response correctly 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
82dbcbb12a add <addresspart_type>:ISO3166-2 field to response address details 2022-04-07 16:37:51 +02:00
Artem Ziablytskyi
76c146f326 add state_code field to response address details 2022-04-07 16:37:51 +02:00
Sarah Hoffmann
fd4ab3f262 Merge pull request #2629 from tareqpi/country-names-yaml-configuration
Move default country names into yaml configuration
2022-04-04 09:04:25 +02:00
Tareq Al-Ahdal
cfbd3652ef fix linting error 2022-04-02 00:14:18 +08:00
Tareq Al-Ahdal
e9c14979a4 remove the conversion to json for name 2022-04-01 22:54:14 +08:00
Tareq Al-Ahdal
e9f979b67b 'read_config' is no longer a fixture
add 'read_config' to test cases that need it
2022-04-01 22:52:17 +08:00
Tareq Al-Ahdal
a323b8f63a test for loading special characters from country_settings.yaml 2022-04-01 21:58:57 +08:00
Tareq Al-Ahdal
9411c14fd2 fix reset country info before loading custom data 2022-04-01 21:55:34 +08:00
Tareq Al-Ahdal
8525e7542f custom country config loads correctly 2022-04-01 21:46:56 +08:00
Sarah Hoffmann
7dabbc5462 Merge pull request #2655 from lonvia/migration-internal-country-name
Add migration for new country name handling in ICU tokenizer
2022-03-31 18:04:18 +02:00
Sarah Hoffmann
de18cd1523 add test for new table_has_column function 2022-03-31 15:55:20 +02:00
Sarah Hoffmann
36a1560117 add migration to mark internal country names 2022-03-31 15:55:20 +02:00
Tareq Al-Ahdal
b5f311d6bc separate unit test function into three functions 2022-03-30 22:06:59 +08:00
Sarah Hoffmann
83dd4362aa remove temporary file 2022-03-30 15:13:31 +02:00
Sarah Hoffmann
a71cab639b Merge pull request #2650 from mtmail/update-lookup-examples
documentation: update example output of lookup endpoint
2022-03-28 20:21:45 +02:00
Marc Tobias
5e0155ae29 documentation: update example output of lookup endpoint 2022-03-28 16:41:10 +02:00
Tareq Al-Ahdal
afef83b1c6 fix edge case handling when 'names' is not there 2022-03-25 22:25:55 +08:00
Tareq Al-Ahdal
9db13aac72 Added unit tests for loading country info from yaml file 2022-03-25 22:22:44 +08:00
Tareq Al-Ahdal
9a1f891998 fix linting error 2022-03-24 13:27:24 +08:00
Tareq Al-Ahdal
7bb7ed468a fix storing of escape sequences in database 2022-03-24 13:18:44 +08:00
Tareq Al-Ahdal
4fc61d260f clean up 2022-03-24 13:16:59 +08:00
Tareq Al-Ahdal
1ceb6926b7 merge of insert query + modularity enhancements 2022-03-24 13:13:38 +08:00
Sarah Hoffmann
d33c82cb66 Merge pull request #2641 from lonvia/reinit-tokenizer-dir
Transparently reinitialize tokenizer directory when necessary
2022-03-20 21:46:07 +01:00
Sarah Hoffmann
4c66c35ed6 reinit the tokenizer directory on website refresh
This means the project directory is usable again, once refresh --website
was run.
2022-03-20 17:49:22 +01:00
Sarah Hoffmann
54db1d8915 docs: copying project dir no longer necessary 2022-03-20 16:01:27 +01:00
Sarah Hoffmann
a0ed80d821 restore the tokenizer directory when missing
Automatically repopulate the tokenizer/ directory with the PHP stub
and the postgresql module, when the directory is missing. This allows
to switch working directories and in particular run the service
from a different machine than where it was installed.
Users still need to make sure that .env files are set up correctly
or they will shoot themselves in the foot.

See #2515.
2022-03-20 11:31:42 +01:00
Sarah Hoffmann
e65913d376 cache loaded configuration
Reading the YAML files is fairly expensive and slows down the BDD tests
significantly. Therefore cache the results from reading the file.
2022-03-20 11:30:03 +01:00
Sarah Hoffmann
2f266d946b Merge pull request #2639 from lonvia/remove-operator
No longer use operator tag as a name
2022-03-18 16:42:18 +01:00
Tareq Al-Ahdal
b6ac4ad837 fix linting error 2022-03-18 21:05:47 +08:00
Sarah Hoffmann
42f0282f14 remove special case for operator names
The OSM data has been sufficiently cleaned up by now that
the operator no longer needs to be considered a name tag.
Use 'brand' as the searchable alternative.
2022-03-18 10:48:53 +01:00
Tareq Al-Ahdal
af739d2f57 modify logic of _include_key function 2022-03-18 06:52:16 +08:00
Tareq Al-Ahdal
fa2aca1cbc adding prefix to keys is now more configurable 2022-03-18 06:20:00 +08:00
Tareq Al-Ahdal
943e5fe699 Revert the removal of new line at the end of the file 2022-03-18 06:07:48 +08:00
Tareq Al-Ahdal
d09670d208 modify logic to prepend 'name:' to keys' 2022-03-18 06:01:25 +08:00
Tareq Al-Ahdal
83b4b8d9c1 reattach 'name:' prefix to keys 2022-03-18 05:46:23 +08:00
Tareq Al-Ahdal
d32a7c1888 initialize an empty dictionary for nested name key 2022-03-18 02:50:33 +08:00
Tareq Al-Ahdal
d0c1b73fb3 remove duplicate values 2022-03-18 02:43:42 +08:00
Tareq Al-Ahdal
90ac15748e fix comment 2022-03-18 02:38:04 +08:00
Tareq Al-Ahdal
6be2077d92 Merge branch 'master' into country-names-yaml-configuration 2022-03-18 02:36:12 +08:00
Tareq Al-Ahdal
456d439e97 Reformatting of country keys 2022-03-18 02:23:11 +08:00
Sarah Hoffmann
2723553593 Merge pull request #2637 from lonvia/keep-linked-place-names
Introduce separation of names from linked places
2022-03-17 16:39:30 +01:00
Sarah Hoffmann
23de4c7aca adapt ParameterParser tests to new key list 2022-03-17 11:45:05 +01:00
Sarah Hoffmann
ce14964943 fix linting 2022-03-17 11:05:32 +01:00
Sarah Hoffmann
e133476c35 merge linked names correctly into namedetails
Convert the '_place_*' entries back to normal entries before
returning them in the 'namedetails' section. If the name field is
duplicated, keep the '_place_*' notation. This preserves the previous
behaviour before _place_ names were introduced but adds the additional
names from the linked place for reference.
2022-03-17 11:02:02 +01:00
Sarah Hoffmann
524dc64ab7 make sure outputs take into account linked place names 2022-03-16 21:44:52 +01:00
Sarah Hoffmann
17da5f45be fix return code for PHP exceptions
These have returned a 0 until now.
2022-03-16 21:44:02 +01:00
Sarah Hoffmann
42cd021d04 save differing linked place names in extra fields
This keeps the names traceable and ensures that all names are searchable
when they differ. Do not keep names when they are exactly the same
to save some space. Linked names are cleaned out before relinking.
2022-03-16 16:38:52 +01:00
Sarah Hoffmann
433d2f4c7d Merge pull request #2633 from lonvia/fix-reverse-single-interpolation-point
Correctly handle single-point interpolations in reverse
2022-03-16 14:22:59 +01:00
Sarah Hoffmann
be8f5778a1 use https protocol for cloning from github
Does not need authentication.
2022-03-16 12:05:58 +01:00
Sarah Hoffmann
ef98a85b05 correctly handle single-point interpolations in reverse
Lookup in location_property_osmline needs to be special cased
for startnumber = endnumber. Also adds tests for the case.

Fixes #2680.
2022-03-16 11:19:09 +01:00
Tareq Al-Ahdal
b4bd4ff67d fix linting error 2022-03-15 19:14:04 +08:00
Sarah Hoffmann
930a5cd12a Merge pull request #2632 from nslxndr/fix-log-typo
Fix typo in log message on replication initialisation
2022-03-15 11:01:57 +01:00
Sandor Nagy
7e3701b64a Fix typo in log message on replication initialisation 2022-03-15 07:50:47 +01:00
Tareq Al-Ahdal
165d17f7f7 reintroduce 'name:' prefix to country name keys 2022-03-13 18:58:27 +08:00
Tareq Al-Ahdal
3939cb614e Remove country.sql from CMakeLists.txt 2022-03-13 18:56:19 +08:00
Tareq Al-Ahdal
377cf36be3 modify data import logic to load country names from yaml 2022-03-12 15:20:57 +08:00
Tareq Al-Ahdal
8b6652a40b move default country names into yaml configuration 2022-03-12 15:17:01 +08:00
Sarah Hoffmann
479d726774 Merge pull request #2627 from mtmail/location-of-osm2pgsql
documentation: clarify osm2pgsql isnt in project directory by default
2022-03-10 15:39:10 +01:00
Marc Tobias
1fcc9717bb documentation: clarify osm2pgsql isnt in project directory by default 2022-03-10 14:16:12 +01:00
Sarah Hoffmann
c35b3ea5c7 Merge pull request #2621 from lonvia/housenumber-analyzer
Introduce optional token analysis for housenumbers
2022-03-01 15:19:07 +01:00
Sarah Hoffmann
15beeef6ce do not expand records in select list
An expression of the form 'SELECT (func()).*' will be expanded
by Postgresql _before_ execution with the result that the function
will be called as many times as there are fields in the record.
This is not what we want. The function call needs to go into
the FROM clause instead.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
92bc3cd0a7 fix linting issue 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
0a9f971e44 add tests for new analyzed housenumbers 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
4a3bbd0319 adapt housenumber cleanup to new word table structure 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
89e1446131 bdd: disable some housenumber tests for legacy
Optional spaces in housenumbers are not supported by legacy tokenizer,
so disable those tests.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
b694a97edf add documentation for housenumber analyzer 2022-03-01 09:34:32 +01:00
Sarah Hoffmann
13ed184efd housenumber analyzer: avoid creating too many variants
Housenumber fields with lots of text are likely bad data. So is
data with many changes from letter to digit. Exclude them from adding
optional spaces.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
f03a05f6bb add new analyser for housenumbers
This analyser makes spaces optional.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
a6903651fc add framework for analysing housenumbers
This lays the groundwork for adding variants for housenumbers.
When analysis is enabled, then the 'word' field in the word table
is used as usual, so that variants can be created. There will be
only one analyser allowed which must have the fixed name
'@housenumber'.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
b8c544cc98 icu: move token deduplication into TokenInfo
Puts collection into one common place.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
243725aae1 icu: move housenumber token computation out of TokenInfo
This was the last function to use the cache. There is a more clean
separation of responsibility now.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
0bb59b2e22 handle unknown analyzer
When changing something in the default configuration of the sanitizers
that refers to an analyzer that is not yet loaded, there shouldn't be
any errors.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
837d44391c move generation of normalized token form to analyzer
This gives the analyzer more flexibility in choosing the normalized
form. In particular, an analyzer creating different variants can choose
the variant that will be used as the canonical form.
2022-03-01 09:34:32 +01:00
Sarah Hoffmann
691ec08586 Merge pull request #2614 from lonvia/reorganise-country-names
Reorganise handling of country names imported from OSM
2022-02-25 09:46:20 +01:00
Sarah Hoffmann
5425394654 add migration to add new derived_names column 2022-02-24 20:50:33 +01:00
Sarah Hoffmann
1d82569f6d add tests for country updates 2022-02-24 16:18:49 +01:00
Sarah Hoffmann
f74228830d bdd: run full import on tests
This uncovered a couple of outdated/wrong tests which have been
fixed, too.
2022-02-24 14:27:51 +01:00
Sarah Hoffmann
a9e3329c39 country_name: use separate columns for names from OSM
This allows us to distinguish between base names and imported ones
and consequently remove imported ones if necessary.
2022-02-23 09:23:06 +01:00
Sarah Hoffmann
a3e4e8e5cd delete unused country name tokens 2022-02-23 09:23:06 +01:00
Sarah Hoffmann
898febcec5 update supported versions 2022-02-23 09:22:17 +01:00
Sarah Hoffmann
855909b4e9 add 'healthcare' as main tag
Given that the tag is most of the time duplicated by an amenity
tag which is already imported, only import it as a fallback when
there is no name.

Fixes #2609.
2022-02-21 11:52:17 +01:00
Sarah Hoffmann
85d65a2fd2 create idx_place_interpolations for import already
It is needed to look up if a node is part of an interpolation.

Fixes #2608.
2022-02-18 11:11:22 +01:00
Sarah Hoffmann
cd9b0c9a20 Merge pull request #2603 from lonvia/one-step-housenumber-search
One step housenumber search
2022-02-10 17:27:56 +01:00
Sarah Hoffmann
0e11ca9b76 add test that interpolations are found by odd/even 2022-02-10 11:23:51 +01:00
Sarah Hoffmann
fd38dd02ce make sure step is taken into account for interpolations 2022-02-09 21:42:28 +01:00
Sarah Hoffmann
474418f03c include housenumber search in name query
The name query already looks for the existence of housenumbers and
may as well retrieve them. Saves up to three additional lookups.
It also means that we can lift the restriction on looking
for existence of housenumbers for simple queries only.
2022-02-08 22:35:12 +01:00
Sarah Hoffmann
6b9fea6f1a disable debug message in interpolation processing 2022-02-07 23:30:25 +01:00
Sarah Hoffmann
02894ca4a4 Merge pull request #2602 from lonvia/filter-bad-housenumbers
Handle mistagged housenumbers like names
2022-02-07 16:27:04 +01:00
Sarah Hoffmann
7d19209fa1 linting: disable too-many-ancestors
This is triggered by UserDict which is meant for deriving.
2022-02-07 11:49:18 +01:00
Sarah Hoffmann
a6b4e8ff67 add tests for housenumber-as-name feature 2022-02-07 11:45:12 +01:00
Sarah Hoffmann
38c3ef3da0 add tests for get_string_list()
Renaming test file for sanitizer config because pytest requires
unique names for test files.
2022-02-07 11:22:24 +01:00
Sarah Hoffmann
610f2cc254 sanitizer: move helpers into a configuration class 2022-02-07 10:48:00 +01:00
Sarah Hoffmann
a79a3210e6 implement is-a-name option for housenumbers 2022-02-07 09:27:11 +01:00
Sarah Hoffmann
39ede26b5c Merge pull request #2598 from geofabrik/doc-update-systemd-timer
Document how to set up systemd timers for --once updates
2022-02-06 10:24:48 +01:00
Sarah Hoffmann
c3f206733f really remove CentOS from CI 2022-02-05 16:07:12 +01:00
Sarah Hoffmann
69481d1590 remove CentOS from CI
The CentOS docker image no longer works after CentOS8 went EOL.
See #2601 for discussion.
2022-02-05 15:14:47 +01:00
Sarah Hoffmann
6877668cab Merge pull request #2599 from StephanGeorg/patch-1
Fixed link
2022-02-03 09:45:59 +01:00
Stephan Georg
dc520bd156 Fixed link 2022-02-03 09:39:03 +01:00
Amanda McCann
bc4a343502 Document how to set up systemd timers for --once updates 2022-02-01 17:01:45 +01:00
Sarah Hoffmann
fbc8884693 restrict change propagation to interpolation lines
Also means that Postgresql will use the right index for the query.
2022-01-28 11:05:37 +01:00
Sarah Hoffmann
c50c534d19 Merge pull request #2597 from lonvia/reorganise-interpolations
Reorganise interpolation code
2022-01-28 08:40:08 +01:00
Sarah Hoffmann
45627b485f Merge pull request #2596 from lonvia/remove-codecov
Remove codecov
2022-01-27 17:11:17 +01:00
Sarah Hoffmann
b6fa121f53 remove tests for closest housenumber function 2022-01-27 16:21:45 +01:00
Sarah Hoffmann
9b31ffaa9f php unit tests don't work on ubuntu 18 2022-01-27 15:18:23 +01:00
Sarah Hoffmann
39e300640e remove codecov
Causes more trouble than doing good.
2022-01-27 15:17:33 +01:00
Sarah Hoffmann
2ffc1537e7 raise PostgreSQL requirement to 9.6
The new code uses the open-ended array notation which is only
available since psql 9.6.
2022-01-27 15:15:56 +01:00
Sarah Hoffmann
64abc90d30 use new tiger step column for queries 2022-01-27 14:08:08 +01:00
Sarah Hoffmann
788505095e add step column to tiger data table
This replaces the interpolationtype column.
2022-01-27 11:54:12 +01:00
Sarah Hoffmann
98432395c3 add migration for upcoming change to tiger tables 2022-01-27 11:48:27 +01:00
Sarah Hoffmann
6b89624f33 adapt frontend to new interpolation table layout 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
4b28b4fed4 adapt BDD tests for new interpolation style 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
fea4dbba50 inherit tags from interpolation not parent
Nodes on an interpolation now only get the address tags of
interpolations and then compute their own parent from that. They no
longer inherit the parent directly.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
83d2c440d5 add migration for new interpolation table layout 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
e6d855b954 add migration for new lookup index 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
9f64c34f1a optimize indexes for interpolation lines
Do not index 'inactive' rows (with startnumber is null) where possible.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
638ed15ada improve handling of updates on nodes in interpolations
Use the same update mechanism as for updates on the interpolations
themselves. Updates must solely happen in place_insert as this is
the place where actual changes of the data happen.
2022-01-27 11:14:55 +01:00
Sarah Hoffmann
c0d8b95f67 update interpolations instead of deleting and recreating 2022-01-27 11:14:55 +01:00
Sarah Hoffmann
c65938d53c Merge pull request #2595 from nslxndr/fix-doc-typos
Fix typos in UI doc
2022-01-26 23:08:41 +01:00
Sandor Nagy
2e3f3a55f1 Fix typos in UI doc 2022-01-26 21:39:20 +01:00
Sarah Hoffmann
cdd0f78bc6 Merge pull request #2594 from lonvia/update-osm2pgsql
Update to osm2pgsql 1.6.0
2022-01-25 12:11:57 +01:00
Sarah Hoffmann
9fac20ceef update to osm2pgsql 1.6.0 2022-01-24 16:55:52 +01:00
Sarah Hoffmann
38bd08d25f Merge pull request #2591 from lonvia/cleanup-place-insert
Reorganise code of place_insert() trigger
2022-01-24 15:58:23 +01:00
Sarah Hoffmann
b44493e7f2 reorganise place_insert trigger
Code cleanup and formatting as well as minor improvements, in
particular removal of unnecessary code.
2022-01-24 09:12:50 +01:00
Sarah Hoffmann
f6ec8d2e33 Merge pull request #2589 from lonvia/clean-housenumbers
Add command for cleaning up word table
2022-01-21 10:17:58 +01:00
Sarah Hoffmann
c170d323d9 add tests for cleaning housenumbers 2022-01-20 23:47:20 +01:00
Sarah Hoffmann
3ce123ab69 do not clean housenumbers in reverse-only mode 2022-01-20 20:21:13 +01:00
Sarah Hoffmann
d8b7a51ab6 add actual removal of housenumber tokens 2022-01-20 20:18:15 +01:00
Sarah Hoffmann
344a2bfc1a add new command for cleaning word tokens
Just pulls outdated housenumbers for the moment.
2022-01-20 20:05:15 +01:00
Sarah Hoffmann
86588419fb Merge pull request #2588 from lonvia/housenumber-sanitizer
Move housenumber parsing into sanitizer
2022-01-20 17:44:24 +01:00
Sarah Hoffmann
d09db09849 adapt ICU tests to new housenumber sanitizer
Restrict tests to making sure that handing in multiple housenumbers
works.
2022-01-20 16:05:49 +01:00
Sarah Hoffmann
1e5a8561c0 fix linting issues 2022-01-20 16:00:23 +01:00
Sarah Hoffmann
f3c9578bca complete documentation for new clean-housenumbers sanitizer 2022-01-20 15:49:32 +01:00
Sarah Hoffmann
3741afa6dc generalize filter-kind parameter for sanitizers
Now behaves the same for tag_analyzer_by_language and
clean_housenumbers. Adds tests.
2022-01-20 15:42:42 +01:00
Sarah Hoffmann
560a006892 add pytest config
We are using custom marks now which need to be registered to avoid
warnings.
2022-01-20 15:38:02 +01:00
Sarah Hoffmann
4774e45218 clean_housenumbers: make kinds and delimiters configurable
Also adds unit tests for various options.
2022-01-20 12:07:12 +01:00
Sarah Hoffmann
206ee87188 factor out housenumber splitting into sanitizer 2022-01-19 17:27:50 +01:00
Sarah Hoffmann
a7e048484b Merge pull request #2585 from lonvia/name-mutations
Introduce character mutations to token analysis
2022-01-19 17:09:36 +01:00
Sarah Hoffmann
d6b5f2f5da docs: add pointer to caddy deployment discussion 2022-01-19 15:28:01 +01:00
Sarah Hoffmann
3df560ea38 fix linting error 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
adbaf700cd move parsing of mutation config to setup phase 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
4a41bff3ab add documentation for new mutation feature 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
b453b0ea95 introduce mutation variants to generic token analyser
Mutations are regular-expression-based replacements that are applied
after variants have been computed. They are meant to be used for
variations on character level.

Add spelling variations for German umlauts.
2022-01-18 11:09:21 +01:00
Sarah Hoffmann
0192a7af96 move variant configuration reading in separate file 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
630ad38a67 refactor variant production to use generators 2022-01-18 11:09:21 +01:00
Sarah Hoffmann
21156fc2a2 Merge pull request #2578 from lonvia/iso-3166-2
Make ISO3166-2 references searchable
2022-01-13 14:54:35 +01:00
Sarah Hoffmann
fa99f5bc03 Merge pull request #2579 from geofabrik/doc-update-typo
Fix typo in name of service. The rest of the docs call it nominatim-updateS
2022-01-13 14:01:57 +01:00
Amanda McCann
09aa1e7af4 Fix typo in name of service. The rest of the docs call it nominatim-updateS 2022-01-13 13:14:17 +01:00
Sarah Hoffmann
2034ed387b make ISO3166-2 references searchable 2022-01-13 09:44:42 +01:00
Sarah Hoffmann
d6140d6d54 Merge pull request #2571 from lonvia/ukrainian-apostrophe
Consider "modifier letter apostrophe" to be punctuation
2022-01-11 09:41:07 +01:00
Sarah Hoffmann
fb54bd3fcf consider "modifier letter apostrophe" to be punctuation
While technically being a letter, the apostrophe is often replaced
with a normal apostrophe in writing which is a punctuation mark.
This makes sure that the modifier letter apostrophe yields the same
normalization results and thus is really interchangeable.

Only has an effect after the next reimport.

Fixes #2569.
2022-01-10 17:40:03 +01:00
Sarah Hoffmann
a486ee347a Merge pull request #2570 from woodpeck/patch-3
Fix typos
2022-01-10 14:21:48 +01:00
Frederik Ramm
5fb3582b31 Fix typos 2022-01-10 13:38:53 +01:00
Sarah Hoffmann
8b0b9db31e Merge pull request #2565 from lonvia/swap-wordset-order
Swap order of query interpretation
2022-01-06 09:02:46 +01:00
Sarah Hoffmann
f9889f81d6 swap order of query interpretation
A forward interpretation of the form 'street, city, country' is
much more frequent than the reverse form 'country, city, street'.
Thus swap the order of interpretations so that the forward order comes
first.
2022-01-05 15:21:14 +01:00
Sarah Hoffmann
efafa52719 Merge pull request #2562 from lonvia/copyright-headers
Add consistent copyright headers
2022-01-04 23:10:37 +01:00
Micah David Cochran
8bda59fbe7 made collect_os_info script in Python 2022-01-03 14:57:01 -06:00
Sarah Hoffmann
c3788d765e add consistent SPDX copyright headers 2022-01-03 16:23:58 +01:00
Sarah Hoffmann
e407558f76 Merge pull request #2559 from lonvia/disable-jit-in-queries
Disable JIT and parallel workers on search frontend
2022-01-03 15:13:57 +01:00
Sarah Hoffmann
042df4198a disable JIT and parallel workers on search frontend
Bad query planning now also interferes with queries for search and
reverse.
2021-12-22 10:47:54 +01:00
Sarah Hoffmann
ab6f35d83a Merge pull request #2553 from lonvia/revert-street-matching-to-full-names
Revert street matching to full names
2021-12-14 15:52:34 +01:00
Micah David Cochran
f20d85738f add utils/collect_os_info.sh script 2021-12-13 11:26:09 -06:00
Sarah Hoffmann
f9b56a8581 correctly match abbreviated addr:street
This only works when addr:street is abbreviated and the street
name isn't. It does not work the other way around.
2021-12-08 21:58:43 +01:00
Sarah Hoffmann
fedc8ed474 Merge pull request #2542 from lonvia/update-phpunit
Update PHPUnit use to 9.5
2021-12-07 15:44:45 +01:00
Sarah Hoffmann
79aeb31088 restrict PHPUnit to 9.5 version
There are so many breaking changes with PHPUnit that it is
impossible to give any other guarantees.
2021-12-07 14:49:31 +01:00
Sarah Hoffmann
04857d32cd enable PHPUnit 9 for coverage
A couple of functions have been renamed.
2021-12-07 12:07:17 +01:00
Sarah Hoffmann
109cdce92c php unit: replace deprecated regex assert
The regEx assertion has been renamed in PHPUnit 9.5
and causes deprecation warnings.
2021-12-07 11:34:21 +01:00
Sarah Hoffmann
b7554d9ed8 php unit: don't enforce a name on the test database
Also gets rid of a PHPUnit deprecation warning.
2021-12-07 11:31:45 +01:00
Sarah Hoffmann
6106f1a32e php test: class must be called like the file 2021-12-07 11:20:38 +01:00
Sarah Hoffmann
f2a8307bb6 disable codecov
Not working.
2021-12-07 11:13:30 +01:00
Sarah Hoffmann
470ee7aef9 Merge pull request #2540 from lonvia/remove-support-for-centos7
Remove installation instructions for CentOS 7
2021-12-07 09:17:29 +01:00
Sarah Hoffmann
aefca48e78 remove installation instructions for CentOS 7
This ends official support for CentOS 7.
2021-12-06 16:05:27 +01:00
Sarah Hoffmann
5e792078b3 remove some odd variants of addr:street from the styles
Some import has added names in partial tags which confuse the
street name matching.
2021-12-06 15:17:00 +01:00
Sarah Hoffmann
7f7d2fd5b3 skip most addr: tags with suffixes
Only one addr: tag can be processed currently, so make
sure it is the one without suffixes to not get odd data.
addr:street is the exception because it uses a different
matching mechanism.
2021-12-06 14:55:10 +01:00
Sarah Hoffmann
5e435b41ba ICU: matching any street name will do again 2021-12-06 14:26:08 +01:00
Sarah Hoffmann
44cfce1ca4 revert to using full names for street name matching
Using partial names turned out to not work well because there are
often similarly named streets next to each other. It also
prevents us from being able to take into account all addr:street:*
tags.

This change gets all the full term tokens for the addr:street tags
from the DB. As they are used for matching only, we can assume that
the term must already be there or there will be no match. This
avoids creating unused full name tags.
2021-12-06 11:38:38 +01:00
Sarah Hoffmann
bb175cc958 Merge pull request #2539 from lonvia/clean-up-python-tests
Restructure and extend python unit tests
2021-12-03 17:08:25 +01:00
Sarah Hoffmann
5a9fb6eaf7 specify text type in test SQL
Older versions of postgres fail otherwise.
2021-12-03 13:56:23 +01:00
Sarah Hoffmann
54d35ddfe9 split cli tests by subcommand and extend coverage 2021-12-02 23:45:48 +01:00
Sarah Hoffmann
7beccb7997 remove unnecessary pass statements 2021-12-02 15:54:24 +01:00
Sarah Hoffmann
14a78f55cd more unit tests for tokenizers 2021-12-02 15:46:36 +01:00
Sarah Hoffmann
7617a9316e extend API unit tests 2021-12-01 20:48:29 +01:00
Sarah Hoffmann
a52ed366e4 add tests for migration 2021-12-01 20:27:40 +01:00
Sarah Hoffmann
7be164e2a5 more testing for refresh functions 2021-12-01 14:58:54 +01:00
Sarah Hoffmann
a24f25c0d8 more tests for exec utilities 2021-12-01 14:23:51 +01:00
Sarah Hoffmann
993b238a41 add more tests for database import 2021-12-01 11:54:58 +01:00
Sarah Hoffmann
bbbfc8201c add tests for adding additional data
Also adds checks that parameters for osm2pgsql are set
as expected.
2021-12-01 11:22:46 +01:00
Sarah Hoffmann
6f03a4d6ce add tests for flatten_config_file and other than yaml formats 2021-12-01 10:24:11 +01:00
Sarah Hoffmann
c8958a22d2 tests: add fixture for making test project directory 2021-11-30 18:01:46 +01:00
Sarah Hoffmann
37afa2180b generalize fixtures for cli tests 2021-11-30 14:07:39 +01:00
Sarah Hoffmann
b2df8e478a python test: move single-use fixtures to subdirectories 2021-11-30 12:03:16 +01:00
Sarah Hoffmann
50fccb52be remove unused test files 2021-11-30 11:44:10 +01:00
Sarah Hoffmann
b90e719da5 organise python tests in subdirectories
The directories follow the same structure as the modules in
nominatim/.
2021-11-30 11:22:26 +01:00
Sarah Hoffmann
97f1723181 Merge pull request #2530 from lonvia/declassify-highway
Change default rank for highway objects to 30
2021-11-25 08:41:25 +01:00
Sarah Hoffmann
80e0a3cce4 change default rank for highway objects to 30
The highway key is being used more and more for non-ways these
days. This clashes with Nominatim's assumption that essentially
everything that has a highway tag can be used as the street part
of the address.

Change the default rank of highway objects to 30 to avoid this.
Only the known values for streets keep the rank 26 and are now
listed explicitly.
2021-11-24 22:10:40 +01:00
Sarah Hoffmann
79effae933 Merge pull request #2529 from lonvia/sort-street-results-by-tiger-housenumber
Take tiger housenumber into account when ranking street results
2021-11-24 16:23:41 +01:00
Sarah Hoffmann
810056349f add migration for inclusive housenumber Tiger index 2021-11-24 12:03:20 +01:00
Sarah Hoffmann
b1d490ea53 add index for Tiger housenumber queries 2021-11-24 11:10:20 +01:00
Sarah Hoffmann
345637290b take Tiger housenumbers into account when ranking street results
Queries with a housenumber need to rank streets higher that
have the requested housenumber attached. We already do that for
ordinary housenumber objects and for interpolations. This
adds support for Tiger housenumbers as well.

Fixes #2501.
2021-11-24 11:10:20 +01:00
Sarah Hoffmann
9628df3031 Merge pull request #2528 from lonvia/allow-french-extra-housenumbers
Don't penalize French 'bis' housenumbers
2021-11-21 10:53:20 +01:00
Sarah Hoffmann
423f338d04 Merge pull request #2526 from lonvia/docs-moving-database
Add a section about moving the database to another machine
2021-11-19 21:14:53 +01:00
Sarah Hoffmann
3a2597e5c4 don't penalize French 'bis' housenumbers
House numbers of the form '9 bis' are usual in France. So
be a bit more lenient before adding penalties to house numbers
with letters in them.

Fixes #2527.
2021-11-19 21:12:17 +01:00
Sarah Hoffmann
641f261495 Merge pull request #2525 from lonvia/fix-replication-indexer
Fix instantiation of indexer for replication
2021-11-19 16:16:30 +01:00
Sarah Hoffmann
5884a6e7a6 add a section about moving the database to another machine 2021-11-19 16:11:32 +01:00
Sarah Hoffmann
10e979e841 only instantiate indexer once for replication
Also makes sure that indexer object exists everywhere where needed.

See #2518.
2021-11-19 14:48:58 +01:00
Sarah Hoffmann
8dc1441635 Merge pull request #2517 from lonvia/transliteration-special-chars
ICU: avoid non-alphanumerical characters in transliteration
2021-11-11 07:42:42 +01:00
Sarah Hoffmann
c79dcfad9a make sure housenumbers are properly quoted 2021-11-10 20:44:28 +01:00
Sarah Hoffmann
1886952666 avoid special characters in word tokens
Transliteration should only consist of ASCII letters
and numbers. Avoid any other characters.
2021-11-10 17:14:13 +01:00
Sarah Hoffmann
7326b246b7 Merge pull request #2516 from lonvia/test-for-website-dir
Better error reporting when API script does not exist
2021-11-10 13:27:09 +01:00
Sarah Hoffmann
345c812e43 better error reporting when API script does not exist
Check if the API script exists on the expected location before
running php-cli. This way we can add a useful hint about the
project directory.

Fixes #2513.
2021-11-10 11:58:20 +01:00
Sarah Hoffmann
fd4ba3989e Merge pull request #2511 from lonvia/fix-combination-error-needs-address
Fix boolean combination of NeedsAddress flag
2021-11-06 12:11:55 +01:00
Sarah Hoffmann
e2d2571ad0 fix combination of NeedsAddress flag
When dealing with multiple partial terms, only keep the
flag, when all partial terms are so frequent as to need
an address.

Fixes #2510.
2021-11-05 22:18:37 +01:00
725 changed files with 66845 additions and 10659 deletions

7
.codespellrc Normal file
View File

@@ -0,0 +1,7 @@
# https://github.com/codespell-project/codespell
[codespell]
skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
# Need to be lowercase in the list
# Unter = Unter den Linden (an example address)
ignore-words-list = inout,unter

2
.github/FUNDING.yml vendored Normal file
View File

@@ -0,0 +1,2 @@
github: lonvia
custom: "https://nominatim.org/funding/"

View File

@@ -7,10 +7,13 @@ assignees: ''
---
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.
Do not send screen shots! Copy any console output directly into the issue.
-->
**Describe the bug**
<!-- A clear and concise description of what the bug is. -->
<!-- A clear and concise description of what the bug is.-->
**To Reproduce**
<!-- Please describe what you did to get to the issue. -->
@@ -25,12 +28,15 @@ assignees: ''
- RAM:
- number of CPUs:
- type and size of disks:
- bare metal/AWS/other cloud service:
**Postgresql Configuration:**
<!-- List any configuration items you changed in your postgresql configuration. -->
**Nominatim Configuration:**
<!-- List the contents of your customized `.env` file. -->
**Additional context**
<!-- Add any other context about the problem here. -->

View File

@@ -1,38 +1,48 @@
name: 'Build Nominatim'
inputs:
ubuntu:
flavour:
description: 'Version of Ubuntu to install on'
required: false
default: '20'
default: 'ubuntu-20'
cmake-args:
description: 'Additional options to hand to cmake'
required: false
default: ''
lua:
description: 'Version of Lua to use'
required: false
default: '5.3'
runs:
using: "composite"
steps:
- name: Install prerequisites
- name: Clean out the disk
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
if [ "x$UBUNTUVER" == "x18" ]; then
pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium PyYAML==5.1 datrie
sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
df -h
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
pip3 install sqlalchemy psycopg aiosqlite
fi
shell: bash
env:
UBUNTUVER: ${{ inputs.ubuntu }}
- name: Download dependencies
run: |
if [ ! -f country_grid.sql.gz ]; then
wget --no-verbose https://www.nominatim.org/data/country_grid.sql.gz
fi
cp country_grid.sql.gz Nominatim/data/country_osm_grid.sql.gz
shell: bash
FLAVOUR: ${{ inputs.flavour }}
CMAKE_ARGS: ${{ inputs.cmake-args }}
LUA_VERSION: ${{ inputs.lua }}
- name: Configure
run: mkdir build && cd build && cmake ../Nominatim
run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim
shell: bash
env:
CMAKE_ARGS: ${{ inputs.cmake-args }}
- name: Build
run: |

View File

@@ -15,14 +15,16 @@ runs:
- name: Remove existing PostgreSQL
run: |
sudo apt-get purge -yq postgresql*
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt install curl ca-certificates gnupg
curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt-get update -qq
shell: bash
- name: Install PostgreSQL
run: |
sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER} postgresql-server-dev-${PGVER}
sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER}
shell: bash
env:
PGVER: ${{ inputs.postgresql-version }}

View File

@@ -7,11 +7,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v4
with:
submodules: true
- uses: actions/cache@v2
- uses: actions/cache@v4
with:
path: |
data/country_osm_grid.sql.gz
@@ -27,7 +27,7 @@ jobs:
mv nominatim-src.tar.bz2 Nominatim
- name: 'Upload Artifact'
uses: actions/upload-artifact@v2
uses: actions/upload-artifact@v4
with:
name: full-source
path: nominatim-src.tar.bz2
@@ -37,23 +37,109 @@ jobs:
needs: create-archive
strategy:
matrix:
ubuntu: [18, 20]
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
include:
- ubuntu: 18
postgresql: 9.5
postgis: 2.5
pytest: pytest
php: 7.2
- ubuntu: 20
- flavour: oldstuff
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
postgresql: 13
postgis: 3
pytest: py.test-3
php: 7.4
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
postgresql: 15
postgis: 3
lua: '5.3'
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/download-artifact@v2
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: actions/setup-python@v5
with:
python-version: 3.7
if: matrix.flavour == 'oldstuff'
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ${{ matrix.flavour }}
lua: ${{ matrix.lua }}
- name: Install test prerequsites (behave from apt)
run: sudo apt-get install -y -qq python3-behave
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequsites (behave from pip)
run: pip3 install behave==1.2.6
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
- name: Install test prerequsites (from apt for Ununtu 2x)
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'
- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequsites (from pip for Ubuntu 18)
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
run: pip3 install falcon starlette asgi_lifespan
- name: Install latest pylint
run: pip3 install -U pylint
if: matrix.flavour == 'ubuntu-22'
- name: Python linting
run: python3 -m pylint nominatim
working-directory: Nominatim
if: matrix.flavour == 'ubuntu-22'
- name: Python unit tests
run: python3 -m pytest test/python
working-directory: Nominatim
- name: BDD tests
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
if: matrix.flavour != 'oldstuff'
- name: Python static typechecking
run: python3 -m mypy --strict nominatim
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'
legacy-test:
needs: create-archive
runs-on: ubuntu-20.04
strategy:
matrix:
postgresql: ["13", "16"]
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
@@ -63,132 +149,75 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
coverage: xdebug
tools: phpunit, phpcs, composer
- uses: actions/setup-python@v2
with:
python-version: 3.6
if: matrix.ubuntu == 18
php-version: '7.4'
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
postgis-version: 3
- name: Install Postgresql server dev
run: sudo apt-get install postgresql-server-dev-$PGVER
env:
PGVER: ${{ matrix.postgresql }}
- uses: ./Nominatim/.github/actions/build-nominatim
with:
ubuntu: ${{ matrix.ubuntu }}
cmake-args: -DBUILD_MODULE=on
- name: Install test prerequsites
run: sudo apt-get install -y -qq pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage
if: matrix.ubuntu == 20
run: sudo apt-get install -y -qq python3-behave
- name: Install test prerequsites
run: pip3 install pylint==2.6.0 pytest pytest-cov behave==1.2.6
if: matrix.ubuntu == 18
- name: BDD tests (legacy tokenizer)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
php-test:
needs: create-archive
runs-on: ubuntu-22.04
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 15
postgis-version: 3
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.1
tools: phpunit:9, phpcs, composer
ini-values: opcache.jit=disable
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: Python linting
run: pylint nominatim
working-directory: Nominatim
- name: PHP unit tests
run: phpunit --coverage-clover ../../coverage-php.xml ./
run: phpunit ./
working-directory: Nominatim/test/php
if: matrix.ubuntu == 20
- name: Python unit tests
run: $PYTEST --cov=nominatim --cov-report=xml test/python
working-directory: Nominatim
env:
PYTEST: ${{ matrix.pytest }}
- name: BDD tests
run: |
mkdir cov
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov
composer require phpunit/phpcov:7.0.2
vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov
working-directory: Nominatim/test/bdd
if: matrix.ubuntu == 20
- name: BDD tests
run: |
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
if: matrix.ubuntu == 18
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v1
with:
files: ./Nominatim/coverage*.xml
directory: ./
name: codecov-umbrella
fail_ci_if_error: false
path_to_write_report: ./coverage/codecov_report.txt
verbose: true
if: matrix.ubuntu == 20
icu-test:
needs: create-archive
strategy:
matrix:
ubuntu: [20]
include:
- ubuntu: 20
postgresql: 13
postgis: 3
pytest: py.test-3
php: 7.4
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/download-artifact@v2
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
coverage: xdebug
tools: phpunit, phpcs, composer
- uses: actions/setup-python@v2
with:
python-version: 3.6
if: matrix.ubuntu == 18
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgis-version: ${{ matrix.postgis }}
- uses: ./Nominatim/.github/actions/build-nominatim
with:
ubuntu: ${{ matrix.ubuntu }}
flavour: 'ubuntu-22'
- name: Install test prerequsites
run: sudo apt-get install -y -qq python3-behave
if: matrix.ubuntu == 20
- name: Install test prerequsites
run: pip3 install behave==1.2.6
if: matrix.ubuntu == 18
- name: BDD tests (icu tokenizer)
- name: BDD tests (php)
run: |
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=icu --format=progress3
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php --format=progress3
working-directory: Nominatim/test/bdd
@@ -198,21 +227,16 @@ jobs:
strategy:
matrix:
name: [Ubuntu-18, Ubuntu-20, Centos-8]
name: [Ubuntu-20, Ubuntu-22]
include:
- name: Ubuntu-18
flavour: ubuntu
image: "ubuntu:18.04"
ubuntu: 18
install_mode: install-nginx
- name: Ubuntu-20
flavour: ubuntu
image: "ubuntu:20.04"
ubuntu: 20
install_mode: install-apache
- name: Centos-8
flavour: centos
image: "centos:8"
- name: Ubuntu-22
image: "ubuntu:22.04"
ubuntu: 22
install_mode: install-apache
container:
image: ${{ matrix.image }}
@@ -232,14 +256,6 @@ jobs:
apt-get install -y git sudo wget
ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
shell: bash
if: matrix.flavour == 'ubuntu'
- name: Prepare container (CentOS)
run: |
dnf update -y
dnf install -y sudo glibc-langpack-en
shell: bash
if: matrix.flavour == 'centos'
- name: Setup import user
run: |
@@ -251,7 +267,7 @@ jobs:
OS: ${{ matrix.name }}
INSTALL_MODE: ${{ matrix.install_mode }}
- uses: actions/download-artifact@v2
- uses: actions/download-artifact@v4
with:
name: full-source
path: /home/nominatim
@@ -269,17 +285,24 @@ jobs:
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
mv Nominatim/settings/flex-base.lua flex-base.lua
mv Nominatim/settings/import-extratags.lua import-extratags.lua
mv Nominatim/settings/taginfo.lua taginfo.lua
rm -rf Nominatim
mkdir data-env-reverse
working-directory: /home/nominatim
- name: Prepare import environment (CentOS)
run: |
sudo ln -s /usr/local/bin/nominatim /usr/bin/nominatim
echo NOMINATIM_DATABASE_WEBUSER="apache" > nominatim-project/.env
cp nominatim-project/.env data-env-reverse/.env
- name: Print version
run: nominatim --version
working-directory: /home/nominatim/nominatim-project
- name: Print taginfo
run: lua taginfo.lua
working-directory: /home/nominatim
if: matrix.flavour == 'centos'
- name: Collect host OS information
run: nominatim admin --collect-os-info
working-directory: /home/nominatim/nominatim-project
- name: Import
run: nominatim import --osm-file ../test.pbf
@@ -300,7 +323,6 @@ jobs:
- name: Prepare update (Ubuntu)
run: apt-get install -y python3-pip
shell: bash
if: matrix.flavour == 'ubuntu'
- name: Run update
run: |
@@ -309,12 +331,71 @@ jobs:
NOMINATIM_REPLICATION_MAX_DIFF=1 nominatim replication --once
working-directory: /home/nominatim/nominatim-project
- name: Clean up database
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
- name: Run reverse-only import
run : |
echo 'NOMINATIM_DATABASE_DSN="pgsql:dbname=reverse"' >> .env
nominatim import --osm-file ../test.pbf --reverse-only --no-updates
working-directory: /home/nominatim/data-env-reverse
- name: Check reverse import
- name: Check reverse-only import
run: nominatim admin --check-database
working-directory: /home/nominatim/data-env-reverse
- name: Clean up database (reverse-only import)
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
install-no-superuser:
runs-on: ubuntu-latest
needs: create-archive
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 16
postgis-version: 3
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ubuntu-22
lua: 5.3
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
rm -rf Nominatim
- name: Prepare Database
run: |
nominatim import --prepare-database
- name: Create import user
run: |
sudo -u postgres createuser osm-import
psql -d nominatim -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import'"
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "osm-import"'
- name: Run import
run: |
NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file test.pbf
- name: Check full import
run: nominatim admin --check-database
codespell:
runs-on: ubuntu-latest
steps:
- uses: codespell-project/actions-codespell@v2
with:
only_warn: 1

23
.mypy.ini Normal file
View File

@@ -0,0 +1,23 @@
[mypy]
plugins = sqlalchemy.ext.mypy.plugin
[mypy-sanic_cors.*]
ignore_missing_imports = True
[mypy-icu.*]
ignore_missing_imports = True
[mypy-asyncpg.*]
ignore_missing_imports = True
[mypy-datrie.*]
ignore_missing_imports = True
[mypy-dotenv.*]
ignore_missing_imports = True
[mypy-falcon.*]
ignore_missing_imports = True
[mypy-geoalchemy2.*]
ignore_missing_imports = True

View File

@@ -1,6 +1,6 @@
[MASTER]
extension-pkg-whitelist=osmium
extension-pkg-whitelist=osmium,falcon
ignored-modules=icu,datrie
[MESSAGES CONTROL]
@@ -10,6 +10,9 @@ ignored-modules=icu,datrie
# closing added here because it sometimes triggers a false positive with
# 'with' statements.
ignored-classes=NominatimArgs,closing
disable=too-few-public-methods,duplicate-code
# 'too-many-ancestors' is triggered already by deriving from UserDict
# 'not-context-manager' disabled because it causes false positives once
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init
good-names=i,x,y,fd,db
good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr

16
AUTHORS
View File

@@ -1,15 +1,15 @@
Nominatim was written by:
Brian Quinion
Sarah Hoffmann
Marc Tobias Metten
* Brian Quinion
* Sarah Hoffmann
* Marc Tobias Metten
markigail
gemo1011
IrlJidel
Frederik Ramm
* markigail
* AntoJvlt
* gemo1011
* darkshredder
and many more.
For a full list of contributors see
For a full list of contributors see the Git logs or visit
https://github.com/openstreetmap/Nominatim/graphs/contributors

View File

@@ -19,13 +19,24 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
set(NOMINATIM_VERSION_MINOR 0)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_MINOR 4)
set(NOMINATIM_VERSION_PATCH 1)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
# Setting GIT_HASH
find_package(Git)
if (GIT_FOUND)
execute_process(
COMMAND "${GIT_EXECUTABLE}" log -1 --format=%h
WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
OUTPUT_VARIABLE GIT_HASH
OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_QUIET
)
endif()
#-----------------------------------------------------------------------------
# Configuration
@@ -33,7 +44,7 @@ add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")
set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
set(BUILD_API on CACHE BOOL "Build everything for the API server")
set(BUILD_MODULE on CACHE BOOL "Build PostgreSQL module")
set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
set(BUILD_TESTS on CACHE BOOL "Build test suite")
set(BUILD_DOCS on CACHE BOOL "Build documentation")
set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
@@ -52,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
endif()
set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
set(BUILD_TESTS off)
set(WITH_LUA off CACHE BOOL "")
add_subdirectory(osm2pgsql)
set(BUILD_TESTS ${BUILD_TESTS_SAVED})
endif()
@@ -63,7 +73,7 @@ endif()
#-----------------------------------------------------------------------------
if (BUILD_IMPORTER)
find_package(PythonInterp 3.6 REQUIRED)
find_package(PythonInterp 3.7 REQUIRED)
endif()
#-----------------------------------------------------------------------------
@@ -72,26 +82,17 @@ endif()
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (BUILD_API OR BUILD_IMPORTER)
if (BUILD_API)
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
message(WARNING "PHP binary not found. Only Python frontend can be used.")
set(PHP_BIN "")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
if (NOT PHPCGI_BIN)
find_program (PHPCGI_BIN php-cgi)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHPCGI_BIN})
message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
set (PHPCGI_BIN "")
else()
message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
endif()
endif()
#-----------------------------------------------------------------------------
@@ -223,11 +224,21 @@ if (BUILD_IMPORTER)
install(DIRECTORY nominatim
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
FILES_MATCHING PATTERN "*.py"
PATTERN "paths.py" EXCLUDE
PATTERN __pycache__ EXCLUDE)
if (EXISTS ${PHP_BIN})
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
else()
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py-no-php.tmpl paths-py.installed)
endif()
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
RENAME paths.py)
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
install(FILES data/country_name.sql
${COUNTRY_GRID_FILE}
install(FILES ${COUNTRY_GRID_FILE}
data/words.sql
DESTINATION ${NOMINATIM_DATADIR})
endif()
@@ -248,24 +259,27 @@ if (BUILD_MODULE)
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
if (BUILD_API)
if (BUILD_API AND EXISTS ${PHP_BIN})
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase-settings.json
settings/import-admin.style
settings/import-street.style
settings/import-address.style
settings/import-full.style
settings/import-extratags.style
settings/import-admin.lua
settings/import-street.lua
settings/import-address.lua
settings/import-full.lua
settings/import-extratags.lua
settings/flex-base.lua
settings/icu_tokenizer.yaml
settings/country_settings.yaml
DESTINATION ${NOMINATIM_CONFIGDIR})
install(DIRECTORY settings/icu-rules
DESTINATION ${NOMINATIM_CONFIGDIR})
install(DIRECTORY settings/country-names
DESTINATION ${NOMINATIM_CONFIGDIR})
if (INSTALL_MUNIN_PLUGINS)
install(FILES munin/nominatim_importlag

View File

@@ -36,7 +36,7 @@ Nominatim historically hasn't followed a particular coding style but we
are in process of consolidating the style. The following rules apply:
* Python code uses the official Python style
* indention
* indentation
* SQL use 2 spaces
* all other file types use 4 spaces
* [BSD style](https://en.wikipedia.org/wiki/Indent_style#Allman_style) for braces
@@ -64,3 +64,39 @@ Before submitting a pull request make sure that the tests pass:
cd build
make test
```
## Releases
Nominatim follows semantic versioning. Major releases are done for large changes
that require (or at least strongly recommend) a reimport of the databases.
Minor releases can usually be applied to existing databases. Patch releases
contain bug fixes only and are released from a separate branch where the
relevant changes are cherry-picked from the master branch.
Checklist for releases:
* [ ] increase version in `nominatim/version.py` and CMakeLists.txt
* [ ] update `ChangeLog` (copy information from patch releases from release branch)
* [ ] complete `docs/admin/Migration.md`
* [ ] update EOL dates in `SECURITY.md`
* [ ] commit and make sure CI tests pass
* [ ] test migration
* download, build and import previous version
* migrate using master version
* run updates using master version
* [ ] prepare tarball:
* `git clone --recursive https://github.com/osm-search/Nominatim` (switch to right branch!)
* `rm -r .git* osm2pgsql/.git*`
* copy country data into `data/`
* add version to base directory and package
* [ ] upload tarball to https://nominatim.org
* [ ] prepare documentation
* check out new docs branch
* change git checkout instructions to tarball download instructions or adapt version on existing ones
* build documentation and copy to https://github.com/osm-search/nominatim-org-site
* add new version to history
* [ ] check release tarball
* download tarball as per new documentation instructions
* compile and import Nominatim
* run `nominatim --version` to confirm correct version
* [ ] tag new release and add a release on github.com

199
ChangeLog
View File

@@ -1,3 +1,188 @@
4.4.1
* fix geocodejson output: admin level output should only print boundaries
* updating: restrict invalidation of child objects on large street features
* restrict valid interpolation house numbers to 0-999999
* fix import error when SQLAlchemy 1.4 and psycopg3 are installed
* various typo fixes in the documentation
4.4.0
* add export to SQLite database and SQLite support for the frontend
* switch to Python frontend as the default frontend
* update to osm2pgsql 1.11.0
* add support for new osm2pgsql middle table format
* simplify geometry for large polygon objects not used in addresses
* various performance tweaks for search in Python frontend
* fix regression in search with categories where it was confused with near
search
* partially roll back use of SQLAlchemy lambda statements due to bugs
in SQLAlchemy
* fix handling of timezones for timestamps from the database
* fix handling of full address searches in connection with a viewbox
* fix postcode computation of highway areas
* fix handling of timeout errors for Python <= 3.10
* fix address computation for postcode areas
* fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
* make sure extratags are always null when empty
* reduce importance of places without wikipedia reference
* improve performance of word count computations
* drop support for wikipedia tags with full URLs
* replace get_addressdata() SQL implementation with a Python function
* improve display name for non-address features
* fix postcode validation for postcodes with country code
(thanks @pawel-wroniszewski)
* add possibility to run imports without superuser database rights
(thanks @robbe-haesendonck)
* new CLI command for cleaning deleted relations (thanks @lujoh)
* add check for database version in the CLI check command
* updates to import styles ignoring more unused objects
* various typo fixes (thanks @kumarUjjawal)
4.3.2
* fix potential SQL injection issue for 'nominatim admin --collect-os-info'
* PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
* Python frontend: improve handling of viewbox
* Python frontend: correct deployment instructions
4.3.1
* reintroduce result rematching
* improve search of multi-part names
* fix accidentally switched meaning of --reverse-only and --search-only in
warm command
4.3.0
* fix failing importance recalculation command
* fix merging of linked names into unnamed boundaries
* fix a number of corner cases with interpolation splitting resulting in
invalid geometries
* fix failure in website generation when password contains curly brackets
* fix broken use of ST_Project in PostGIS 3.4
* new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
to known countries (thanks @alfmarcua)
* allow negative OSM IDs (thanks @alfmarcua)
* disallow import of Tiger data in a frozen DB
* avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
* update bundled osm2pgsql to 1.9.2
* reorganise osm2pgsql flex style and make it the default
* exclude names ending in :wikipedia from indexing
* no longer accept comma as a list separator in name tags
* process forward dependencies on update to catch updates in geometries
of ways and relations
* fix handling of isolated silent letters during transliteration
* no longer assign postcodes to large linear features like rivers
* introduce nominatim.paths module for finding data and libraries
* documentation layout changed to material theme
* new documentation section for library
* various smaller fixes to existing documentation
(thanks @woodpeck, @bloom256, @biswajit-k)
* updates to vagrant install scripts, drop support for Ubuntu 18
(thanks @n-timofeev)
* removed obsolete configuration variables from env.defaults
* add script for generating a taginfo description (thanks @biswajit-k)
* modernize Python code around BDD test and add testing of Python frontend
* lots of new BDD tests for API output
4.2.3
* fix deletion handling for 'nominatim add-data'
* adapt place_force_delete() to new deletion handling
* flex style: avoid dropping of postcode areas
* fix update errors on address interpolation handling
4.2.2
* extend flex-style library to fully support all default styles
* fix handling of Hebrew aleph
* do not assign postcodes to rivers
* fix string matching in PHP code
* update osm2pgsql (various updates to flex)
* fix slow query when deleting places on update
* fix CLI details query
* fix recalculation of importance values
* fix polygon simplification in reverse results
* add class/type information to reverse geocodejson result
* minor improvements to default tokenizer configuration
* various smaller fixes to documentation
4.2.1
* fix XSS vulnerability in debug view
4.2.0
* add experimental support for osm2pgsql flex style
* introduce secondary importance value to be retrieved from a raster data file
(currently still unused, to replace address importance, thanks to @tareqpi)
* add new report tool `nominatim admin --collect-os-info`
(thanks @micahcochran, @tareqpi)
* reorganise index to improve lookup performance and size
* run index creation after import in parallel
* run ANALYZE more selectively to speed up continuation of indexing
* fix crash on update when addr:interpolation receives an illegal value
* fix minimum number of retrieved results to be at least 10
* fix search for combinations of special term + name (e.g. Hotel Bellevue)
* do not return interpolations without a parent street on reverse search
* improve invalidation of linked places on updates
* fix address parsing for interpolation lines
* make sure socket timeouts are respected during replication
(working around a bug in some versions of pyosmium)
* update bundled osm2pgsql to 1.7.1
* add support for PostgreSQL 15
* typing fixes to work with latest type annotations from typeshed
* smaller improvements to documentation (thanks to @mausch)
4.1.1
* fix XSS vulnerability in debug view
4.1.0
* switch to ICU tokenizer as default
* add housenumber normalization and support optional spaces during search
* add postcode format checking and support optional spaces during search
* add function for cleaning housenumbers in word table
* add updates/deletion of country names imported from OSM
* linked places no longer overwrite names from a place permanently
* move default country name configuration into yaml file (thanks @tareqpi)
* more compact layout for interpolation and TIGER tables
* introduce mutations to ICU tokenizer (used for German umlauts)
* support reinitializing a full project directory with refresh --website
* fix various issues with linked places on updates
* add support for external sanitizers and token analyzers
* add CLI commands for forced indexing
* add CLI command for version report
* add offline import mode
* change geocodejson to return a feature class in the 'type' field
* add ISO3166-2 to address output (thanks @I70l0teN4ik)
* improve parsing and matching of addr: tags
* support relations as street members of associatedStreet
* better ranking for address results from TIGER data
* adapt rank classification to changed tag usage in OSM
* update bundled osm2pgsql to 1.6.0
* add typing information to Python code
* improve unit test coverage
* reorganise and speed up code for BDD tests, drop support for scenes
* move PHP unit tests to PHPUnit 9.5
* extensive typo fixes in documentation (thanks @woodpeck,@StephanGeorg,
@amandasaurus, @nslxndr, @stefkiourk, @Luflosi, @kianmeng)
* drop official support for installation on CentOS
* add installation instructions for Ubuntu 22.04
* add support for PHP8
* add setup instructions for updates and systemd
* drop support for PostgreSQL 9.5
4.0.2
* fix XSS vulnerability in debug view
4.0.1
* fix initialisation error in replication script
* ICU tokenizer: avoid any special characters in word tokens
* better error message when API php script does not exist
* fix quoting of house numbers in SQL queries
* small fixes and improvements in search query parsing
* add documentation for moving the database to a different machine
4.0.0
* refactor name token computation and introduce ICU tokenizer
@@ -27,6 +212,10 @@
* add testing of installation scripts via CI
* drop support for Python < 3.6 and Postgresql < 9.5
3.7.3
* fix XSS vulnerability in debug view
3.7.2
* fix database check for reverse-only imports
@@ -102,7 +291,7 @@
* increase splitting for large geometries to improve indexing speed
* remove deprecated get_magic_quotes_gpc() function
* make sure that all postcodes have an entry in word and are thus searchable
* remove use of ST_Covers in conjunction woth ST_Intersects,
* remove use of ST_Covers in conjunction with ST_Intersects,
causes bad query planning and slow updates in Postgis3
* update osm2pgsql
@@ -159,7 +348,7 @@
* exclude postcode ranges separated by colon from centre point calculation
* update osm2pgsql, better handling of imports without flatnode file
* switch to more efficient algorithm for word set computation
* use only boundries for country and state parts of addresses
* use only boundaries for country and state parts of addresses
* improve updates of addresses with housenumbers and interpolations
* remove country from place_addressline table and use country_code instead
* optimise indexes on search_name partition tables
@@ -198,7 +387,7 @@
* complete rewrite of reverse search algorithm
* add new geojson and geocodejson output formats
* add simple export script to exprot addresses to CSV
* add simple export script to export addresses to CSV
* remove is_in terms from address computation
* remove unused search_name_country tables
* various smaller fixes to query parsing
@@ -263,7 +452,7 @@
* move installation documentation into this repo
* add self-documenting vagrant scripts
* remove --create-website, recommend to use website directory in build
* add accessor functions for URL parameters and improve erro checking
* add accessor functions for URL parameters and improve error checking
* remove IP blocking and rate-limiting code
* enable CI via travis
* reformatting for more consistent coding style
@@ -274,7 +463,7 @@
* update to refactored osm2pgsql which use libosmium based types
* switch from osmosis to pyosmium for updates
* be more strict when matching against special search terms
* handle postcode entries with mutliple values correctly
* handle postcode entries with multiple values correctly
2.5

View File

@@ -9,10 +9,10 @@ versions.
| Version | End of support for security updates |
| ------- | ----------------------------------- |
| 3.7.x | 2023-04-05 |
| 3.6.x | 2022-12-12 |
| 3.5.x | 2022-06-05 |
| 3.4.x | 2021-10-24 |
| 4.4.x | 2026-03-07 |
| 4.3.x | 2025-09-07 |
| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
## Reporting a Vulnerability
@@ -36,4 +36,6 @@ incident. Announcements will also be published at the
## List of Previous Incidents
* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)

View File

@@ -1,6 +1,6 @@
# Install Nominatim in a virtual machine for development and testing
This document describes how you can install Nominatim inside a Ubuntu 16
This document describes how you can install Nominatim inside a Ubuntu 22
virtual machine on your desktop/laptop (host machine). The goal is to give
you a development environment to easily edit code and run the test suite
without affecting the rest of your system.
@@ -42,9 +42,9 @@ is.
```
# inside the virtual machine:
cd build
wget --no-verbose --output-document=/tmp/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
./utils/setup.php --osm-file /tmp/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | tee monaco.$$.log
cd nominatim-project
wget --no-verbose --output-document=monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
nominatim import --osm-file monaco.osm.pbf 2>&1 | tee monaco.$$.log
```
To repeat an import you'd need to delete the database first
@@ -56,7 +56,7 @@ is.
## Development
Vagrant maps the virtual machine's port 8089 to your host machine. Thus you can
see Nominatim in action on [locahost:8089](http://localhost:8089/nominatim/).
see Nominatim in action on [localhost:8089](http://localhost:8089/nominatim/).
You edit code on your host machine in any editor you like. There is no need to
restart any software: just refresh your browser window.
@@ -69,8 +69,7 @@ installation.
PHP errors are written to `/var/log/apache2/error.log`.
With `echo` and `var_dump()` you write into the output (HTML/XML/JSON) when
you either add `&debug=1` to the URL (preferred) or set
`@define('CONST_Debug', true);` in `settings/local.php`.
you add `&debug=1` to the URL.
In the Python BDD test you can use `logger.info()` for temporary debug
statements.
@@ -130,6 +129,10 @@ and then
Yes, Vagrant and Virtualbox can be installed on MS Windows just fine. You need a 64bit
version of Windows.
##### Will it run on Apple Silicon?
You might need to replace Virtualbox with [Parallels](https://www.parallels.com/products/desktop/).
There is no free/open source version of Parallels.
##### Why Monaco, can I use another country?
@@ -141,11 +144,12 @@ No. Long running Nominatim installations will differ once new import features (o
bug fixes) get added since those usually only get applied to new/changed data.
Also this document skips the optional Wikipedia data import which affects ranking
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation)
for details.
##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?
There is a Vagrant script for CentOS available, but the Nominatim directory
There used to be a Vagrant script for CentOS available, but the Nominatim directory
isn't symlinked/mounted to the host which makes development trickier. We used
it mainly for debugging installation with SELinux.
@@ -154,14 +158,17 @@ are slightly different, e.g. the name of the package manager, Apache2 package
name, location of files. We chose Ubuntu because that is closest to the
nominatim.openstreetmap.org production environment.
You can configure/download other Vagrant boxes from [https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
You can configure/download other Vagrant boxes from
[https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
##### How can I connect to an existing database?
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com`
and port `5432`. The Postgres username is `postgres`. You can edit the `.env` in your
project directory and point Nominatim to it.
NOMINATIM_DATABASE_DSN="pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it"
pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
No data import or restarting necessary.
If the Postgres installation is behind a firewall, you can try
@@ -169,11 +176,12 @@ If the Postgres installation is behind a firewall, you can try
ssh -L 9999:localhost:5432 your-username@your-server.com
inside the virtual machine. It will map the port to `localhost:9999` and then
you edit `settings/local.php` with
you edit the `.env` file with
@define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
NOMINATIM_DATABASE_DSN="pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it"
To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`
To access postgres directly remember to specify the hostname,
e.g. `psql --host localhost --port 9999 nominatim_it`
##### My computer is slow and the import takes too long. Can I start the virtual machine "in the cloud"?

90
Vagrantfile vendored
View File

@@ -17,6 +17,14 @@ Vagrant.configure("2") do |config|
checkout = "no"
end
config.vm.provider "hyperv" do |hv, override|
hv.memory = 2048
hv.linked_clone = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
end
end
config.vm.provider "virtualbox" do |vb, override|
vb.gui = false
vb.memory = 2048
@@ -30,11 +38,38 @@ Vagrant.configure("2") do |config|
lv.memory = 2048
lv.nested = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs', nfs_udp: false
end
end
config.vm.define "ubuntu", primary: true do |sub|
config.vm.define "ubuntu22", primary: true do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu22-apache" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu22-nginx" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu20" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -43,7 +78,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu-apache" do |sub|
config.vm.define "ubuntu20-apache" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -52,7 +87,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu-nginx" do |sub|
config.vm.define "ubuntu20-nginx" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -60,51 +95,4 @@ Vagrant.configure("2") do |config|
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu18" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu18-apache" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu18-nginx" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "centos7" do |sub|
sub.vm.box = "centos/7"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-7.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "centos" do |sub|
sub.vm.box = "generic/centos8"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-8.sh"
s.privileged = false
s.args = [checkout]
end
end
end

View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = None
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()

15
cmake/paths-py.tmpl Normal file
View File

@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()

View File

@@ -4,14 +4,10 @@ import os
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
data_dir='@NOMINATIM_DATADIR@',
config_dir='@NOMINATIM_CONFIGDIR@',
phpcgi_path='@PHPCGI_BIN@'))
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))

View File

@@ -4,14 +4,10 @@ import os
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
data_dir='@CMAKE_SOURCE_DIR@/data',
config_dir='@CMAKE_SOURCE_DIR@/settings',
phpcgi_path='@PHPCGI_BIN@'))
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))

View File

@@ -1,14 +0,0 @@
codecov:
require_ci_to_pass: yes
coverage:
status:
project: off
patch: off
comment:
require_changes: true
after_n_builds: 2
fixes:
- "Nominatim/::"

File diff suppressed because one or more lines are too long

View File

@@ -11,6 +11,7 @@ set (DOC_SOURCES
develop
api
customize
library
index.md
extra.css
styles.css
@@ -23,14 +24,12 @@ foreach (src ${DOC_SOURCES})
endforeach()
ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-8.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-8.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)
ADD_CUSTOM_TARGET(serve-doc
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
)

View File

@@ -5,6 +5,35 @@ your Nominatim database. It is assumed that you have already successfully
installed the Nominatim software itself, if not return to the
[installation page](Installation.md).
## Importing with a database user without superuser rights
Nominatim usually creates its own PostgreSQL database at the beginning of the
import process. This makes usage easier for the user but means that the
database user doing the import needs the appropriate rights.
If you prefer to run the import with a database user with limited rights,
you can do so by changing the import process as follows:
1. Run the command for database preparation with a database user with
superuser rights. For example, to use a db user 'dbadmin' for a
database 'nominatim', execute:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=dbadmin" nominatim import --prepare-database
```
2. Grant the import user the right to create tables. For example, for user 'import-user':
```
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "import-user"'
```
3. Now run the remainder of the import with the import user:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=import-user" nominatim import --continue import-from-file --osm-file file.pbf
```
## Importing multiple regions (without updates)
To import multiple regions in your database you can simply give multiple
@@ -36,16 +65,15 @@ which has the following structure:
```bash
update
   ├── europe
   │   ├── andorra
   │   │   └── sequence.state
   │   └── monaco
   │   └── sequence.state
   └── tmp
└── europe
├── andorra-latest.osm.pbf
└── monaco-latest.osm.pbf
├── europe
├── andorra
│ │ └── sequence.state
└── monaco
└── sequence.state
└── tmp
└── europe
├── andorra-latest.osm.pbf
└── monaco-latest.osm.pbf
```
@@ -99,9 +127,9 @@ Change into the project directory and run the following command:
This will get diffs from the replication server, import diffs and index
the database. The default replication server in the
script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Importing Nominatim to an external PostgreSQL database
## Using an external PostgreSQL database
You can install Nominatim using a database that runs on a different server when
you have physical access to the file system on the other server. Nominatim
@@ -109,6 +137,11 @@ uses a custom normalization library that needs to be made accessible to the
PostgreSQL server. This section explains how to set up the normalization
library.
!!! note
The external module is only needed when using the legacy tokenizer.
If you have chosen the ICU tokenizer, then you can ignore this section
and follow the standard import documentation.
### Option 1: Compiling the library on the database server
The most sure way to get a working library is to compile it on the database
@@ -167,3 +200,45 @@ NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominati
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
to follow the [standard instructions for importing](Import.md).
## Moving the database to another machine
For some configurations it may be useful to run the import on one machine, then
move the database to another machine and run the Nominatim service from there.
For example, you might want to use a large machine to be able to run the import
quickly but only want a smaller machine for production because there is not so
much load. Or you might want to do the import once and then replicate the
database to many machines.
The important thing to keep in mind when transferring the Nominatim installation
is that you need to transfer the database _and the project directory_. Both
parts are essential for your installation.
The Nominatim database can be transferred using the `pg_dump`/`pg_restore` tool.
Make sure to use the same version of PostgreSQL and PostGIS on source and
target machine.
!!! note
Before creating a dump of your Nominatim database, consider running
`nominatim freeze` first. Your database loses the ability to receive further
data updates but the resulting database is only about a third of the size
of a full database.
Next install Nominatim on the target machine by following the standard installation
instructions. Again, make sure to use the same version as the source machine.
Create a project directory on your destination machine and set up the `.env`
file to match the configuration on the source machine. Finally run
nominatim refresh --website
to make sure that the local installation of Nominatim will be used.
If you are using the legacy tokenizer you might also have to switch to the
PostgreSQL module that was compiled on your target machine. If you get errors
that PostgreSQL cannot find or access `nominatim.so` then rerun
nominatim refresh --functions
on the target machine to update the location of the module.

View File

@@ -1,4 +1,4 @@
# Deploying Nominatim
# Deploying Nominatim using the PHP frontend
The Nominatim API is implemented as a PHP application. The `website/` directory
in the project directory contains the configured website. You can serve this
@@ -8,13 +8,13 @@ PHP scripts.
This section gives a quick overview on how to configure Apache and Nginx to
serve Nominatim. It is not meant as a full system administration guide on how
to run a web service. Please refer to the documentation of
[Apache](http://httpd.apache.org/docs/current/) and
[Apache](https://httpd.apache.org/docs/current/) and
[Nginx](https://nginx.org/en/docs/)
for background information on configuring the services.
!!! Note
Throughout this page, we assume that your Nominatim project directory is
located in `/srv/nominatim-project` and that you have installed Nominatim
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
@@ -82,7 +82,7 @@ The website should now be available on `http://localhost/nominatim`.
### Installing the required packages
Nginx has no built-in PHP interpreter. You need to use php-fpm as a deamon for
Nginx has no built-in PHP interpreter. You need to use php-fpm as a daemon for
serving PHP cgi.
On Ubuntu/Debian install nginx and php-fpm with:
@@ -99,7 +99,7 @@ Unix socket instead, change the pool configuration
``` ini
; Replace the tcp listener and add the unix socket
listen = /var/run/php-fpm.sock
listen = /var/run/php-fpm-nominatim.sock
; Ensure that the daemon runs as the correct user
listen.owner = www-data
@@ -121,7 +121,7 @@ location @php {
fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
fastcgi_param QUERY_STRING $args;
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index index.php;
include fastcgi_params;
}
@@ -131,7 +131,7 @@ location ~ [^/]\.php(/|$) {
if (!-f $document_root$fastcgi_script_name) {
return 404;
}
fastcgi_pass unix:/var/run/php-fpm.sock;
fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
fastcgi_index search.php;
include fastcgi.conf;
}
@@ -140,3 +140,9 @@ location ~ [^/]\.php(/|$) {
Restart the nginx and php-fpm services and the website should now be available
at `http://localhost/`.
## Nominatim with other webservers
Users have created instructions for other webservers:
* [Caddy](https://github.com/osm-search/Nominatim/discussions/2580)

View File

@@ -0,0 +1,140 @@
# Deploying the Nominatim Python frontend
Nominatim can be run as a Python-based
[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
and [Starlette](https://www.starlette.io/) as the ASGI framework.
This section gives a quick overview on how to configure Nginx to serve
Nominatim. Please refer to the documentation of
[Nginx](https://nginx.org/en/docs/) for background information on how
to configure it.
!!! Note
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
to adapt the instructions in this case.
### Installing the required packages
The recommended way to deploy a Python ASGI application is to run
the ASGI runner [uvicorn](https://uvicorn.org/)
together with [gunicorn](https://gunicorn.org/) HTTP server. We use
Falcon here as the web framework.
Create a virtual environment for the Python packages and install the necessary
dependencies:
``` sh
sudo apt install virtualenv
virtualenv /srv/nominatim-venv
/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
```
### Setting up Nominatim as a systemd job
Next you need to set up the service that runs the Nominatim frontend. This is
easiest done with a systemd job.
First you need to tell systemd to create a socket file to be used by
gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
``` systemd
[Unit]
Description=Gunicorn socket for Nominatim
[Socket]
ListenStream=/run/nominatim.sock
SocketUser=www-data
[Install]
WantedBy=multi-user.target
```
Now you can add the systemd service for Nominatim itself.
Create the following file `/etc/systemd/system/nominatim.service`:
``` systemd
[Unit]
Description=Nominatim running as a gunicorn application
After=network.target
Requires=nominatim.socket
[Service]
Type=simple
Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
User=www-data
Group=www-data
WorkingDirectory=/srv/nominatim-project
ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
ExecReload=/bin/kill -s HUP $MAINPID
StandardOutput=append:/var/log/gunicorn-nominatim.log
StandardError=inherit
PrivateTmp=true
TimeoutStopSec=5
KillMode=mixed
[Install]
WantedBy=multi-user.target
```
This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
its own Python process using
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
connections to the database to serve requests in parallel.
Make the new services known to systemd and start them:
``` sh
sudo systemctl daemon-reload
sudo systemctl enable nominatim.socket
sudo systemctl start nominatim.socket
sudo systemctl enable nominatim.service
sudo systemctl start nominatim.service
```
This sets the service up, so that Nominatim is automatically started
on reboot.
### Configuring nginx
To make the service available to the world, you need to proxy it through
nginx. Add the following definition to the default configuration:
``` nginx
upstream nominatim_service {
server unix:/run/nominatim.sock fail_timeout=0;
}
server {
listen 80;
listen [::]:80;
root /var/www/html;
index /search;
location / {
proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_redirect off;
proxy_pass http://nominatim_service;
}
}
```
Reload nginx with
```
sudo systemctl reload nginx
```
and you should be able to see the status of your server under
`http://localhost/status`.

View File

@@ -37,40 +37,6 @@ nominatim import --continue indexing
Otherwise it's best to start the full setup from the beginning.
### PHP "open_basedir restriction in effect" warnings
PHP Warning: file_get_contents(): open_basedir restriction in effect.
You need to adjust the
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
setting in your PHP configuration (`php.ini` file). By default this setting may
look like this:
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
Either add reported directories to the list or disable this setting temporarily
by adding ";" at the beginning of the line. Don't forget to enable this setting
again once you are done with the PHP command line operations.
### PHP timezeone warnings
The Apache log may contain lots of PHP warnings like this:
`PHP Warning: date_default_timezone_set() function.`
You should set the default time zone as instructed in the warning in
your `php.ini` file. Find the entry about timezone and set it to
something like this:
; Defines the default timezone used by the date functions
; https://php.net/date.timezone
date.timezone = 'America/Denver'
Or
```
echo "date.timezone = 'America/Denver'" > /etc/php.d/timezone.ini
```
### nominatim.so version mismatch
@@ -79,7 +45,7 @@ When running the import you may get a version mismatch:
pg_config seems to use bad includes sometimes when multiple versions
of PostgreSQL are available in the system. Make sure you remove the
server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
server development libraries (`postgresql-server-dev-13` on Ubuntu)
and recompile (`cmake .. && make`).
@@ -106,11 +72,6 @@ If you are using a flatnode file, then it may also be that the underlying
filesystem does not fully support 'mmap'. A notable candidate is virtualbox's
vboxfs.
### I see the error: "clang: Command not found" on CentOS
On CentOS 7 users reported `/opt/rh/llvm-toolset-7/root/usr/bin/clang: Command not found`.
Double-check clang is installed. Instead of `make` try running `make CLANG=true`.
### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal
Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168)
@@ -126,22 +87,6 @@ The server cannot access your database. Add `&debug=1` to your URL
to get the full error message.
### On CentOS the website shows "Could not connect to server"
`could not connect to server: No such file or directory`
On CentOS v7 the PostgreSQL server is started with `systemd`. Check if
`/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`. If
so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security
feature, so use the
[preferred solution](../appendix/Install-on-Centos-7.md#adding-selinux-security-settings).
However, you can solve this the quick and dirty way by commenting out that line and then run
sudo systemctl daemon-reload
sudo systemctl restart httpd
### Website reports "DB Error: insufficient permissions"
The user the webserver, e.g. Apache, runs under needs to have access to the
@@ -181,9 +126,6 @@ by everybody, e.g.
Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.
When running SELinux, make sure that the
[context is set up correctly](../appendix/Install-on-Centos-7.md#adding-selinux-security-settings).
When you recently updated your operating system, updated PostgreSQL to
a new version or moved files (e.g. the build directory) you should
recreate `nominatim.so`. Try
@@ -194,7 +136,7 @@ recreate `nominatim.so`. Try
cmake $main_Nominatim_path && make
```
### Setup.php fails with "DB Error: extension not found"
### Setup fails with "DB Error: extension not found"
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
See the installation instructions for a full list of required packages.

View File

@@ -14,15 +14,15 @@ to a single Nominatim setup: configuration, extra data, etc. Create a project
directory apart from the Nominatim software and change into the directory:
```
mkdir ~/nominatim-planet
cd ~/nominatim-planet
mkdir ~/nominatim-project
cd ~/nominatim-project
```
In the following, we refer to the project directory as `$PROJECT_DIR`. To be
able to copy&paste instructions, you can export the appropriate variable:
```
export PROJECT_DIR=~/nominatim-planet
export PROJECT_DIR=~/nominatim-project
```
The Nominatim tool assumes per default that the current working directory is
@@ -74,15 +74,15 @@ but it will improve the quality of the results if this is installed.
This data is available as a binary download. Put it into your project directory:
cd $PROJECT_DIR
wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
wget https://nominatim.org/data/wikimedia-importance.sql.gz
The file is about 400MB and adds around 4GB to the Nominatim database.
!!! tip
If you forgot to download the wikipedia rankings, you can also add
importances after the import. Download the files, then run
`nominatim refresh --wiki-data --importance`. Updating importances for
a planet can take a couple of hours.
If you forgot to download the wikipedia rankings, then you can
also add importances after the import. Download the SQL files, then
run `nominatim refresh --wiki-data --importance`. Updating
importances for a planet will take a couple of hours.
### External postcodes
@@ -92,8 +92,8 @@ and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads
This data can be optionally downloaded into the project directory:
cd $PROJECT_DIR
wget https://www.nominatim.org/data/gb_postcodes.csv.gz
wget https://www.nominatim.org/data/us_postcodes.csv.gz
wget https://nominatim.org/data/gb_postcodes.csv.gz
wget https://nominatim.org/data/us_postcodes.csv.gz
You can also add your own custom postcode sources, see
[Customization of postcodes](../customize/Postcodes.md).
@@ -139,7 +139,7 @@ import. So this option is particularly interesting if you plan to transfer the
database or reuse the space later.
!!! warning
The datastructure for updates are also required when adding additional data
The data structures for updates are also required when adding additional data
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
If you plan to use those, you must not use the `--no-updates` parameter.
Do a normal import, add the external data and once you are done with
@@ -150,10 +150,10 @@ database or reuse the space later.
If you only want to use the Nominatim database for reverse lookups or
if you plan to use the installation only for exports to a
[photon](https://photon.komoot.de/) database, then you can set up a database
[photon](https://photon.komoot.io/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
This saves about 5% of disk space, but the import won't be significantly faster.
### Filtering Imported Data
@@ -228,7 +228,7 @@ to load the OSM data into the PostgreSQL database. This step is very demanding
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
this point. PostgreSQL blocks at least the part of RAM that has been configured
with the `shared_buffers` parameter during
[PostgreSQL tuning](Installation.md#postgresql-tuning)
[PostgreSQL tuning](Installation.md#tuning-the-postgresql-database)
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
its internal data structures, potentially more when it has to process very large
relations. In addition it needs to maintain a cache for node locations. The size
@@ -254,26 +254,71 @@ successfully.
nominatim admin --check-database
```
Now you can try out your installation by running:
Now you can try out your installation by executing a simple query on the
command line:
```sh
``` sh
nominatim search --query Berlin
```
or, when you have a reverse-only installation:
``` sh
nominatim reverse --lat 51 --lon 45
```
If you want to run Nominatim as a service, you need to make a choice between
running the modern Python frontend and the legacy PHP frontend.
Make sure you have installed the right packages as per
[Installation](Installation.md#software).
#### Testing the Python frontend
To run the test server against the Python frontend, you must choose a
web framework to use, either starlette or falcon. Make sure the appropriate
packages are installed. Then run
``` sh
nominatim serve
```
This runs a small test server normally used for development. You can use it
to verify that your installation is working. Go to
`http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
or, if you prefer to use Starlette instead of Falcon as webserver,
Note that search query is not supported for reverse-only imports. You can run a
reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
``` sh
nominatim serve --engine starlette
```
To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
## Adding search through category phrases
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the Python frontend](Deployment-Python.md).
If you want to be able to search for places by their type through
#### Testing the PHP frontend
You can run a small test server with the PHP frontend like this:
```sh
nominatim serve --engine php
```
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the PHP frontend](Deployment-PHP.md).
## Enabling search by category phrases
To be able to search for places by their type using
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:

View File

@@ -4,10 +4,8 @@ This page contains generic installation instructions for Nominatim and its
prerequisites. There are also step-by-step instructions available for
the following operating systems:
* [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
* [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
* [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
* [CentOS 8](../appendix/Install-on-Centos-8.md)
* [CentOS 7.2](../appendix/Install-on-Centos-7.md)
These OS-specific instructions can also be found in executable form
in the `vagrant/` directory.
@@ -25,8 +23,9 @@ and can't offer support.
### Software
!!! Warning
For larger installations you **must have** PostgreSQL 11+ and Postgis 3+
For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
otherwise import and queries will be slow to the point of being unusable.
Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.
For compiling:
@@ -36,46 +35,59 @@ For compiling:
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [nlohmann/json](https://json.nlohmann.me/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
For running Nominatim:
* [PostgreSQL](https://www.postgresql.org) (9.5+ will work, 11+ strongly recommended)
* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
* [Python 3](https://www.python.org/) (3.6+)
* [Python 3](https://www.python.org/) (3.7+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
* PHP-cgi (for running queries from the command line)
For running continuous updates:
* [pyosmium](https://osmcode.org/pyosmium/)
For running the Python frontend:
* one of the following web frameworks:
* [falcon](https://falconframework.org/) (3.0+)
* [starlette](https://www.starlette.io/)
* [uvicorn](https://www.uvicorn.org/)
For running the legacy PHP frontend:
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
For dependencies for running tests and building documentation, see
the [Development section](../develop/Development-Environment.md).
### Hardware
A minimum of 2GB of RAM is required or installation will fail. For a full
planet import 64GB of RAM or more are strongly recommended. Do not report
planet import 128GB of RAM or more are strongly recommended. Do not report
out of memory problems if you have less than 64GB RAM.
For a full planet install you will need at least 900GB of hard disk space.
For a full planet install you will need at least 1TB of hard disk space.
Take into account that the OSM database is growing fast.
Fast disks are essential. Using NVME disks is recommended.
Even on a well configured machine the import of a full planet takes
around 2 days. On traditional spinning disks, 7-8 days are more realistic.
around 2 days. When using traditional SSDs, 4-5 days are more realistic.
## Tuning the PostgreSQL database
@@ -89,8 +101,7 @@ your `postgresql.conf` file.
work_mem = (50MB)
effective_cache_size = (24GB)
synchronous_commit = off
checkpoint_segments = 100 # only for postgresql <= 9.4
max_wal_size = 1GB # postgresql > 9.4
max_wal_size = 1GB
checkpoint_timeout = 10min
checkpoint_completion_target = 0.9
@@ -108,15 +119,6 @@ you might consider setting:
and even reduce `autovacuum_work_mem` further. This will reduce the amount
of memory that autovacuum takes away from the import process.
For the initial import, you should also set:
fsync = off
full_page_writes = off
Don't forget to reenable them after the initial import or you risk database
corruption.
## Downloading and building Nominatim
### Downloading the latest release
@@ -130,13 +132,13 @@ If you want to install latest development version from github, make sure to
also check out the osm2pgsql subproject:
```
git clone --recursive git://github.com/openstreetmap/Nominatim.git
git clone --recursive https://github.com/openstreetmap/Nominatim.git
```
The development version does not include the country grid. Download it separately:
```
wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
```
### Building Nominatim
@@ -158,6 +160,17 @@ make
sudo make install
```
!!! warning
The default installation no longer compiles the PostgreSQL module that
is needed for the legacy tokenizer from older Nominatim versions. If you
are upgrading an older database or want to run the
[legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
some other reason, you need to enable the PostgreSQL module via
cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
you need to have the server development headers for PostgreSQL installed.
On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
Nominatim installs itself into `/usr/local` per default. To choose a different
installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
cmake command. Make sure that the `bin` directory is available in your path

View File

@@ -34,18 +34,39 @@ to rerun the statistics computation when adding larger amounts of new data,
for example, when adding an additional country via `nominatim add-data`.
## Forcing recomputation of places and areas
Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
When running replication updates, Nominatim tries to recompute the search
and address information for all places that are affected by a change. But it
needs to restrict the total number of changes to make sure it can keep up
with the minutely updates. Therefore it will refrain from propagating changes
that affect a lot of objects.
The administrator may force an update of places in the database.
`nominatim refresh --data-object` invalidates a single OSM object.
`nominatim refresh --data-area` invalidates an OSM object and all dependent
objects. These are usually the places that lie inside its area or around the
center of the object. Both commands expect the OSM object as an argument
of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
`R` (relation).
After invalidating the object, indexing must be run again. If continuous
updates are running in the background, the objects will be recomputed together
with the next round of updates. Otherwise you need to run `nominatim index`
to finish the recomputation.
## Removing large deleted objects
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
To run this command you will need to pass a PostgreSQL time interval. For example to
delete any objects that have been deleted more than a month ago you would run:
`nominatim admin --clean-deleted '1 month'`
```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```

View File

@@ -15,6 +15,42 @@ breaking changes. **Please read them before running the migration.**
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
## 4.2.0 -> 4.3.0
### New indexes for reverse lookup
The reverse lookup algorithm has changed slightly to improve performance.
This change needs a different index in the database. The required index
will be automatically built during migration. Until the new index is available
performance of the /reverse endpoint is significantly reduced. You should
therefore either remove traffic from the machine before attempting a
version update or create the index manually **before** starting the update
using the following SQL:
```sql
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
WHERE rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND linked_place_id is null AND osm_type = 'N';
```
## 4.0.0 -> 4.1.0
### ICU tokenizer is the new default
Nominatim now installs the [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer)
by default. This only has an effect on newly installed databases. When
updating older databases, it keeps its installed tokenizer. If you still
run with the legacy tokenizer, make sure to compile Nominatim with the
PostgreSQL module, see [Installation](Installation.md#building-nominatim).
### geocodejson output changed
The `type` field of the geocodejson output has changed. It now contains
the address class of the object instead of the value of the OSM tag. If
your client has used the `type` field, switch them to read `osm_value`
instead.
## 3.7.0 -> 4.0.0
### NOMINATIM_PHRASE_CONFIG removed

View File

@@ -16,12 +16,12 @@ and run it. Grab the latest release from
[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.
Next you need to adapt the UI yo your installation. Custom settings need to be
Next you need to adapt the UI to your installation. Custom settings need to be
put into `dist/theme/config.theme.js`. At a minimum you need to
set `Nominatim_API_Endpoint` to point to your Nominatim installation:
cd nominatim-ui
echo "Nominatim_Config.Nominatim_API_Endpoint='https:\\myserver.org\nominatim';" > dist/theme/config.theme.js
echo "Nominatim_Config.Nominatim_API_Endpoint='https://myserver.org/nominatim/';" > dist/theme/config.theme.js
For the full set of available settings, have a look at `dist/config.defaults.js`.
@@ -161,24 +161,16 @@ directory like this:
# If no endpoint is given, then use search.
RewriteRule ^(/|$) "search.php"
# If format-html is explicity requested, forward to the UI.
# If format-html is explicitly requested, forward to the UI.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "format=html"
RewriteRule ^([^/]+) ui/$1.html [R,END]
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
# If no format parameter is there then forward anything
# but /reverse and /lookup to the UI.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+).php ui/$1.html [R,END]
# Same but .php suffix is missing.
RewriteCond %{QUERY_STRING} "!format="
RewriteCond %{REQUEST_URI} "!/lookup"
RewriteCond %{REQUEST_URI} "!/reverse"
RewriteRule ^([^/]+) ui/$1.html [R,END]
RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
</Directory>
```

View File

@@ -59,8 +59,137 @@ imported multiple country extracts and want to keep them
up-to-date, [Advanced installations section](Advanced-Installations.md)
contains instructions to set up and update multiple country extracts.
#### One-time mode
When the `--once` parameter is given, then Nominatim will download exactly one
batch of updates and then exit. This one-time mode still respects the
`NOMINATIM_REPLICATION_UPDATE_INTERVAL` that you have set. If according to
the update interval no new data has been published yet, it will go to sleep
until the next expected update and only then attempt to download the next batch.
The one-time mode is particularly useful if you want to run updates continuously
but need to schedule other work in between updates. For example, the main
service at osm.org uses it to regularly recompute postcodes -- a process that
must not be run while updates are in progress. Its update script
looks like this:
```sh
#!/bin/bash
# Switch to your project directory.
cd /srv/nominatim
while true; do
nominatim replication --once
if [ -f "/srv/nominatim/schedule-maintenance" ]; then
rm /srv/nominatim/schedule-maintenance
nominatim refresh --postcodes
fi
done
```
A cron job then creates the file `/srv/nominatim/schedule-maintenance` once per night.
##### One-time mode with systemd
You can run the one-time mode with a systemd timer & service.
Create a timer description like `/etc/systemd/system/nominatim-updates.timer`:
```
[Unit]
Description=Timer to start updates of Nominatim
[Timer]
OnActiveSec=2
OnUnitActiveSec=1min
Unit=nominatim-updates.service
[Install]
WantedBy=multi-user.target
```
And then a similar service definition: `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
Description=Single updates of Nominatim
[Service]
WorkingDirectory=/srv/nominatim
ExecStart=nominatim replication --once
StandardOutput=append:/var/log/nominatim-updates.log
StandardError=append:/var/log/nominatim-updates.error.log
User=nominatim
Group=nominatim
Type=simple
[Install]
WantedBy=multi-user.target
```
Replace the `WorkingDirectory` with your project directory. Also adapt user and
group names as required. `OnUnitActiveSec` defines how often the individual
update command is run.
Now activate the service and start the updates:
```
sudo systemctl daemon-reload
sudo systemctl enable nominatim-updates.timer
sudo systemctl start nominatim-updates.timer
```
You can stop future data updates, while allowing any current, in-progress
update steps to finish, by running `sudo systemctl stop
nominatim-updates.timer` and waiting until `nominatim-updates.service` isn't
running (`sudo systemctl is-active nominatim-updates.service`). Current output
from the update can be seen like above (`systemctl status
nominatim-updates.service`).
#### Catch-up mode
With the `--catch-up` parameter, Nominatim will immediately try to download
all changes from the server until the database is up-to-date. The catch-up mode
still respects the parameter `NOMINATIM_REPLICATION_MAX_DIFF`. It downloads and
applies the changes in appropriate batches until all is done.
The catch-up mode is foremost useful to bring the database up to speed after the
initial import. Given that the service usually is not in production at this
point, you can temporarily be a bit more generous with the batch size and
number of threads you use for the updates by running catch-up like this:
```
cd /srv/nominatim
NOMINATIM_REPLICATION_MAX_DIFF=5000 nominatim replication --catch-up --threads 15
```
The catch-up mode is also useful when you want to apply updates at a lower
frequency than what the source publishes. You can set up a cron job to run
replication catch-up at whatever interval you desire.
!!! hint
When running scheduled updates with catch-up, it is a good idea to choose
a replication source with an update frequency that is an order of magnitude
lower. For example, if you want to update once a day, use an hourly updated
source. This makes sure that you don't miss an entire day of updates when
the source is unexpectedly late to publish its update.
If you want to use the source with the same update frequency (e.g. a daily
updated source with daily updates), use the
continuous update mode. It keeps re-requesting the newest update until it
is published.
#### Continuous updates
!!! danger
This mode is no longer recommended to use and will be removed in future
releases. systemd is much better
suited for running regular updates. Please refer to the setup
instructions for running one-time mode with systemd above.
This is the easiest mode. Simply run the replication command without any
parameters:
@@ -70,7 +199,7 @@ The update application keeps running forever and retrieves and applies
new updates from the server as they are published.
You can run this command as a simple systemd service. Create a service
description like that in `/etc/systemd/system/nominatim-update.service`:
description like that in `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
@@ -100,67 +229,4 @@ sudo systemctl enable nominatim-updates
sudo systemctl start nominatim-updates
```
#### One-time mode
When the `--once` parameter is given, then Nominatim will download exactly one
batch of updates and then exit. This one-time mode still respects the
`NOMINATIM_REPLICATION_UPDATE_INTERVAL` that you have set. If according to
the update interval no new data has been published yet, it will go to sleep
until the next expected update and only then attempt to download the next batch.
The one-time mode is particularly useful if you want to run updates continuously
but need to schedule other work in between updates. For example, the main
service at osm.org uses it, to regularly recompute postcodes -- a process that
must not be run while updates are in progress. Its update script
looks like this:
```sh
#!/bin/bash
# Switch to your project directory.
cd /srv/nominatim
while true; do
nominatim replication --once
if [ -f "/srv/nominatim/schedule-mainenance" ]; then
rm /srv/nominatim/schedule-mainenance
nominatim refresh --postcodes
fi
done
```
A cron job then creates the file `/srv/nominatim/need-mainenance` once per night.
#### Catch-up mode
With the `--catch-up` parameter, Nominatim will immediately try to download
all changes from the server until the database is up-to-date. The catch-up mode
still respects the parameter `NOMINATIM_REPLICATION_MAX_DIFF`. It downloads and
applies the changes in appropriate batches until all is done.
The catch-up mode is foremost useful to bring the database up to speed after the
initial import. Give that the service usually is not in production at this
point, you can temporarily be a bit more generous with the batch size and
number of threads you use for the updates by running catch-up like this:
```
cd /srv/nominatim
NOMINATIM_REPLICATION_MAX_DIFF=5000 nominatim replication --catch-up --threads 15
```
The catch-up mode is also useful when you want to apply updates at a lower
frequency than what the source publishes. You can set up a cron job to run
replication catch-up at whatever interval you desire.
!!! hint
When running scheduled updates with catch-up, it is a good idea to choose
a replication source with an update frequency that is an order of magnitude
lower. For example, if you want to update once a day, use an hourly updated
source. This makes sure that you don't miss an entire day of updates when
the source is unexpectely late to publish its update.
If you want to use the source with the same update frequency (e.g. a daily
updated source with daily updates), use the
continuous update mode. It ensures to re-request the newest update until it
is published.

View File

@@ -2,13 +2,17 @@
Show all details about a single place saved in the database.
This API endpoint is meant for visual inspection of the data in the database,
mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
The parameters of the endpoint and the output may change occasionally between
versions of Nominatim. Do not rely on the output in scripts or applications.
!!! warning
The details page exists for debugging only. You may not use it in scripts
or to automatically query details about a result.
The details endpoint at https://nominatim.openstreetmap.org
may not be used in scripts or bots at all.
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
## Parameters
The details API supports the following two request formats:
@@ -35,59 +39,90 @@ for a place is different between Nominatim installation (servers) and
changes when data gets reimported. Therefore it cannot be used as
a permanent id and shouldn't be used in bug reports.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/details.php`. This is now deprecated
and will be removed in future versions.
Additional optional parameters are explained below.
## Parameters
This section lists additional optional parameters.
### Output format
* `json_callback=<string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
When set, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
* `pretty=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| pretty | 0 or 1 | 0 |
Add indentation to make it more human-readable. (Default: 0)
`[PHP-only]` Add indentation to the output to make it more human-readable.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
* `keywords=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| keywords | 0 or 1 | 0 |
Include a list of name keywords and address keywords (word ids). (Default: 0)
When set to 1, include a list of name keywords and address keywords
in the result.
* `linkedplaces=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| linkedplaces | 0 or 1 | 1 |
Include a details of places that are linked with this one. Places get linked
Include details of places that are linked with this one. Places get linked
together when they are different forms of the same physical object. Nominatim
links two kinds of objects together: place nodes get linked with the
corresponding administrative boundaries. Waterway relations get linked together with their
members.
(Default: 1)
* `hierarchy=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| hierarchy | 0 or 1 | 0 |
Include details of places lower in the address hierarchy. (Default: 0)
Include details of places lower in the address hierarchy.
* `group_hierarchy=[0|1]`
`[Python-only]` will only return properly parented places. These are address
or POI-like places that reuse the address of their parent street or place.
For JSON output will group the places by type. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| group_hierarchy | 0 or 1 | 0 |
* `polygon_geojson=[0|1]`
When set to 1, the output of the address hierarchy will be
grouped by type.
Include geometry of result. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
Include geometry of result.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing result, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
## Examples

View File

@@ -59,3 +59,27 @@ suited for these kinds of queries.
That said if you installed your own Nominatim instance you can use the
`nominatim export` PHP script as basis to return such lists.
#### 7. My result has a wrong postcode. Where does it come from?
Most places in OSM don't have a postcode, so Nominatim tries to interpolate
one. It first looks at all the places that make up the address of the place.
If one of them has a postcode defined, this is the one to be used. When
none of the address parts has a postcode either, Nominatim interpolates one
from the surrounding objects. If the postcode for your result is wrong, then
most of the time there is an OSM object with the wrong postcode nearby.
To find the bad postcode, go to
[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
and search for your place. When you have found it, click on the 'details' link
under the result to go to the details page. There is a field 'Computed Postcode'
which should display the bad postcode. Click on the 'how?' link. A small
explanation text appears. It contains a link to a query for Overpass Turbo.
Click on that and you get a map with all places in the area that have the bad
postcode. If none is displayed, zoom the map out a bit and then click on 'Run'.
Now go to [OpenStreetMap](https://openstreetmap.org) and fix the error you
have just found. It will take at least a day for Nominatim to catch up with
your data fix. Sometimes longer, depending on how much editing activity is in
the area.

View File

@@ -3,7 +3,7 @@
The lookup API allows to query the address and other details of one or
multiple OSM objects like node, way or relation.
## Parameters
## Endpoint
The lookup API has the following format:
@@ -15,86 +15,140 @@ The lookup API has the following format:
prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
can be queried at the same time.
Additional optional parameters are explained below.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional optional parameters.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Only has an effect for JSON output formats.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contrary always
send the currently chosen browser language.
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making large numbers of requests please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
## Examples
##### XML
[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)
```xml
<lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
<place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
<lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data &#xA9; OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
<place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
<city>Manchester</city>
<county>Greater Manchester</county>
<state_district>North West England</state_district>
@@ -102,21 +156,20 @@ This overrides the specified machine readable format. (Default: 0)
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
<place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
<attraction>Brandenburg Gate</attraction>
<house_number>1</house_number>
<pedestrian>Pariser Platz</pedestrian>
<suburb>Mitte</suburb>
<city_district>Mitte</city_district>
<city>Berlin</city>
<state>Berlin</state>
<postcode>10117</postcode>
<place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Stra&#xDF;e, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
<tourism>Brandenburger Tor</tourism>
<road>Brandenburger Stra&#xDF;e</road>
<suburb>Historische Innenstadt</suburb>
<city>Potsdam</city>
<state>Brandenburg</state>
<postcode>14467</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
<place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
<place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
<city>Berlin</city>
<state>Berlin</state>
<postcode>10178</postcode>
<country>Germany</country>
<country_code>de</country_code>
</place>
@@ -125,38 +178,50 @@ This overrides the specified machine readable format. (Default: 0)
##### JSON with extratags
[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)
```json
[
{
"place_id": "84271358",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"osm_type": "way",
"osm_id": "50637691",
"lat": "52.39955055",
"lon": "13.04806574678",
"display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
"class": "historic",
"type": "city_gate",
"importance": "0.221233780277011",
"address": {
"address29": "Brandenburger Tor",
"pedestrian": "Brandenburger Straße",
"suburb": "Nördliche Innenstadt",
"city": "Potsdam",
"state": "Brandenburg",
"postcode": "14467",
"country": "Germany",
"country_code": "de"
},
"extratags": {
"image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
"wikidata": "Q695045",
"wikipedia": "de:Brandenburger Tor (Potsdam)",
"wheelchair": "yes",
"description": "Kleines Brandenburger Tor in Potsdam"
}
}
{
"place_id": 115462561,
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"osm_type": "way",
"osm_id": 50637691,
"boundingbox": [
"52.3994612",
"52.3996426",
"13.0479574",
"13.0481754"
],
"lat": "52.399550700000006",
"lon": "13.048066846939687",
"display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
"class": "tourism",
"type": "attraction",
"importance": 0.2940287400552381,
"address": {
"tourism": "Brandenburger Tor",
"road": "Brandenburger Straße",
"suburb": "Historische Innenstadt",
"city": "Potsdam",
"state": "Brandenburg",
"postcode": "14467",
"country": "Germany",
"country_code": "de"
},
"extratags": {
"image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
"heritage": "4",
"wikidata": "Q695045",
"architect": "Carl von Gontard;Georg Christian Unger",
"wikipedia": "de:Brandenburger Tor (Potsdam)",
"wheelchair": "yes",
"description": "Kleines Brandenburger Tor in Potsdam",
"heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
"heritage:operator": "bldam",
"architect:wikidata": "Q68768;Q95223",
"year_of_construction": "1771"
}
}
]
```

View File

@@ -28,6 +28,7 @@ a single place (for reverse) of the following format:
"city": "London",
"state_district": "Greater London",
"state": "England",
"ISO3166-2-lvl4": "GB-ENG",
"postcode": "SW1A 2DU",
"country": "United Kingdom",
"country_code": "gb"
@@ -97,7 +98,10 @@ The GeocodeJSON format follows the
The following feature attributes are implemented:
* `osm_type`, `osm_id` - reference to the OSM object (unofficial extension, [see notes](#osm-reference))
* `type` - value of the main tag of the object (e.g. residential, restaurant, ...)
* `type` - the 'address level' of the object (`house`, `street`, `district`, `city`,
`county`, `state`, `country`, `locality`)
* `osm_key`- key of the main tag of the OSM object (e.g. boundary, highway, amenity)
* `osm_value` - value of the main tag of the OSM object (e.g. residential, restaurant)
* `label` - full comma-separated address
* `name` - localised name of the place
* `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
@@ -126,6 +130,7 @@ formats depending on the API call.
</result>
<addressparts>
<state>Bavaria</state>
<ISO3166-2-lvl4>DE-BY</ISO3166-2-lvl4>
<country>Germany</country>
<country_code>de</country_code>
</addressparts>
@@ -179,6 +184,7 @@ Additional information requested with `addressdetails=1`, `extratags=1` and
<city>London</city>
<state_district>Greater London</state_district>
<state>England</state>
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
<postcode>SW1A 2DU</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
@@ -205,8 +211,8 @@ be more than one. The attributes of that element contain:
* `ref` - content of `ref` tag if it exists
* `lat`, `lon` - latitude and longitude of the centroid of the object
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
* `place_rank` - class [search rank](../develop/Ranking#search-rank)
* `address_rank` - place [address rank](../develop/Ranking#address-rank)
* `place_rank` - class [search rank](../customize/Ranking.md#search-rank)
* `address_rank` - place [address rank](../customize/Ranking.md#address-rank)
* `display_name` - full comma-separated address
* `class`, `type` - key and value of the main OSM tag
* `importance` - computed importance rank
@@ -230,7 +236,7 @@ on another server. It may even change its ID on the same server when it is
removed and reimported while updating the database with fresh OSM data.
It is thus not useful to treat it as permanent for later use.
The combination `osm_type`+`osm_id` is slighly better but remember in
The combination `osm_type`+`osm_id` is slightly better but remember in
OpenStreetMap mappers can delete, split, recreate places (and those
get a new `osm_id`), there is no link between those old and new ids.
Places can also change their meaning without changing their `osm_id`,
@@ -279,12 +285,12 @@ with a designation label. Per default the following labels may appear:
* continent
* country, country_code
* region, state, state_district, county
* region, state, state_district, county, ISO3166-2-lvl<admin_level>
* municipality, city, town, village
* city_district, district, borough, suburb, subdivision
* hamlet, croft, isolated_dwelling
* neighbourhood, allotments, quarter
* city_block, residental, farm, farmyard, industrial, commercial, retail
* city_block, residential, farm, farmyard, industrial, commercial, retail
* road
* house_number, house_name
* emergency, historic, military, natural, landuse, place, railway,

View File

@@ -1,8 +1,16 @@
### Nominatim API
Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
!!! Attention
The current version of Nominatim implements two different search frontends:
the old PHP frontend and the new Python frontend. They have a very similar
API but differ in some implementation details. These are marked in the
documentation as `[Python-only]` or `[PHP-only]`.
Its API has the following endpoints for querying the data:
`https://nominatim.openstreetmap.org` implements the **Python frontend**.
So users should refer to the **`[Python-only]`** comments.
This section describes the API V1 of the Nominatim web service. The
service offers the following endpoints:
* __[/search](Search.md)__ - search OSM objects by name or type
* __[/reverse](Reverse.md)__ - search OSM object by their location
@@ -12,3 +20,6 @@ Its API has the following endpoints for querying the data:
back in Nominatim in case the deletion was accidental
* __/polygons__ - list of broken polygons detected by Nominatim
* __[/details](Details.md)__ - show internal details for an object (for debugging only)

View File

@@ -1,6 +1,7 @@
# Reverse Geocoding
Reverse geocoding generates an address from a latitude and longitude.
Reverse geocoding generates an address from a coordinate given as
latitude and longitude.
## How it works
@@ -18,8 +19,7 @@ The other issue to be aware of is that the closest OSM object may not always
have a similar enough address to the coordinate you were requesting. For
example, in dense city areas it may belong to a completely different street.
## Parameters
## Endpoint
The main format of the reverse API is
@@ -27,61 +27,105 @@ The main format of the reverse API is
https://nominatim.openstreetmap.org/reverse?lat=<value>&lon=<value>&<params>
```
where `lat` and `lon` are latitude and longitutde of a coordinate in WGS84
where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
projection. The API returns exactly one result or an error when the coordinate
is in an area with no OSM data coverage.
Additional parameters are accepted as listed below.
!!! warning "Deprecation warning"
!!! danger "Deprecation warning"
The reverse API used to allow address lookup for a single OSM object by
its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
instead.
its OSM id for `[PHP-only]`. The use is considered deprecated.
Use the [Address Lookup API](Lookup.md) instead.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional parameters to further influence the output.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Only has an effect for JSON output formats.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 1 |
Include a breakdown of the address into elements. (Default: 1)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
### Result limitation
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contrary always
send the currently chosen browser language.
* `zoom=[0-18]`
Level of detail required for the address. Default: 18. This is a number that
### Result restriction
| Parameter | Value | Default |
|-----------| ----- | ------- |
| zoom | 0-18 | 18 |
Level of detail required for the address. This is a number that
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
like Leaflet.js, Openlayers etc.
In terms of address details the zoom levels are as follows:
@@ -92,41 +136,81 @@ In terms of address details the zoom levels are as follows:
5 | state
8 | county
10 | city
14 | suburb
12 | town / borough
13 | village / suburb
14 | neighbourhood
15 | any settlement
16 | major streets
17 | major and minor streets
18 | building
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states etc.) and administrative boundaries.
The `poi` layer selects all points of interest. This includes classic points
of interest like restaurants, shops, hotels but also less obvious features
like recycling bins, guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making a large number of requests, please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is in HTML format.
This overrides the specified machine readable format.
## Examples

View File

@@ -8,12 +8,12 @@ The search query may also contain
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
This can be used to narrow down the kind of objects to be returned.
!!! warning
!!! note
Special phrases are not suitable to query all objects of a certain type in an
area. Nominatim will always just return a collection of the best matches. To
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
## Parameters
## Endpoint
The search API has the following format:
@@ -21,82 +21,213 @@ The search API has the following format:
https://nominatim.openstreetmap.org/search?<params>
```
The search term may be specified with two different sets of parameters:
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/search.php`. This is now deprecated
and will be removed in future versions.
* `q=<query>`
The query term can be given in two different forms: free-form or structured.
Free-form query string to search for.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
### Free-form query
| Parameter | Value |
|-----------| ----- |
| q | Free-form query string to search for |
* `street=<housenumber> <streetname>`
* `city=<city>`
* `county=<county>`
* `state=<state>`
* `country=<country>`
* `postalcode=<postalcode>`
In this form, the query can be unstructured.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
Alternative query string format split into several parameters for structured requests.
Structured requests are faster but are less robust against alternative
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
The free-form may also contain special phrases to describe the type of
place to be returned or a coordinate to search close to a position.
Both query forms accept the additional parameters listed below.
### Structured query
| Parameter | Value |
|----------- | ----- |
| amenity | name and/or type of POI |
| street | housenumber and streetname |
| city | city |
| county | county |
| state | state |
| country | country |
| postalcode | postal code |
The structured form of the search query allows you to look up an address
that is already split into its components. Each parameter represents a field
of the address. All parameters are optional. You should only use the ones
that are relevant for the address you want to geocode.
!!! Attention
Cannot be combined with the `q=<query>` parameter. Newer versions of
the API will return an error if you do so. Older versions simply return
unexpected results.
## Parameters
The following parameters can be used to further restrict the search and
change the output. They are usable for both forms of the search query.
### Output format
* `format=[xml|json|jsonv2|geojson|geocodejson]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
See [Place Output Formats](Output.md) for details on each format.
* `json_callback=<string>`
!!! note
The Nominatim service at
[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
has a different default behaviour for historical reasons. When the
`format` parameter is omitted, the request will be forwarded to the Web UI.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, then JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| limit | number | 10 |
Limit the maximum number of returned results. Cannot be more than 40.
Nominatim may decide to return fewer results than requested if additional
results do not sufficiently match the query.
### Output details
* `addressdetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
Include a breakdown of the address into elements. (Default: 0)
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
* `extratags=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
* `namedetails=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
### Language of results
* `accept-language=<browser language string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results, overrides the value
specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
### Result limitation
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers on the contrary always
send the currently chosen browser language.
* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
### Result restriction
Limit search results to one or more countries. `<countrycode>` must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
e.g. `gb` for the United Kingdom, `de` for Germany.
There are two ways to influence the results. *Filters* exclude certain
kinds of results completely. *Boost parameters* only change the order of the
results and thus give a preference to some results over others.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| countrycodes | comma-separated list of country codes | _unset_ |
Filter that limits the search results to one or more countries.
The country code must be the
[ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
Each place in Nominatim is assigned to one country code based
on OSM country boundaries. In rare cases a place may not be in any country
at all, for example, in international waters.
at all, for example, when it is in international waters. These places are
also excluded when the filter is set.
* `exclude_place_ids=<place_id,[place_id],[place_id]`
!!! Note
This parameter should not be confused with the 'country' parameter of
the structured query. The 'country' parameter contains a search term
and will be handled with some fuzziness. The `countrycodes` parameter
is a hard filter and as such should be preferred. Having both parameters
in the same query will work. If the parameters contradict each other,
the search will come up empty.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states etc.) and administrative boundaries.
The `poi` layer selects all points of interest. This includes classic POIs like
restaurants, shops, hotels but also less obvious features like recycling bins,
guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
The featureType parameter allows a more fine-grained selection of places
from the address layer. Results can be restricted to places that make up
the 'state', 'country' or 'city' part of an address. A featureType of
settlement selects any human inhabited feature from 'state' down to
'neighbourhood'.
When featureType is set, then results are automatically restricted
to the address layer (see above).
!!! tip
Instead of using the featureType filters `country`, `state` or `city`,
you can also use a structured query without the finer-grained parameters
amenity or street.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| exclude_place_ids | comma-separated list of place ids | _unset_ |
If you do not want certain OSM objects to appear in the search
result, give a comma separated list of the `place_id`s you want to skip.
@@ -104,180 +235,212 @@ This can be used to retrieve additional search results. For example, if a
previous query only returned a few results, then including those here would
cause the search to return other, less accurate, matches (if possible).
| Parameter | Value | Default |
|-----------| ----- | ------- |
| viewbox | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
* `limit=<integer>`
Boost parameter which focuses the search on the given area.
Any two corner points of the box are accepted as long as they make a proper
box. `x` is longitude, `y` is latitude.
Limit the number of returned results. (Default: 10, Maximum: 50)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| bounded | 0 or 1 | 0 |
When set to 1, then it turns the 'viewbox' parameter (see above) into
a filter parameter, excluding any results outside the viewbox.
* `viewbox=<x1>,<y1>,<x2>,<y2>`
The preferred area to find search results. Any two corner points of the box
are accepted as long as they span a real box. `x` is longitude,
`y` is latitude.
* `bounded=[0|1]`
When a viewbox is given, restrict the result to items contained within that
viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
only search is allowed. Give the special keyword for the amenity in square
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
search is allowed. Give the special keyword for the amenity in square
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
There is no guarantee that the result is complete. (Default: 0)
There is no guarantee that the result returns all objects in the area.
### Polygon output
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
* `polygon_threshold=0.0`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
Return a simplified version of the output geometry. The parameter is the
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the result. (Default: 0.0)
geometry. Topology is preserved in the geometry.
### Other
* `email=<valid email address>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
If you are making a large number of requests, please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
* `dedupe=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| dedupe | 0 or 1 | 1 |
Sometimes you have several objects in OSM identifying the same place or
object in reality. The simplest case is a street being split into many
different OSM ways due to different characteristics. Nominatim will
attempt to detect such duplicates and only return one match unless
this parameter is set to 0. (Default: 1)
attempt to detect such duplicates and only return one match. Setting
this parameter to 0 disables this deduplication mechanism and
ensures that all results are returned.
* `debug=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
"search loop" logic, and SQL queries. The output is in HTML format.
This overrides the specified machine readable format.
## Examples
##### XML with kml polygon
##### XML with KML polygon
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
```xml
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
class="place" type="house">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<village>Wylde Green</village>
<town>Sutton Coldfield</town>
<city>City of Birmingham</city>
<county>West Midlands (county)</county>
<postcode>B72</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
<?xml version="1.0" encoding="UTF-8" ?>
<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
querystring="135 pilkington avenue, birmingham"
more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&amp;polygon_kml=1&amp;addressdetails=1&amp;limit=20&amp;exclude_place_ids=125279639&amp;format=xml"
exclude_place_ids="125279639">
<place place_id="125279639"
osm_type="way"
osm_id="90394480"
lat="52.5487921"
lon="-1.8164308"
boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
place_rank="30"
address_rank="30"
display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
class="building"
type="residential"
importance="9.999999994736442e-08">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<hamlet>Maney</hamlet>
<town>Sutton Coldfield</town>
<village>Wylde Green</village>
<city>Birmingham</city>
<ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
<state_district>West Midlands Combined Authority</state_district>
<state>England</state>
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
<postcode>B72 1LH</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
```
##### JSON with SVG polygon
[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
```json
{
"address": {
"city": "Berlin",
"city_district": "Mitte",
"construction": "Unter den Linden",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"house_number": "1",
"neighbourhood": "Scheunenviertel",
"postcode": "10117",
"public_building": "Kommandantenhaus",
"state": "Berlin",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170783996582",
"52.5173187255859",
"13.3975105285645",
"13.3981599807739"
],
"class": "amenity",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
"importance": 0.73606775332943,
"lat": "52.51719785",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.3978352028938",
"osm_id": "15976890",
"osm_type": "way",
"place_id": "30848715",
"svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
"type": "public_building"
}
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"historic": "Kommandantenhaus",
"house_number": "1",
"neighbourhood": "Friedrichswerder",
"postcode": "10117",
"road": "Unter den Linden",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170798",
"52.5173311",
"13.3975116",
"13.3981577"
],
"class": "historic",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
"importance": 0.8135042058306902,
"lat": "52.51720765",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"lon": "13.397834399325466",
"osm_id": 15976890,
"osm_type": "way",
"place_id": 108681845,
"svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
"type": "house"
}
]
```
##### JSON with address details
[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
```json
{
"address": {
"bakery": "B\u00e4cker Kamps",
"city_district": "Mitte",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"footway": "Bahnsteig U6",
"neighbourhood": "Sprengelkiez",
"postcode": "13353",
"state": "Berlin",
"suburb": "Wedding"
},
"boundingbox": [
"52.5460929870605",
"52.5460968017578",
"13.3591794967651",
"13.3591804504395"
],
"class": "shop",
"display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
"icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
"importance": 0.201,
"lat": "52.5460941",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.35918",
"osm_id": "317179427",
"osm_type": "node",
"place_id": "1453068",
"type": "bakery"
}
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"neighbourhood": "Sprengelkiez",
"postcode": "13347",
"road": "Lindower Straße",
"shop": "Ditsch",
"suburb": "Wedding"
},
"addresstype": "shop",
"boundingbox": [
"52.5427201",
"52.5427654",
"13.3668619",
"13.3669442"
],
"category": "shop",
"display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
"importance": 9.99999999995449e-06,
"lat": "52.54274275",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
"lon": "13.36690305710228",
"name": "Ditsch",
"osm_id": 437595031,
"osm_type": "way",
"place_id": 204751033,
"place_rank": 30,
"type": "bakery"
}
]
```
##### GeoJSON

View File

@@ -1,35 +1,50 @@
# Status
Useful for checking if the service and database is running. The JSON output also shows
Report on the state of the service and database. Useful for checking if the
service is up and running. The JSON output also reports
when the database was last updated.
## Endpoint
The status API has the following format:
```
https://nominatim.openstreetmap.org/status
```
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/status.php`. This is now deprecated
and will be removed in future versions.
## Parameters
* `format=[text|json]` (defaults to 'text')
The status endpoint takes a single optional parameter:
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `text`, `json` | 'text' |
Selects the output format. See below.
## Output
#### Text format
```
https://nominatim.openstreetmap.org/status.php
```
When everything is okay, a status code 200 is returned and a simple message: `OK`
will return HTTP status code 200 and print `OK`.
On error it will return HTTP status code 500 and print a message, e.g.
On error it will return HTTP status code 500 and print a detailed error message, e.g.
`ERROR: Database connection failed`.
#### JSON format
```
https://nominatim.openstreetmap.org/status.php?format=json
```
Always returns a HTTP code 200, when the status call could be executed.
will return HTTP code 200 and a structure
On success a JSON dictionary with the following structure is returned:
```json
{
@@ -45,8 +60,8 @@ The `software_version` field contains the version of Nominatim used to serve
the API. The `database_version` field contains the version of the data format
in the database.
On error will also return HTTP status code 200 and a structure with error
code and message, e.g.
On error it will return a shorter JSON dictionary with the error message
and status only, e.g.
```json
{
@@ -54,13 +69,3 @@ code and message, e.g.
"message": "Database connection failed"
}
```
Possible status codes are
| | message | notes |
|-----|----------------------|---------------------------------------------------|
| 700 | "No database" | connection failed |
| 701 | "Module failed" | database could not load nominatim.so |
| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
| 703 | "Query failed" | test query against a database table failed |
| 704 | "No value" | test query worked but returned no results |

View File

@@ -0,0 +1,149 @@
# Customizing Per-Country Data
Whenever an OSM object is imported into Nominatim, the object is first assigned
a country. Nominatim can use this information to adapt various aspects of
the address computation to the local customs of the country. This section
explains how country assignment works and the principal per-country
localizations.
## Country assignment
Countries are assigned on the basis of country data from the OpenStreetMap
input data itself. Countries are expected to be tagged according to the
[administrative boundary schema](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative):
an OSM relation with `boundary=administrative` and `admin_level=2`. Nominatim
uses the country code to distinguish the countries.
If there is no country data available for a point, then Nominatim uses the
fallback data imported from `data/country_osm_grid.sql.gz`. This was computed
from OSM data as well but is guaranteed to cover all countries.
Some OSM objects may also be located outside any country, for example a buoy
in the middle of the ocean. These objects do not get any country assigned and
get a default treatment when it comes to localized handling of data.
## Per-country settings
### Global country settings
The main place to configure settings per country is the file
`settings/country_settings.yaml`. This file has one section per country that
is recognised by Nominatim. Each section is tagged with the country code
(in lower case) and contains the different localization information. Only
countries which are listed in this file are taken into account for computations.
For example, the section for Andorra looks like this:
```
partition: 35
languages: ca
names: !include country-names/ad.yaml
postcode:
pattern: "(ddd)"
output: AD\1
```
The individual settings are described below.
#### `partition`
Nominatim internally splits the data into multiple tables to improve
performance. The partition number tells Nominatim into which table to put
the country. This is purely internal management and has no effect on the
output data.
The default is to have one partition per country.
#### `languages`
A comma-separated list of ISO-639 language codes of default languages in the
country. These are the languages used in name tags without a language suffix.
Note that this is not necessarily the same as the list of official languages
in the country. There may be officially recognised languages in a country
which are only ever used in name tags with the appropriate language suffixes.
Conversely, a non-official language may appear a lot in the name tags, for
example when used as an unofficial Lingua Franca.
List the languages in order of frequency of appearance with the most frequently
used language first. It is not recommended to add languages when there are only
very few occurrences.
If only one language is listed, then Nominatim will 'auto-complete' the
language of names without an explicit language-suffix.
#### `names`
List of names of the country and its translations. These names are used as
a baseline. It is always possible to search countries by the given names, no
matter what other names are in the OSM data. They are also used as a fallback
when a needed translation is not available.
!!! Note
The list of names per country is currently fairly large because Nominatim
supports translations in many languages by default. That is why the
name lists have been separated out into extra files. You can find the
name lists in the file `settings/country-names/<country code>.yaml`.
The names section in the main country settings file only refers to these
files via the special `!include` directive.
#### `postcode`
Describes the format of the postcode that is in use in the country.
When a country has no official postcodes, set this to no. Example:
```
ae:
postcode: no
```
When a country has a postcode, you need to state the postcode pattern and
the default output format. Example:
```
bm:
postcode:
pattern: "(ll)[ -]?(dd)"
output: \1 \2
```
The **pattern** is a regular expression that describes the possible formats
accepted as a postcode. The pattern follows the standard syntax for
[regular expressions in Python](https://docs.python.org/3/library/re.html#regular-expression-syntax)
with two extra shortcuts: `d` is a shortcut for a single digit ([0-9])
and `l` for a single ASCII letter ([A-Z]).
Use match groups to indicate groups in the postcode that may optionally be
separated with a space or a hyphen.
For example, the postcode for Bermuda above always consists of two letters
and two digits. They may optionally be separated by a space or hyphen. That
means that Nominatim will consider `AB56`, `AB 56` and `AB-56` spelling variants
for one and the same postcode.
Never add the country code in front of the postcode pattern. Nominatim will
automatically accept variants with a country code prefix for all postcodes.
The **output** field is an optional field that describes what the canonical
spelling of the postcode should be. The format is the
[regular expression expand syntax](https://docs.python.org/3/library/re.html#re.Match.expand) referring back to the bracket groups in the pattern.
Most simple postcodes only have one spelling variant. In that case, the
**output** can be omitted. The postcode will simply be used as is.
In the Bermuda example above, the canonical spelling would be to have a space
between letters and digits.
!!! Warning
When your postcode pattern covers multiple variants of the postcode, then
you must explicitly state the canonical output or Nominatim will not
handle the variations correctly.
### Other country-specific configuration
There are some other configuration files where you can set localized settings
according to the assigned country. These are:
* [Place ranking configuration](Ranking.md)
Please see the linked documentation sections for more information.

View File

@@ -1,149 +1,439 @@
## Configuring the Import
Which OSM objects are added to the database and which of the tags are used
can be configured via the import style configuration file. This
is a JSON file which contains a list of rules which are matched against every
tag of every object and then assign the tag its specific role.
In the very first step of a Nominatim import, OSM data is loaded into the
database. Nominatim uses [osm2pgsql](https://osm2pgsql.org) for this task.
It comes with a [flex style](https://osm2pgsql.org/doc/manual.html#the-flex-output)
specifically tailored to filter and convert OSM data into Nominatim's
internal data representation.
The style to use is given by the `NOMINATIM_IMPORT_STYLE` configuration
option. There are a number of default styles, which are explained in detail
in the [Import section](../admin/Import.md#filtering-imported-data). These
standard styles may be referenced by their name.
There are a number of default configurations for the flex style which
result in geocoding databases of different detail. The
[Import section](../admin/Import.md#filtering-imported-data) explains
these default configurations in detail.
You can also create your own custom syle. Put the style file into your
You can also create your own custom style. Put the style file into your
project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
It is always recommended to start with one of the standard styles and customize
those. You find the standard styles under the name `import-<stylename>.style`
those. You find the standard styles under the name `import-<stylename>.lua`
in the standard Nominatim configuration path (usually `/etc/nominatim` or
`/usr/local/etc/nominatim`).
The remainder of the page describes the format of the file.
The remainder of the page describes how the flex style works and how to
customize it.
### Configuration Rules
### The `flex-base.lua` module
A single rule looks like this:
The core of Nominatim's flex import configuration is the `flex-base` module.
It defines the table layout used by Nominatim and provides standard
implementations for the import callbacks that make it easy to customize
how OSM tags are used by Nominatim.
Every custom style should include this module to make sure that the correct
tables are created. Thus start your custom style as follows:
``` lua
local flex = require('flex-base')
```json
{
"keys" : ["key1", "key2", ...],
"values" : {
"value1" : "prop",
"value2" : "prop1,prop2"
}
}
```
A rule first defines a list of keys to apply the rule to. This is always a list
of strings. The string may have four forms. An empty string matches against
any key. A string that ends in an asterisk `*` is a prefix match and accordingly
matches against any key that starts with the given string (minus the `*`). A
suffix match can be defined similarly with a string that starts with a `*`. Any
other string constitutes an exact match.
The following sections explain how the module can be customized.
The second part of the rules defines a list of values and the properties that
apply to a successful match. Value strings may be either empty, which
means that they match any value, or describe an exact match. Prefix
or suffix matching of values is not possible.
For a rule to match, it has to find a valid combination of keys and values. The
resulting property is that of the matched values.
### Changing the recognized tags
The rules in a configuration file are processed sequentially and the first
match for each tag wins.
If you just want to change which OSM tags are recognized during import,
then there are a number of convenience functions to set the tag lists used
during the processing.
A rule where key and value are the empty string is special. This defines the
fallback when none of the rules match. The fallback is always used as a last
resort when nothing else matches, no matter where the rule appears in the file.
Defining multiple fallback rules is not allowed. What happens in this case,
is undefined.
!!! warning
There are no built-in defaults for the tag lists, so all the functions
need to be called from your style script to fully process the data.
Make sure you start from one of the default styles and only modify
the data you are interested in. You can also derive your style from an
existing style by importing the appropriate module, e.g.
`local flex = require('import-street')`.
### Tag Properties
Many of the following functions take _key match lists_. These lists can
contain three kinds of strings to match against tag keys:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
One or more of the following properties may be given for each tag:
* `main`
#### `set_main_tags()` - principal tags
A principal tag. A new row will be added for the object with key and value
as `class` and `type`.
If a principal or main tag is found on an OSM object, then the object
is included in Nominatim's search index. A single object may also have
multiple main tags. In that case, the object will be included multiple
times in the index, once for each main tag.
* `with_name`
The flex script distinguishes between four types of main tags:
When the tag is a principal tag (`main` property set): only really add a new
row, if there is any name tag found (a reference tag is not sufficient, see
below).
* __always__: a main tag that is used unconditionally
* __named__: consider this main tag only, if the object has a proper name
(a reference is not enough, see below).
* __named_with_key__: consider this main tag only, when the object has
a proper name with a domain prefix. For example, if the main tag is
`bridge=yes`, then it will only be added as an extra row, if there is
a tag `bridge:name[:XXX]` for the same object. If this property is set,
all other names that are not domain-specific are ignored.
* __fallback__: use this main tag only, if there is no other main tag.
Fallback always implies `named`, i.e. fallbacks are only tried for
named objects.
* `with_name_key`
The `set_main_tags()` function takes exactly one table parameter which
defines the keys and key/value combinations to include and the kind of
main tag. Each lua table key defines an OSM tag key. The value may
be a string defining the kind of main key as described above. Then the tag will
be considered a main tag for any possible value. To further restrict
which values are acceptable, give a table with the permitted values
and their kind of main tag. If the table contains a simple value without
key, then this is used as default for values that are not listed.
When the tag is a principal tag (`main` property set): only really add a new
row, if there is also a name tag that matches the key of the principal tag.
For example, if the main tag is `bridge=yes`, then it will only be added as
an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
If this property is set, all other names that are not domain-specific are
ignored.
!!! example
``` lua
local flex = require('import-full')
* `fallback`
flex.set_main_tags{
boundary = {administrative = 'named'},
highway = {'always', street_lamp = 'named'},
landuse = 'fallback'
}
```
When the tag is a principal tag (`main` property set): only really add a new
row, when no other principal tags for this object have been found. Only one
fallback tag can win for an object.
In this example an object with a `boundary` tag will only be included
when it has a value of `administrative`. Objects with `highway` tags are
always included. However when the value is `street_lamp` then the object
must have a name, too. With any other value, the object is included
independently of the name. Finally, if a `landuse` tag is present then
it will be used independently of the concrete value if neither boundary
nor highway tags were found and the object is named.
* `operator`
When the tag is a principal tag (`main` property set): also include the
`operator` tag in the list of names. This is a special construct for an
out-dated tagging practise in OSM. Fuel stations and chain restaurants
in particular used to have the name of the chain tagged as `operator`.
These days the chain can be more commonly found in the `brand` tag but
there is still enough old data around to warrant this special case.
#### `set_prefilters()` - ignoring tags
* `name`
Pre-filtering of tags allows you to ignore them for any further processing.
Thus pre-filtering takes precedence over any other tag processing. This is
useful when some specific key/value combinations need to be excluded from
processing. When tags are filtered, they may either be deleted completely
or moved to `extratags`. Extra tags are saved with the object and returned
to the user when requested, but are not used otherwise.
Add tag to the list of names.
`set_prefilters()` takes a table with four optional fields:
* `ref`
* __delete_keys__ is a _key match list_ for tags that should be deleted
* __delete_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are deleted.
* __extra_keys__ is a _key match list_ for tags which should be saved into
extratags
* __extra_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are moved to extratags.
Add tag to the list of names as a reference. At the moment this only means
that the object is not considered to be named for `with_name`.
Key list may contain three kinds of strings:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
* `address`
!!! example
``` lua
local flex = require('import-full')
Add tag to the list of address tags. If the tag starts with `addr:` or
`is_in:`, then this prefix is cut off before adding it to the list.
flex.set_prefilters{
delete_keys = {'source', 'source:*'},
extra_tags = {amenity = {'yes', 'no'}}
}
flex.set_main_tags{
amenity = 'always'
}
```
* `postcode`
In this example any tags `source` and tags that begin with `source:` are
deleted before any other processing is done. Getting rid of frequent tags
this way can speed up the import.
Add the value as a postcode to the address tags. If multiple tags are
candidate for postcodes, one wins out and the others are dropped.
Tags with `amenity=yes` or `amenity=no` are moved to extratags. Later
all tags with an `amenity` key are made a main tag. This effectively means
that Nominatim will use all amenity tags except for those with value
yes and no.
* `country`
#### `set_name_tags()` - defining names
Add the value as a country code to the address tags. The value must be a
two letter country code, otherwise it is ignored. If there are multiple
tags that match, then one wins out and the others are dropped.
The flex script distinguishes between two kinds of names:
* `house`
* __main__: the primary names make an object fully searchable.
Main tags of type _named_ will only cause the object to be included when
such a primary name is present. Primary names are usually those found
in the `name` tag and its variants.
* __extra__: extra names are still added to the search index but they are
alone not sufficient to make an object named.
If no principle tags can be found for the object, still add the object with
`class`=`place` and `type`=`house`. Use this for address nodes that have no
other function.
`set_name_tags()` takes a table with two optional fields `main` and `extra`.
They take _key match lists_ for main and extra names respectively.
* `interpolation`
!!! example
``` lua
local flex = require('flex-base')
Add this object as an address interpolation (appears as `class`=`place` and
`type`=`houses` in the database).
flex.set_main_tags{highway = {traffic_light = 'named'}}
flex.set_name_tags{main = {'name', 'name:*'},
extra = {'ref'}
}
```
* `extra`
This example creates a search index over traffic lights but will
only include those that have a common name and not those which just
have some reference ID from the city.
Add tag to the list of extra tags.
#### `set_address_tags()` - defining address parts
* `skip`
Address tags will be used to build up the address of an object.
Skip the tag completely. Useful when a custom default fallback is defined
or to define exceptions to rules.
`set_address_tags()` takes a table with arbitrary fields pointing to
_key match lists_. Two fields have a special meaning:
A rule can define as many of these properties for one match as it likes. For
example, if the property is `"main,extra"` then the tag will open a new row
but also have the tag appear in the list of extra tags.
* __main__: defines
the tags that make a full address object out of the OSM object. This
is usually the housenumber or variants thereof. If a main address tag
appears, then the object will always be included, if necessary with a
fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:`
this will be stripped.
* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped.
All other fields will be handled as summary fields. If a key matches the
key match list, then its value will be added to the address tags with the
name of the field as key. If multiple tags match, then an arbitrary one
wins.
Country tags are handled slightly differently. Only tags with a two-letter code
are accepted, all other values are discarded.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
main = {'addr:housenumber'},
extra = {'addr:*'},
postcode = {'postal_code', 'postcode', 'addr:postcode'},
country = {'country-code', 'ISO3166-1'}
}
```
In this example all tags which begin with `addr:` will be saved in
the address tag list. If one of the tags is `addr:housenumber`, the
object will fall back to be entered as a `place=house` in the database
unless there is another interesting main tag to be found.
Tags with keys `country-code` and `ISO3166-1` are saved with their
value under `country` in the address tag list. The same thing happens
to postcodes, they will always be saved under the key `postcode` thus
normalizing the multitude of keys that are used in the OSM database.
#### `set_unused_handling()` - processing remaining tags
This function defines what to do with tags that remain after all tags
have been classified using the functions above. There are two ways in
which the function can be used:
`set_unused_handling{delete_keys = ..., delete_tags = ...}` deletes all
keys that match the descriptions in the parameters and moves all remaining
tags into the extratags list.
`set_unused_handling{extra_keys = ..., extra_tags = ...}` moves all tags
matching the parameters into the extratags list and then deletes the remaining
tags. For the format of the parameters see the description in `set_prefilters()`
above.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
main = {'addr:housenumber'},
extra = {'addr:*', 'tiger:county'}
}
flex.set_unused_handling{delete_keys = {'tiger:*'}}
```
In this example all remaining tags except those beginning with `tiger:`
are moved to the extratags list. Note that it is not possible to
already delete the tiger tags with `set_prefilters()` because that
would remove tiger:county before the address tags are processed.
### Customizing osm2pgsql callbacks
osm2pgsql expects the flex style to implement three callbacks, one process
function per OSM type. If you want to implement special handling for
certain OSM types, you can override the default implementations provided
by the flex-base module.
#### Changing the relation types to be handled
The default script only allows relations of type `multipolygon`, `boundary`
and `waterway`. To add other types of relations, set `RELATION_TYPES` for
the type to the kind of geometry that should be created. The following
kinds of geometries can be used:
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
the relation. If the ways do not form a valid area, then the object is
silently discarded.
* __relation_as_multiline__ creates a (Multi)LineString from the ways in
the relation. Ways are combined as much as possible without any regard
to their order in the relation.
!!! Example
``` lua
local flex = require('import-full')
flex.RELATION_TYPES['site'] = flex.relation_as_multipolygon
```
With this line relations of `type=site` will be included in the index
according to main tags found. This only works when the site relation
resolves to a valid area. Nodes in the site relation are not part of the
geometry.
#### Adding additional logic to processing functions
The default processing functions are also exported by the flex-base module
as `process_node`, `process_way` and `process_relation`. These can be used
to implement your own processing functions with some additional processing
logic.
!!! Example
``` lua
local flex = require('import-full')
function osm2pgsql.process_relation(object)
if object.tags.boundary ~= 'administrative' or object.tags.admin_level ~= '2' then
flex.process_relation(object)
end
end
```
This example discards all country-level boundaries and uses standard
handling for everything else. This can be useful if you want to use
your own custom country boundaries.
### Customizing the main processing function
The main processing function of the flex style can be found in the function
`process_tags`. This function is called for all OSM object kinds and is
responsible for filtering the tags and writing out the rows into PostgreSQL.
!!! Example
``` lua
local flex = require('import-full')
local original_process_tags = flex.process_tags
function flex.process_tags(o)
if o.object.tags.highway ~= nil and o.object.tags.access == 'no' then
return
end
original_process_tags(o)
end
```
This example shows the simplest customization of the process_tags function.
It simply adds some additional processing before running the original code.
To do that, first save the original function and then overwrite process_tags
from the module. In this example all highways which are not accessible
by anyone will be ignored.
#### The `Place` class
The `process_tags` function receives a Lua object of `Place` type which comes
with some handy functions to collect the data necessary for geocoding and
writing it into the place table. Always use this object to fill the table.
The Place class has some attributes which you may access read-only:
* __object__ is the original OSM object data handed in by osm2pgsql
* __admin_level__ is the content of the admin_level tag, parsed into an
integer and normalized to a value between 0 and 15
* __has_name__ is a boolean indicating if the object has a full name
* __names__ is a table with the collected list of name tags
* __address__ is a table with the collected list of address tags
* __extratags__ is a table with the collected list of additional tags to save
There are a number of functions to fill these fields. All functions expect
a table parameter with fields as indicated in the description.
Many of these functions expect match functions which are described in detail
further below.
* __delete{match=...}__ removes all tags that match the match function given
in _match_.
* __grab_extratags{match=...}__ moves all tags that match the match function
given in _match_ into extratags. Returns the number of tags moved.
* __clean{delete=..., extra=...}__ deletes all tags that match _delete_ and
moves the ones that match _extra_ into extratags
* __grab_address_parts{groups=...}__ moves matching tags into the address table.
_groups_ must be a group match function. Tags of the group `main` and
`extra` are added to the address table as is but with `addr:` and `is_in:`
prefixes removed from the tag key. All other groups are added with the
group name as key and the value from the tag. Multiple values of the same
group overwrite each other. The function returns the number of tags saved
from the main group.
* __grab_main_parts{groups=...}__ moves matching tags into the name table.
_groups_ must be a group match function. If a tag of the group `main` is
present, the object will be marked as having a name. Tags of group `house`
produce a fallback to `place=house`. This fallback is returned by the function
if present.
There are two functions to write a row into the place table. Both functions
expect the main tag (key and value) for the row and then use the collected
information from the name, address, extratags etc. fields to complete the row.
They also have a boolean parameter `save_extra_mains` which defines how any
unprocessed tags are handled: when True, the tags will be saved as extratags,
when False, they will be simply discarded.
* __write_row(key, value, save_extra_mains)__ creates a new table row from
the current state of the Place object.
* __write_place(key, value, mtype, save_extra_mains)__ creates a new row
conditionally. When value is nil, the function will attempt to look up the
value in the object tags. If value is still nil or mtype is nil, the row
is ignored. An mtype of `always` will then always write out the row,
a mtype of `named` only, when the object has a full name. When mtype
is `named_with_key`, the function checks for a domain name, i.e. a name
tag prefixed with the name of the main key. Only if at least one is found,
the row will be written. The names are replaced with the domain names found.
#### Match functions
The Place functions usually expect either a _match function_ or a
_group match function_ to find the tags to apply their function to.
The __match function__ is a Lua function which takes two parameters,
key and value, and returns a boolean to indicate that a tag matches. The
flex-base module has a convenience function `tag_match()` to create such a
function. It takes a table with two optional fields: `keys` takes a key match
list (see above), `tags` takes a table with keys that point to a list of
possible values, thus defining key/value matches.
The __group match function__ is a Lua function which also takes two parameters,
key and value, and returns a string indicating which group or type they
belong to. The `tag_group()` function can be used to create such a function. It expects
a table where the group names are the keys and the values are a key match list.
### Using the gazetteer output of osm2pgsql
Nominatim still allows you to configure the gazetteer output to remain
backwards compatible with older imports. It will be automatically used
when the style file name ends in `.style`. For documentation of the
old import style, please refer to the documentation of older releases
of Nominatim. Do not use the gazetteer output for new imports. There is no
guarantee that new versions of Nominatim are fully compatible with the
gazetteer output.
### Changing the Style of Existing Databases

View File

@@ -0,0 +1,49 @@
## Importance
Search requests can yield multiple results which match equally well with
the original query. In such a case Nominatim needs to order the results
according to a different criterion: importance. This is a measure for how
likely it is that a user will search for a given place. This section explains
the sources Nominatim uses for computing importance of a place and how to
customize them.
### How importance is computed
The main value for importance is derived from page ranking values for Wikipedia
pages for a place. For places that do not have their own
Wikipedia page, a formula is used that derives a static importance from the
place's [search rank](../customize/Ranking.md#search-rank).
In a second step, a secondary importance value is added which is meant to
represent how well-known the general area is where the place is located. It
functions as a tie-breaker between places with very similar primary
importance values.
nominatim.org has preprocessed importance tables for the
[primary Wikipedia rankings](https://nominatim.org/data/wikimedia-importance.sql.gz)
and for a secondary importance based on the number of tile views on openstreetmap.org.
### Customizing secondary importance
The secondary importance is implemented as a simple
[Postgis raster](https://postgis.net/docs/raster.html) table, where Nominatim
looks up the value for the coordinates of the centroid of a place. You can
provide your own secondary importance raster in form of an SQL file named
`secondary_importance.sql.gz` in your project directory.
The SQL file needs to drop and (re)create a table `secondary_importance` which
must as a minimum contain a column `rast` of type `raster`. The raster must
be in EPSG:4326 and contain 16bit unsigned ints
(`raster_constraint_pixel_types(rast) = '{16BUI}'`). Any other columns in the
table will be ignored. You must furthermore create an index as follows:
```
CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(rast))
```
The following raster2pgsql command will create a table that conforms to
the requirements:
```
raster2pgsql -I -C -Y -d -t 128x128 input.tiff public.secondary_importance
```

55
docs/customize/SQLite.md Normal file
View File

@@ -0,0 +1,55 @@
A Nominatim database can be converted into an SQLite database and used as
a read-only source for geocoding queries. This section describes how to
create and use an SQLite database.
!!! danger
This feature is in an experimental state at the moment. Use at your own
risk.
## Installing prerequisites
To use a SQLite database, you need to install:
* SQLite (>= 3.30)
* Spatialite (> 5.0.0)
On Ubuntu/Debian, you can run:
sudo apt install sqlite3 libsqlite3-mod-spatialite libspatialite7
## Creating a new SQLite database
Nominatim cannot import directly into an SQLite database. Instead you have to
first create a geocoding database in PostgreSQL by running a
[regular Nominatim import](../admin/Import.md).
Once this is done, the database can be converted to SQLite with
nominatim convert -o mydb.sqlite
This will create a database where all geocoding functions are available.
Depending on what functions you need, the database can be made smaller:
* `--without-reverse` omits indexes only needed for reverse geocoding
* `--without-search` omits tables and indexes used for forward search
* `--without-details` leaves out extra information only available in the
details API
## Using an SQLite database
Once you have created the database, you can use it by simply pointing the
database DSN to the SQLite file:
NOMINATIM_DATABASE_DSN=sqlite:dbname=mydb.sqlite
Please note that SQLite support is only available for the Python frontend. To
use the test server with an SQLite database, you therefore need to switch
the frontend engine:
nominatim serve --engine falcon
You need to install falcon or starlette for this, depending on which engine
you choose.
The CLI query commands and the library interface already use the new Python
frontend and therefore work right out of the box.

View File

@@ -91,7 +91,7 @@ The option is only used by the Legacy tokenizer and ignored otherwise.
| -------------- | --------------------------------------------------- |
| **Description:** | Tokenizer used for normalizing and parsing queries and names |
| **Format:** | string |
| **Default:** | legacy |
| **Default:** | icu |
| **After Changes:** | cannot be changed after import |
Sets the tokenizer type to use for the import. For more information on
@@ -189,7 +189,7 @@ will be used.
| **Description:** | Enable searching for Tiger house number data |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim --refresh --functions` |
| **After Changes:** | run `nominatim refresh --functions` |
When this setting is enabled, search and reverse queries also take data
from [Tiger house number data](Tiger.md) into account.
@@ -202,7 +202,7 @@ from [Tiger house number data](Tiger.md) into account.
| **Description:** | Enable searching in external house number tables |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim --refresh --functions` |
| **After Changes:** | run `nominatim refresh --functions` |
| **Comment:** | Do not use. |
When this setting is enabled, search queries also take data from external
@@ -552,6 +552,8 @@ used.
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This feature is currently undocumented and potentially broken.
@@ -564,6 +566,7 @@ This feature is currently undocumented and potentially broken.
| **Format:** | integer |
| **Default:** | 500 |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This setting defines the threshold over which a name is no longer considered
as rare. When searching for places with rare names, only the name is used
@@ -604,6 +607,88 @@ with a single query.
Setting this parameter to 0 disables polygon output completely.
#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Disable search for elements that are not in the country grid |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
Enable to search elements just within countries.
When enabled, if, despite not finding a point within the static grid of countries, it
finds a geometry of a region, do not return the geometry.
Return "Unable to geocode" instead.
#### NOMINATIM_SERVE_LEGACY_URLS
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable serving via URLs with a .php suffix |
| **Format:** | boolean |
| **Default:** | yes |
| **Comment:** | Python frontend only |
When enabled, then endpoints are reachable as `/<name>` as well as `/<name>.php`.
This can be useful when you want to be backwards-compatible with previous
versions of Nominatim.
#### NOMINATIM_API_POOL_SIZE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of parallel database connections per worker |
| **Format:** | number |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
Sets the maximum number of database connections available for a single instance
of Nominatim. When configuring the maximum number of connections that your
PostgreSQL database can handle, you need at least
`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
For configuring the number of workers, refer to the section about
[Deploying the Python frontend](../admin/Deployment-Python.md).
#### NOMINATIM_QUERY_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for SQL queries to the database |
| **Format:** | number (seconds) |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
When this timeout is set, then all SQL queries that run longer than the
specified number of seconds will be cancelled and the user receives a
timeout exception. Users of the API see a 503 HTTP error.
The timeout does not apply when using the
[low-level DB access](../library/Low-Level-DB-Access.md)
of the library. A timeout can be manually set, if required.
#### NOMINATIM_REQUEST_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for search queries |
| **Format:** | number (seconds) |
| **Default:** | 60 |
| **Comment:** | Python frontend only |
When this timeout is set, a search query will finish sending queries
to the database after the timeout has passed and immediately return the
results gathered so far.
Note that under high load you may observe that users receive different results
than usual without seeing an error. This may cause some confusion.
### Logging Settings
#### NOMINATIM_LOG_DB
@@ -643,7 +728,24 @@ The entries in the log file have the following format:
<request time> <execution time in s> <number of results> <type> "<query string>"
Request time is the time when the request was started. The execution time is
given in ms and corresponds to the time the query took executing in PHP.
given in seconds and corresponds to the time the query took executing in PHP.
type contains the name of the endpoint used.
Can be used at the same time as NOMINATIM_LOG_DB.
#### NOMINATIM_DEBUG_SQL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable printing of raw SQL by SQLAlchemy |
| **Format:** | boolean |
| **Default:** | no |
| **Comment:** | **For developers only.** |
This setting enables
[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
by SQLAlchemy. This can be helpful when debugging some bugs with internal
query handling. It should only be used together with the CLI query functions.
Enabling it for server mode may have unintended consequences. Use the `debug`
parameter instead, which prints information on how the search is executed
including SQL statements.

View File

@@ -5,22 +5,22 @@ address set to complement the OSM house number data in the US. You can add
TIGER data to your own Nominatim instance by following these steps. The
entire US adds about 10GB to your database.
1. Get preprocessed TIGER 2021 data:
1. Get preprocessed TIGER data:
cd $PROJECT_DIR
wget https://nominatim.org/data/tiger2021-nominatim-preprocessed.csv.tar.gz
wget https://nominatim.org/data/tiger-nominatim-preprocessed-latest.csv.tar.gz
2. Import the data into your Nominatim database:
nominatim add-data --tiger-data tiger2021-nominatim-preprocessed.csv.tar.gz
nominatim add-data --tiger-data tiger-nominatim-preprocessed-latest.csv.tar.gz
3. Enable use of the Tiger data in your `.env` by adding:
3. Enable use of the Tiger data in your existing `.env` file by adding:
echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
4. Apply the new settings:
nominatim refresh --functions
nominatim refresh --functions --website
See the [TIGER-data project](https://github.com/osm-search/TIGER-data) for more

View File

@@ -19,7 +19,22 @@ they can be configured.
The legacy tokenizer implements the analysis algorithms of older Nominatim
versions. It uses a special Postgresql module to normalize names and queries.
This tokenizer is currently the default.
This tokenizer is automatically installed and used when upgrading an older
database. It should not be used for new installations anymore.
### Compiling the PostgreSQL module
The tokenizer needs a special C module for PostgreSQL which is not compiled
by default. If you need the legacy tokenizer, compile Nominatim as follows:
```
mkdir build
cd build
cmake -DBUILD_MODULE=on
make
```
### Enabling the tokenizer
To enable the tokenizer add the following line to your project configuration:
@@ -47,6 +62,7 @@ normalization functions are hard-coded.
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
normalize names and queries. It also offers configurable decomposition and
abbreviation handling.
This tokenizer is currently the default.
To enable the tokenizer add the following line to your project configuration:
@@ -86,7 +102,7 @@ Here is an example configuration file:
``` yaml
normalization:
- ":: lower ()"
- "ß > 'ss'" # German szet is unimbigiously equal to double ss
- "ß > 'ss'" # German szet is unambiguously equal to double ss
transliteration:
- !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
- ":: Ascii ()"
@@ -99,6 +115,9 @@ token-analysis:
- words:
- road -> rd
- bridge -> bdge,br,brdg,bri,brg
mutations:
- pattern: 'ä'
replacements: ['ä', 'ae']
```
The configuration file contains four sections:
@@ -109,7 +128,7 @@ The configuration file contains four sections:
The normalization and transliteration sections each define a set of
ICU rules that are applied to the names.
The **normalisation** rules are applied after sanitation. They should remove
The **normalization** rules are applied after sanitation. They should remove
any information that is not relevant for search at all. Usual rules to be
applied here are: lower-casing, removing of special characters, cleanup of
spaces.
@@ -157,28 +176,66 @@ The following is a list of sanitizers that are shipped with Nominatim.
##### split-name-list
::: nominatim.tokenizer.sanitizers.split_name_list
selection:
options:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### strip-brace-terms
::: nominatim.tokenizer.sanitizers.strip_brace_terms
selection:
options:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### tag-analyzer-by-language
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
selection:
options:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### clean-housenumbers
::: nominatim.tokenizer.sanitizers.clean_housenumbers
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### clean-postcodes
::: nominatim.tokenizer.sanitizers.clean_postcodes
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### clean-tiger-tags
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### delete-tags
::: nominatim.tokenizer.sanitizers.delete_tags
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### tag-japanese
::: nominatim.tokenizer.sanitizers.tag_japanese
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### Token Analysis
@@ -196,21 +253,25 @@ by a sanitizer (see for example the
The token-analysis section contains the list of configured analyzers. Each
analyzer must have an `id` parameter that uniquely identifies the analyzer.
The only exception is the default analyzer that is used when no special
analyzer was selected.
analyzer was selected. There are analysers with special ids:
* '@housenumber'. If an analyzer with that name is present, it is used
for normalization of house numbers.
* '@postcode'. If an analyzer with that name is present, it is used
for normalization of postcodes.
Different analyzer implementations may exist. To select the implementation,
the `analyzer` parameter must be set. Currently there is only one implementation
`generic` which is described in the following.
the `analyzer` parameter must be set. The different implementations are
described in the following.
##### Generic token analyzer
The generic analyzer is able to create variants from a list of given
abbreviation and decomposition replacements. It takes one optional parameter
`variants` which lists the replacements to apply. If the section is
omitted, then the generic analyzer becomes a simple analyzer that only
applies the transliteration.
The generic analyzer `generic` is able to create variants from a list of given
abbreviation and decomposition replacements and introduce spelling variations.
The variants section defines lists of replacements which create alternative
###### Variants
The optional 'variants' section defines lists of replacements which create alternative
spellings of a name. To create the variants, a name is scanned from left to
right and the longest matching replacement is applied until the end of the
string is reached.
@@ -296,6 +357,48 @@ decomposition has an effect here on the source as well. So a rule
means that for a word like `hauptstrasse` four variants are created:
`hauptstrasse`, `haupt strasse`, `hauptstr` and `haupt str`.
###### Mutations
The 'mutations' section in the configuration describes an additional set of
replacements to be applied after the variants have been computed.
Each mutation is described by two parameters: `pattern` and `replacements`.
The pattern must contain a single regular expression to search for in the
variant name. The regular expressions need to follow the syntax for
[Python regular expressions](https://docs.python.org/3/library/re.html#regular-expression-syntax).
Capturing groups are not permitted.
`replacements` must contain a list of strings that the pattern
should be replaced with. Each occurrence of the pattern is replaced with
all given replacements. Be mindful of combinatorial explosion of variants.
###### Modes
The generic analyser supports a special mode `variant-only`. When configured
then it consumes the input token and emits only variants (if any exist). Enable
the mode by adding:
```
mode: variant-only
```
to the analyser configuration.
##### Housenumber token analyzer
The analyzer `housenumbers` is purpose-made to analyze house numbers. It
creates variants with optional spaces between numbers and letters. Thus,
house numbers of the form '3 a', '3A', '3-A' etc. are all considered equivalent.
The analyzer cannot be customized.
##### Postcode token analyzer
The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
a 'lookup' variant of the token, which produces variants with optional
spaces. Use together with the clean-postcodes sanitizer.
The analyzer cannot be customized.
### Reconfiguration
Changing the configuration after the import is currently not possible, although

View File

@@ -119,7 +119,7 @@ to compute the address relations between places. These tables are partitioned.
Each country is assigned a partition number in the country_name table (see
below) and the data is then split between a set of tables, one for each
partition. Note that Nominatim still manually manages partitioned tables.
Native support for partitions in PostgreSQL only became useable with version 13.
Native support for partitions in PostgreSQL only became usable with version 13.
It will be a little while before Nominatim drops support for older versions.
![address tables](address-tables.svg)
@@ -155,9 +155,9 @@ Nominatim also creates a number of static tables at import:
default languages and saves the assignment of countries to partitions.
* `country_osm_grid` provides a fallback for country geometries
## Auxilary data tables
## Auxiliary data tables
Finally there are some table for auxillary data:
Finally there are some tables for auxiliary data:
* `location_property_tiger` - saves housenumber from the Tiger import. Its
layout is similar to that of `location_property_osmline`.

View File

@@ -1,6 +1,6 @@
# Setting up Nominatim for Development
This chapter gives an overview how to set up Nominatim for developement
This chapter gives an overview how to set up Nominatim for development
and how to run tests.
!!! Important
@@ -30,15 +30,25 @@ unit tests (using PHPUnit for PHP code and pytest for Python code).
It has the following additional requirements:
* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
* [phpunit](https://phpunit.de) >= 7.3
* [phpunit](https://phpunit.de) (9.5 is known to work)
* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
* [pytest](https://pytest.org)
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
For testing the Python search frontend, you need to install extra dependencies
depending on your choice of webserver framework:
* [httpx](https://www.python-httpx.org/) (starlette only)
* [asgi-lifespan](https://github.com/florimondmanca/asgi-lifespan) (starlette only)
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
* [mkdocstrings](https://mkdocstrings.github.io/)
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
### Installing prerequisites on Ubuntu/Debian
@@ -50,9 +60,12 @@ To install all necessary packages run:
```sh
sudo apt install php-cgi phpunit php-codesniffer \
python3-pip python3-setuptools python3-dev pylint
python3-pip python3-setuptools python3-dev
pip3 install --user behave mkdocs mkdocstrings pytest
pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
types-ujson types-requests types-Pygments typing-extensions\
httpx asgi-lifespan
```
The `mkdocs` executable will be located in `.local/bin`. You may have to add

View File

@@ -0,0 +1,220 @@
# Writing custom sanitizer and token analysis modules for the ICU tokenizer
The [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer) provides a
highly customizable method to pre-process and normalize the name information
of the input data before it is added to the search index. It comes with a
selection of sanitizers and token analyzers which you can use to adapt your
installation to your needs. If the provided modules are not enough, you can
also provide your own implementations. This section describes the API
of sanitizers and token analysis.
!!! warning
This API is currently in early alpha status. While this API is meant to
be a public API on which other sanitizers and token analyzers may be
implemented, it is not guaranteed to be stable at the moment.
## Using non-standard sanitizers and token analyzers
Sanitizer names (in the `step` property) and token analysis names (in the
`analyzer`) may refer to externally supplied modules. There are two ways
to include external modules: through a library or from the project directory.
To include a module from a library, use the absolute import path as name and
make sure the library can be found in your PYTHONPATH.
To use a custom module without creating a library, you can put the module
somewhere in your project directory and then use the relative path to the
file. Include the whole name of the file including the `.py` ending.
## Custom sanitizer modules
A sanitizer module must export a single factory function `create` with the
following signature:
``` python
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]
```
The function receives the custom configuration for the sanitizer and must
return a callable (function or class) that transforms the name and address
terms of a place. When a place is processed, then a `ProcessInfo` object
is created from the information that was queried from the database. This
object is sequentially handed to each configured sanitizer, so that each
sanitizer receives the result of processing from the previous sanitizer.
After the last sanitizer is finished, the resulting name and address lists
are forwarded to the token analysis module.
Sanitizer functions are instantiated once and then called for each place
that is imported or updated. They don't need to be thread-safe.
If multi-threading is used, each thread creates their own instance of
the function.
### Sanitizer configuration
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
options:
heading_level: 6
### The main filter function of the sanitizer
The filter function receives a single object of type `ProcessInfo`
which has three members:
* `place: PlaceInfo`: read-only information about the place being processed.
See PlaceInfo below.
* `names: List[PlaceName]`: The current list of names for the place.
* `address: List[PlaceName]`: The current list of address names for the place.
While the `place` member is provided for information only, the `names` and
`address` lists are meant to be manipulated by the sanitizer. It may add and
remove entries, change information within a single entry (for example by
adding extra attributes) or completely replace the list with a different one.
#### PlaceInfo - information about the place
::: nominatim.data.place_info.PlaceInfo
options:
heading_level: 6
#### PlaceName - extended naming information
::: nominatim.data.place_name.PlaceName
options:
heading_level: 6
### Example: Filter for US street prefixes
The following sanitizer removes the directional prefixes from street names
in the US:
``` python
import re
def _filter_function(obj):
if obj.place.country_code == 'us' \
and obj.place.rank_address >= 26 and obj.place.rank_address <= 27:
for name in obj.names:
name.name = re.sub(r'^(north|south|west|east) ',
'',
name.name,
flags=re.IGNORECASE)
def create(config):
return _filter_function
```
This is the most simple form of a sanitizer module. It defines a single
filter function and implements the required `create()` function by returning
the filter.
The filter function first checks if the object is interesting for the
sanitizer. Namely it checks if the place is in the US (through `country_code`)
and if the place is a street (a `rank_address` of 26 or 27). If the
conditions are met, then it goes through all available names and
removes any leading directional prefix using a simple regular expression.
Save the source code in a file in your project directory, for example as
`us_streets.py`. Then you can use the sanitizer in your `icu_tokenizer.yaml`:
``` yaml
...
sanitizers:
- step: us_streets.py
...
```
!!! warning
This example is just a simplified show case on how to create a sanitizer.
It is not really ready for real-world use: while the sanitizer would
correctly transform `West 5th Street` into `5th Street`, it would also
shorten a simple `North Street` to `Street`.
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
They can be found in the directory
[`nominatim/tokenizer/sanitizers`](https://github.com/osm-search/Nominatim/tree/master/nominatim/tokenizer/sanitizers).
## Custom token analysis module
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
options:
heading_level: 6
::: nominatim.tokenizer.token_analysis.base.Analyzer
options:
heading_level: 6
### Example: Creating acronym variants for long names
The following example of a token analysis module creates acronyms from
very long names and adds them as a variant:
``` python
class AcronymMaker:
""" This class is the actual analyzer.
"""
def __init__(self, norm, trans):
self.norm = norm
self.trans = trans
def get_canonical_id(self, name):
# In simple cases, the normalized name can be used as a canonical id.
return self.norm.transliterate(name.name).strip()
def compute_variants(self, name):
# The transliterated form of the name always makes up a variant.
variants = [self.trans.transliterate(name)]
# Only create acronyms from very long words.
if len(name) > 20:
# Take the first letter from each word to form the acronym.
acronym = ''.join(w[0] for w in name.split())
# If that leads to an acronym with at least three letters,
# add the resulting acronym as a variant.
if len(acronym) > 2:
# Never forget to transliterate the variants before returning them.
variants.append(self.trans.transliterate(acronym))
return variants
# The following two functions are the module interface.
def configure(rules, normalizer, transliterator):
# There is no configuration to parse and no data to set up.
# Just return an empty configuration.
return None
def create(normalizer, transliterator, config):
# Return a new instance of our token analysis class above.
return AcronymMaker(normalizer, transliterator)
```
Given the name `Trans-Siberian Railway`, the code above would return the full
name `Trans-Siberian Railway` and the acronym `TSR` as variant, so that
searching would work for both.
## Sanitizers vs. Token analysis - what to use for variants?
It is not always clear when to implement variations in the sanitizer and
when to write a token analysis module. Just take the acronym example
above: it would also have been possible to write a sanitizer which adds the
acronym as an additional name to the name list. The result would have been
similar. So which should be used when?
The most important thing to keep in mind is that variants created by the
token analysis are only saved in the word lookup table. They do not need
extra space in the search index. If there are many spelling variations, this
can mean quite a significant amount of space is saved.
When creating additional names with a sanitizer, these names are completely
independent. In particular, they can be fed into different token analysis
modules. This gives a much greater flexibility but at the price that the
additional names increase the size of the search index.

View File

@@ -78,7 +78,7 @@ The inheritance is computed in the data preparation step.
The prepared place information is handed to the tokenizer next. This is a
Python module responsible for processing the names from both name and address
terms and building up the word index from them. The process is explained in
more detail in the [Tokenizer chapter](Tokenizer.md).
more detail in the [Tokenizer chapter](Tokenizers.md).
### Address processing

View File

@@ -10,7 +10,7 @@ There are two kind of tests in this test suite. There are functional tests
which test the API interface using a BDD test framework and there are unit
tests for specific PHP functions.
This test directory is sturctured as follows:
This test directory is structured as follows:
```
-+- bdd Functional API tests
@@ -22,8 +22,8 @@ This test directory is sturctured as follows:
|
+- php PHP unit tests
+- python Python unit tests
+- scenes Geometry test data
+- testdb Base data for generating API test database
+- testdata Additional test data used by unit tests
```
## PHP Unit Tests (`test/php`)
@@ -84,6 +84,8 @@ The tests can be configured with a set of environment variables (`behave -D key=
* `TEST_DB` - name of test database (db tests)
* `API_TEST_DB` - name of the database containing the API test data (api tests)
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
* `API_ENGINE` - webframe to use for running search queries, same values as
`nominatim serve --engine` parameter
* `DB_HOST` - (optional) hostname of database host
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
@@ -120,7 +122,7 @@ and compromises the following data:
API tests should only be testing the functionality of the website PHP code.
Most tests should be formulated as BDD DB creation tests (see below) instead.
#### Code Coverage
#### Code Coverage (PHP engine only)
The API tests also support code coverage tests. You need to install
[PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).
@@ -153,7 +155,3 @@ needs superuser rights for postgres.
These tests check that data is imported correctly into the place table. They
use the same template database as the DB Creation tests, so the same remarks apply.
Note that most testing of the gazetteer output of osm2pgsql is done in the tests
of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
the osm2pgsql and Nominatim code.

View File

@@ -93,7 +93,7 @@ for a custom tokenizer implementation.
Nominatim expects two files for a tokenizer:
* `nominiatim/tokenizer/<NAME>_tokenizer.py` containing the Python part of the
* `nominatim/tokenizer/<NAME>_tokenizer.py` containing the Python part of the
implementation
* `lib-php/tokenizer/<NAME>_tokenizer.php` with the PHP part of the
implementation
@@ -105,7 +105,7 @@ functions. By convention, these should be placed in `lib-sql/tokenizer`.
If the tokenizer has a default configuration file, this should be saved in
the `settings/<NAME>_tokenizer.<SUFFIX>`.
### Configuration and Persistance
### Configuration and Persistence
Tokenizers may define custom settings for their configuration. All settings
must be prefixed with `NOMINATIM_TOKENIZER_`. Settings may be transient or
@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
and implement the abstract functions defined there.
::: nominatim.tokenizer.base.AbstractTokenizer
rendering:
heading_level: 4
options:
heading_level: 6
### Python Analyzer Class
::: nominatim.tokenizer.base.AbstractAnalyzer
rendering:
heading_level: 4
options:
heading_level: 6
### PL/pgSQL Functions
@@ -189,6 +189,28 @@ a house number token text. If a place has multiple house numbers they must
be listed with a semicolon as delimiter. Must be NULL when the place has no
house numbers.
```sql
FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
```
Return true if this is an object that should be parented against a street.
Only relevant for objects with address rank 30.
```sql
FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
```
Return true if there are street names to match against for finding the
parent of the object.
```sql
FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
```
Return true if there are place names to match against for finding the
parent of the object.
```sql
FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
```
@@ -245,11 +267,11 @@ Currently, tokenizers are encouraged to make sure that matching works against
both the search token list and the match token list.
```sql
FUNCTION token_normalized_postcode(postcode TEXT) RETURNS TEXT
FUNCTION token_get_postcode(info JSONB) RETURNS TEXT
```
Return the normalized version of the given postcode. This function must return
the same value as the Python function `AbstractAnalyzer->normalize_postcode()`.
Return the postcode for the object, if any exists. The postcode must be in
the form that should also be presented to the end-user.
```sql
FUNCTION token_strip_info(info JSONB) RETURNS JSONB

View File

@@ -13,7 +13,7 @@ More details in [osm-search/country-grid-data](https://github.com/osm-search/cou
## US Census TIGER
For the United States you can choose to import additonal street-level data.
For the United States you can choose to import additional street-level data.
The data isn't mixed into OSM data but queried as fallback when no OSM
result can be found.

View File

@@ -18,7 +18,7 @@ elseif (has 'addr:place'?) then (yes)
**with same name**;
kill
else (no)
:add addr:place to adress;
:add addr:place to address;
:**Use closest place**\n**rank 16 to 25**;
kill
endif

File diff suppressed because one or more lines are too long

Before

Width:  |  Height:  |  Size: 9.8 KiB

After

Width:  |  Height:  |  Size: 9.8 KiB

View File

@@ -2,6 +2,10 @@
display: none!important
}
.wy-nav-content {
max-width: 900px!important
}
table {
margin-bottom: 12pt
}
@@ -14,10 +18,19 @@ th {
background-color: #eee;
}
/* Indentation for mkdocstrings.
div.doc-contents:not(.first) {
padding-left: 25px;
border-left: 4px solid rgba(230, 230, 230);
margin-bottom: 60px;
}*/
.doc-object h6 {
margin-bottom: 0.8em;
font-size: 130%;
}
.doc-object {
margin-bottom: 1.3em;
}
.doc-children .doc-contents {
margin-left: 3em;
}
.md-footer__inner {
display: none;
}

View File

@@ -1,10 +1,15 @@
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
address and to generate synthetic addresses of OSM points (reverse geocoding).
It has also limited capability to search features by their type
(pubs, hotels, churches, etc).
This guide comes in four parts:
This guide comes in five parts:
* __[API reference](api/Overview.md)__ for users of Nominatim
* __[Administration Guide](admin/Installation.md)__ for those who want
to install their own Nominatim server
* __[Customization Guide](customize/Overview.md)__ for those who want to
adapt their own installation to their special requirements
* __[Library Guide](library/Getting-Started.md)__ for Python developers who
want to use Nominatim as a library in their project
* __[Developer's Guide](develop/overview.md)__ for developers of the software

View File

@@ -0,0 +1,31 @@
# Configuration
When using Nominatim through the library, it can be configured in exactly
the same way as when running as a service. This means that you should have
created a [project directory](../admin/Import.md#creating-the-project-directory)
which contains all files belonging to the Nominatim instance. It can also contain
an `.env` file with configuration options. Setting configuration parameters
via environment variables works as well.
Configuration options are resolved in the following order:
* from the OS environment (or the dictionary given in `environ`,
see [NominatimAPI.\_\_init\_\_](NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__))
* from the .env file in the project directory of the installation
* from the default installation in the configuration directory
For more information on configuration via dotenv and a list of possible
configuration parameters, see the [Configuration page](../customize/Settings.md).
## `Configuration` class
::: nominatim.config.Configuration
options:
members:
- get_bool
- get_int
- get_str_list
- get_path
heading_level: 6
show_signature_annotations: True

View File

@@ -0,0 +1,248 @@
# Getting Started
The Nominatim search frontend can directly be used as a Python library in
scripts and applications. When you have imported your own Nominatim database,
then it is no longer necessary to run a full web service for it and access
the database through http requests. There are
also fewer constraints on the kinds of data that can be accessed. The library
allows to get access to more detailed information about the objects saved
in the database.
!!! danger
The library interface is currently in an experimental stage. There might
be some smaller adjustments to the public interface until the next version.
The library also misses a proper installation routine, so some manipulation
of the PYTHONPATH is required. At the moment, use is only recommended for
developers with some experience in Python.
## Installation
To use the Nominatim library, you need access to a local Nominatim database.
Follow the [installation](../admin/Installation.md) and
[import](../admin/Import.md) instructions to set up your database.
It is not yet possible to install it in the usual way via pip or inside a
virtualenv. To get access to the library you need to set an appropriate
`PYTHONPATH`. With the default installation, the python library can be found
under `/usr/local/share/nominatim/lib-python`. If you have installed
Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
You can also point the `PYTHONPATH` to the Nominatim source code.
### A simple search example
To query the Nominatim database you need to first set up a connection. This
is done by creating a Nominatim API object. This object exposes all the
search functions of Nominatim that are also known from its web API.
This code snippet implements a simple search for the town of 'Brugge':
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query)
results = asyncio.run(search('Brugge'))
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge')
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
The Nominatim library is designed around
[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
provides you with an interface of coroutines.
If you have many requests to make, coroutines can speed up your applications
significantly.
For smaller scripts there is also a synchronous wrapper around the API. By
using `NominatimAPI`, you get exactly the same interface using classic functions.
The examples in this chapter will always show-case both
implementations. The documentation itself will usually refer only to
'Nominatim API class' when both flavours are meant. If a functionality is
available only for the synchronous or asynchronous version, this will be
explicitly mentioned.
### Defining which database to use
The [Configuration](../admin/Import.md#configuration-setup-in-env)
section explains how Nominatim is configured using the
[dotenv](https://github.com/theskumar/python-dotenv) library.
The same configuration mechanism is used with the
Nominatim API library. You should therefore be sure you are familiar with
the section.
The constructor of the 'Nominatim API class' takes one mandatory parameter:
the path to the [project directory](../admin/Import.md#creating-the-project-directory).
You should have set up this directory as part of the Nominatim import.
Any configuration found in the `.env` file in this directory will automatically
be used.
You may also configure Nominatim by setting environment variables.
Normally, Nominatim will check the operating system environment. This can be
overwritten by giving the constructor a dictionary of configuration parameters.
Let us look up 'Brugge' in the special database named 'belgium' instead of the
standard 'nominatim' database:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
async def search(query):
api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
return await api.search(query)
results = asyncio.run(search('Brugge'))
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
api = napi.NominatimAPI(Path('.'), environ=config_params)
results = api.search('Brugge')
```
### Presenting results to humans
All search functions return the raw results from the database. There is no
full human-readable label. To create such a label, you need two things:
* the address details of the place
* adapt the result to the language you wish to use for display
Again searching for 'Brugge', this time with a nicely formatted result:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query, address_details=True)
results = asyncio.run(search('Brugge'))
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge', address_details=True)
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
To request information about the address of a result, add the optional
parameter 'address_details' to your search:
``` python
>>> results = api.search('Brugge', address_details=True)
```
An additional field `address_rows` will be set in the results that are returned.
It contains a list of all places that make up the address of the place. For
simplicity, this includes name and house number of the place itself. With
the names in this list it is possible to create a human-readable description
of the result. To do that, you first need to decide in which language the
results should be presented. As with the names in the result itself, the
places in `address_rows` contain all possible name translations for each row.
The library has a helper class `Locales` which helps extracting a name of a
place in the preferred language. It takes a single parameter with a list
of language codes in the order of preference. So
``` python
locale = napi.Locales(['fr', 'en'])
```
creates a helper class that returns the name preferably in French. If that is
not possible, it tries English and eventually falls back to the default `name`
or `ref`.
The `Locales` object can be applied to a name dictionary to return the best-matching
name out of it:
``` python
>>> print(locale.display_name(results[0].names))
'Bruges'
```
The `address_rows` field has a helper function to apply the function to all
its members and save the result in the `local_name` field. It also returns
all the localized names as a convenient simple list. This list can be used
to create a human-readable output:
``` python
>>> address_parts = results[0].address_rows.localize(locale)
>>> print(', '.join(address_parts))
Bruges, Flandre-Occidentale, Flandre, Belgique
```
This is a fairly simple way to create a human-readable description. The
place information in `address_rows` contains further information about each
place. For example, which OSM `admin_level` was used, what category the place
belongs to or what rank Nominatim has assigned. Use this to adapt the output
to local address formats.
For more information on address rows, see
[detailed address description](Result-Handling.md#detailed-address-description).

View File

@@ -0,0 +1,62 @@
# Input Parameter Types
This page describes in more detail some of the input parameter types used
in the query functions of the API object.
## Place identification
The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
require references to places in the database. Below the possible
types for place identification are listed. All types are dataclasses.
### PlaceID
::: nominatim.api.PlaceID
options:
heading_level: 6
### OsmID
::: nominatim.api.OsmID
options:
heading_level: 6
## Geometry types
::: nominatim.api.GeometryFormat
options:
heading_level: 6
members_order: source
## Geometry input
### Point
::: nominatim.api.Point
options:
heading_level: 6
show_signature_annotations: True
### Bbox
::: nominatim.api.Bbox
options:
heading_level: 6
show_signature_annotations: True
members_order: source
group_by_category: False
## Layers
Layers allow to restrict the search result to thematic groups. This is
orthogonal to restriction by address ranks, which groups places by their
geographic extent.
::: nominatim.api.DataLayer
options:
heading_level: 6
members_order: source

View File

@@ -0,0 +1,56 @@
# Low-level connections
The `NominatimAPIAsync` class allows to directly access the underlying
database connection to explore the raw data. Nominatim uses
[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
refer to the documentation of the library to understand how to write SQL.
To get access to a search connection, use the `begin()` function of your
API object. This returns a `SearchConnection` object described below
wrapped in a context manager. Its
`t` property has definitions for all Nominatim search tables. For an
overview of available tables, refer to the
[Development Layout](../develop/Database-Layout.md) in the development
chapter. Note that only tables that are needed for search are accessible
as SQLAlchemy tables.
!!! warning
The database layout is not part of the API definition and may change
without notice. If you play with the low-level access functions, you
need to be prepared for such changes.
Here is a simple example, which prints how many places are available in
the placex table:
```
import asyncio
from pathlib import Path
import sqlalchemy as sa
from nominatim.api import NominatimAPIAsync
async def print_table_size():
api = NominatimAPIAsync(Path('.'))
async with api.begin() as conn:
cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
print(f'placex table has {cnt} rows.')
asyncio.run(print_table_size())
```
!!! warning
Low-level connections may only be used to read data from the database.
Do not use it to add or modify data or you might break Nominatim's
normal functions.
## SearchConnection class
::: nominatim.api.SearchConnection
options:
members:
- scalar
- execute
- get_class_table
- get_db_property
- get_property
heading_level: 6

View File

@@ -0,0 +1,36 @@
# The Nominatim API classes
The API classes are the core object of the search library. Always instantiate
one of these classes first. The API classes are **not threadsafe**. You need
to instantiate a separate instance for each thread.
### NominatimAPI
::: nominatim.api.NominatimAPI
options:
members:
- __init__
- config
- close
- status
- details
- lookup
- reverse
- search
- search_address
- search_category
heading_level: 6
group_by_category: False
### NominatimAPIAsync
::: nominatim.api.NominatimAPIAsync
options:
members:
- __init__
- setup_database
- close
- begin
heading_level: 6
group_by_category: False

View File

@@ -0,0 +1,58 @@
# Result handling
The search functions of the Nominatim API always return a result object
with the raw information about the place that is available in the
database. This section discusses data types used in the results and utility
functions that allow further processing of the results.
## Result fields
### Sources
Nominatim takes the result data from multiple sources. The `source_table` field
in the result describes, from which source the result was retrieved.
::: nominatim.api.SourceTable
options:
heading_level: 6
members_order: source
### Detailed address description
When the `address_details` parameter is set, then functions return not
only information about the result place but also about the places that
make up the address. This information is almost always required when you
want to present the user with a human-readable description of the result.
See also [Localization](#localization) below.
The address details are available in the `address_rows` field as an ordered
list of `AddressLine` objects with the country information last. The list also
contains the result place itself and some artificial entries, for example,
for the house number or the country code. This makes processing and creating
a full address easier.
::: nominatim.api.AddressLine
options:
heading_level: 6
members_order: source
### Detailed search terms
The `details` function can return detailed information about which search terms
may be used to find a place, when the `keywords` parameter is set. Search
terms are split into terms for the name of the place and search terms for
its address.
::: nominatim.api.WordInfo
options:
heading_level: 6
## Localization
Results are always returned with the full list of available names.
### Locale
::: nominatim.api.Locales
options:
heading_level: 6

View File

@@ -1,9 +1,13 @@
site_name: Nominatim Documentation
theme: readthedocs
site_name: Nominatim Manual
theme:
name: material
features:
- navigation.tabs
copyright: Copyright &copy; Nominatim developer community
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
site_url: https://nominatim.org
repo_url: https://github.com/openstreetmap/Nominatim
pages:
nav:
- 'Introduction' : 'index.md'
- 'API Reference':
- 'Overview': 'api/Overview.md'
@@ -18,7 +22,8 @@ pages:
- 'Basic Installation': 'admin/Installation.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy' : 'admin/Deployment.md'
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Maintenance' : 'admin/Maintenance.md'
@@ -28,27 +33,39 @@ pages:
- 'Overview': 'customize/Overview.md'
- 'Import Styles': 'customize/Import-Styles.md'
- 'Configuration Settings': 'customize/Settings.md'
- 'Per-Country Data': 'customize/Country-Settings.md'
- 'Place Ranking' : 'customize/Ranking.md'
- 'Importance' : 'customize/Importance.md'
- 'Tokenizers' : 'customize/Tokenizers.md'
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
- 'External data: Postcodes': 'customize/Postcodes.md'
- 'Conversion to SQLite': 'customize/SQLite.md'
- 'Library Guide':
- 'Getting Started': 'library/Getting-Started.md'
- 'Nominatim API class': 'library/NominatimAPI.md'
- 'Configuration': 'library/Configuration.md'
- 'Input Parameter Types': 'library/Input-Parameter-Types.md'
- 'Result Handling': 'library/Result-Handling.md'
- 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
- 'Developers Guide':
- 'Architecture Overview' : 'develop/overview.md'
- 'Database Layout' : 'develop/Database-Layout.md'
- 'Indexing' : 'develop/Indexing.md'
- 'Tokenizers' : 'develop/Tokenizers.md'
- 'Custom modules for ICU tokenizer': 'develop/ICU-Tokenizer-Modules.md'
- 'Setup for Development' : 'develop/Development-Environment.md'
- 'Testing' : 'develop/Testing.md'
- 'External Data Sources': 'develop/data-sources.md'
- 'Appendix':
- 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
- 'Installation on CentOS 8' : 'appendix/Install-on-Centos-8.md'
- 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
- 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
- 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
markdown_extensions:
- codehilite
- admonition
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true
- def_list
- toc:
permalink:
@@ -58,6 +75,7 @@ plugins:
- mkdocstrings:
handlers:
python:
rendering:
show_source: false
show_signature_annotations: false
paths: ["${PROJECT_SOURCE_DIR}"]
options:
show_source: False
show_bases: False

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -84,6 +92,10 @@ class AddressDetails
|| $aLine['class'] == 'place')
) {
$aAddress[$sTypeLabel] = $sName;
if (!empty($aLine['name'])) {
$this->addSubdivisionCode($aAddress, $aLine['admin_level'], $aLine['name']);
}
}
}
@@ -166,4 +178,14 @@ class AddressDetails
{
return $this->aAddressLines;
}
private function addSubdivisionCode(&$aAddress, $iAdminLevel, $nameDetails)
{
if (is_string($nameDetails)) {
$nameDetails = json_decode('{' . str_replace('"=>"', '":"', $nameDetails) . '}', true);
}
if (!empty($nameDetails['ISO3166-2'])) {
$aAddress["ISO3166-2-lvl$iAdminLevel"] = $nameDetails['ISO3166-2'];
}
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\ClassTypes;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -30,20 +38,25 @@ class DB
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
try {
$conn = new \PDO($this->sDSN, null, null, $aConnOptions);
$this->connection = new \PDO($this->sDSN, null, null, $aConnOptions);
} catch (\PDOException $e) {
$sMsg = 'Failed to establish database connection:' . $e->getMessage();
throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
}
$conn->exec("SET DateStyle TO 'sql,european'");
$conn->exec("SET client_encoding TO 'utf-8'");
$this->connection->exec("SET DateStyle TO 'sql,european'");
$this->connection->exec("SET client_encoding TO 'utf-8'");
// Disable JIT and parallel workers. They interfere badly with search SQL.
$this->connection->exec('SET max_parallel_workers_per_gather TO 0');
if ($this->getPostgresVersion() >= 11) {
$this->connection->exec('SET jit_above_cost TO -1');
}
$iMaxExecution = ini_get('max_execution_time');
if ($iMaxExecution > 0) {
$conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
$this->connection->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
}
$this->connection = $conn;
return true;
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -127,7 +135,7 @@ class Debug
public static function printSQL($sSQL)
{
echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
echo '<p><tt><b>'.date('c').'</b> <font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
}
private static function outputVar($mVar, $sPreNL)
@@ -170,11 +178,12 @@ class Debug
}
if (is_string($mVar)) {
echo "'$mVar'";
return strlen($mVar) + 2;
$sOut = "'$mVar'";
} else {
$sOut = (string)$mVar;
}
echo (string)$mVar;
return strlen((string)$mVar);
echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
return strlen($sOut);
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -95,7 +103,7 @@ class Geocode
}
$this->iFinalLimit = $iLimit;
$this->iLimit = $iLimit + min($iLimit, 10);
$this->iLimit = $iLimit + max($iLimit, 10);
}
public function setFeatureType($sFeatureType)
@@ -182,7 +190,7 @@ class Geocode
$this->bFallback = $oParams->getBool('fallback', $this->bFallback);
// List of excluded Place IDs - used for more acurate pageing
// List of excluded Place IDs - used for more accurate pageing
$sExcluded = $oParams->getStringList('exclude_place_ids');
if ($sExcluded) {
foreach ($sExcluded as $iExcludedPlaceID) {
@@ -609,16 +617,15 @@ class Geocode
}
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aReverseGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
$aReverseGroupedSearches[$aSearch->getRank()] = array();
if (!isset($aGroupedSearches[$aSearch->getRank()])) {
$aGroupedSearches[$aSearch->getRank()] = array();
}
$aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
$aGroupedSearches[$aSearch->getRank()][] = $aSearch;
}
}
$aGroupedSearches = $aReverseGroupedSearches;
ksort($aGroupedSearches);
}
} else {
@@ -836,7 +843,9 @@ class Geocode
$aResult['importance'] = 0.001;
$aResult['foundorder'] = $aResult['addressimportance'];
} else {
$aResult['importance'] = max(0.001, $aResult['importance']);
if ($aResult['importance'] == 0) {
$aResult['importance'] = 0.0001;
}
$aResult['importance'] *= $this->viewboxImportanceFactor(
$aResult['lon'],
$aResult['lat']
@@ -865,7 +874,7 @@ class Geocode
$iCountWords = 0;
$sAddress = $aResult['langaddress'];
foreach ($aRecheckWords as $i => $sWord) {
if (stripos($sAddress, $sWord)!==false) {
if (grapheme_stripos($sAddress, $sWord)!==false) {
$iCountWords++;
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
$iCountWords += 0.1;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -14,7 +22,10 @@ class ParameterParser
public function getBool($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $bDefault;
}
@@ -23,7 +34,7 @@ class ParameterParser
public function getInt($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName])) {
if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
return $bDefault;
}
@@ -36,7 +47,7 @@ class ParameterParser
public function getFloat($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName])) {
if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
return $bDefault;
}
@@ -49,7 +60,10 @@ class ParameterParser
public function getString($sName, $bDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $bDefault;
}
@@ -58,11 +72,14 @@ class ParameterParser
public function getSet($sName, $aValues, $sDefault = false)
{
if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
if (!isset($this->aParams[$sName])
|| !is_string($this->aParams[$sName])
|| strlen($this->aParams[$sName]) == 0
) {
return $sDefault;
}
if (!in_array($this->aParams[$sName], $aValues)) {
if (!in_array($this->aParams[$sName], $aValues, true)) {
userError("Parameter '$sName' must be one of: ".join(', ', $aValues));
}
@@ -106,21 +123,27 @@ class ParameterParser
}
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['name:'.$sLanguage] = 'name:'.$sLanguage;
$this->addNameTag($aLangPrefOrder, 'name:'.$sLanguage);
}
$aLangPrefOrder['name'] = 'name';
$aLangPrefOrder['brand'] = 'brand';
$this->addNameTag($aLangPrefOrder, 'name');
$this->addNameTag($aLangPrefOrder, 'brand');
foreach ($aLanguages as $sLanguage => $fLanguagePref) {
$aLangPrefOrder['official_name:'.$sLanguage] = 'official_name:'.$sLanguage;
$aLangPrefOrder['short_name:'.$sLanguage] = 'short_name:'.$sLanguage;
$this->addNameTag($aLangPrefOrder, 'official_name:'.$sLanguage);
$this->addNameTag($aLangPrefOrder, 'short_name:'.$sLanguage);
}
$aLangPrefOrder['official_name'] = 'official_name';
$aLangPrefOrder['short_name'] = 'short_name';
$aLangPrefOrder['ref'] = 'ref';
$aLangPrefOrder['type'] = 'type';
$this->addNameTag($aLangPrefOrder, 'official_name');
$this->addNameTag($aLangPrefOrder, 'short_name');
$this->addNameTag($aLangPrefOrder, 'ref');
$this->addNameTag($aLangPrefOrder, 'type');
return $aLangPrefOrder;
}
/**
 * Register a name tag and its '_place_' prefixed variant in the
 * language preference list.
 *
 * Each entry is stored with the tag name as both key and value, the
 * plain tag first and the prefixed variant directly after it.
 *
 * @param array  $aLangPrefOrder Preference list, modified in place.
 * @param string $sTag           Name tag to add.
 */
private function addNameTag(&$aLangPrefOrder, $sTag)
{
    foreach (array($sTag, '_place_'.$sTag) as $sKey) {
        $aLangPrefOrder[$sKey] = $sKey;
    }
}
public function hasSetAny($aParamNames)
{
foreach ($aParamNames as $sName) {

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -24,7 +32,7 @@ class Phrase
}
/**
* Get the orginal phrase of the string.
* Get the original phrase of the string.
*/
public function getPhrase()
{

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -78,7 +86,7 @@ class PlaceLookup
($this->bIncludePolygonAsSVG ? 1 : 0);
if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
if (CONST_PolygonOutput_MaximumTypes) {
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
} else {
userError('Polygon output is disabled');
}
@@ -179,12 +187,12 @@ class PlaceLookup
return null;
}
$aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)));
$aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)), 0, 30, true);
return empty($aResults) ? null : reset($aResults);
}
public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30)
public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30, $bAllowLinked = false)
{
Debug::newFunction('Place lookup');
@@ -239,7 +247,9 @@ class PlaceLookup
if ($this->sAllowedTypesSQLList) {
$sSQL .= 'AND placex.class in '.$this->sAllowedTypesSQLList;
}
$sSQL .= ' AND linked_place_id is null ';
if (!$bAllowLinked) {
$sSQL .= ' AND linked_place_id is null ';
}
$sSQL .= ' GROUP BY ';
$sSQL .= ' osm_type, ';
$sSQL .= ' osm_id, ';
@@ -340,7 +350,9 @@ class PlaceLookup
$sSQL .= ' null::text AS extra_place ';
$sSQL .= ' FROM (';
$sSQL .= ' SELECT place_id, '; // interpolate the Tiger housenumbers here
$sSQL .= ' ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) AS centroid, ';
$sSQL .= ' CASE WHEN startnumber != endnumber';
$sSQL .= ' THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float)';
$sSQL .= ' ELSE ST_LineInterpolatePoint(linegeo, 0.5) END AS centroid, ';
$sSQL .= ' parent_place_id, ';
$sSQL .= ' housenumber_for_place';
$sSQL .= ' FROM (';
@@ -397,7 +409,7 @@ class PlaceLookup
$sSQL .= ' CASE '; // interpolate the housenumbers here
$sSQL .= ' WHEN startnumber != endnumber ';
$sSQL .= ' THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) ';
$sSQL .= ' ELSE ST_LineInterpolatePoint(linegeo, 0.5) ';
$sSQL .= ' ELSE linegeo ';
$sSQL .= ' END as centroid, ';
$sSQL .= ' parent_place_id, ';
$sSQL .= ' housenumber_for_place ';
@@ -435,18 +447,14 @@ class PlaceLookup
if ($this->bExtraTags) {
if ($aPlace['extra']) {
$aPlace['sExtraTags'] = json_decode($aPlace['extra']);
$aPlace['sExtraTags'] = json_decode($aPlace['extra'], true);
} else {
$aPlace['sExtraTags'] = (object) array();
}
}
if ($this->bNameDetails) {
if ($aPlace['names']) {
$aPlace['sNameDetails'] = json_decode($aPlace['names']);
} else {
$aPlace['sNameDetails'] = (object) array();
}
$aPlace['sNameDetails'] = $this->extractNames($aPlace['names']);
}
$aPlace['addresstype'] = ClassTypes\getLabelTag(
@@ -469,6 +477,33 @@ class PlaceLookup
return $aResults;
}
/**
 * Convert the JSON-encoded name list of a place into a map of names.
 *
 * Keys carrying the '_place_' prefix are renamed to their unprefixed
 * form. When the unprefixed key is present as well, the prefixed key is
 * kept unchanged so that neither value overwrites the other.
 *
 * @param string|null $sNames JSON object mapping name keys to values.
 *
 * @return array|object Map of name key to value, or an empty object when
 *                      there are no usable names.
 */
private function extractNames($sNames)
{
    $aFullNames = $sNames ? json_decode($sNames, true) : null;

    // json_decode() returns null for malformed input and a scalar for
    // non-object JSON; neither can be iterated as a name map. An empty
    // map gets the same empty-object value as a missing name list, so
    // callers see one consistent "no names" representation.
    if (!is_array($aFullNames) || empty($aFullNames)) {
        return (object) array();
    }

    $aNames = array();

    foreach ($aFullNames as $sKey => $sValue) {
        // JSON keys that look like integers arrive as int after decoding.
        $sKey = (string) $sKey;

        if (strpos($sKey, '_place_') !== 0) {
            $aNames[$sKey] = $sValue;
            continue;
        }

        // Strip the 7-character '_place_' prefix unless that would
        // collide with a key that exists in the input.
        $sSubKey = substr($sKey, 7);
        if (array_key_exists($sSubKey, $aFullNames)) {
            $aNames[$sKey] = $sValue;
        } else {
            $aNames[$sSubKey] = $sValue;
        }
    }

    return $aNames;
}
/* returns an array which will contain the keys
* aBoundingBox
* and may also contain one or more of the keys
@@ -479,8 +514,6 @@ class PlaceLookup
* lat
* lon
*/
public function getOutlines($iPlaceID, $fLon = null, $fLat = null, $fRadius = null, $fLonReverse = null, $fLatReverse = null)
{
@@ -491,12 +524,7 @@ class PlaceLookup
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ' ST_Y(closest_point) as centrelat,';
$sSQL .= ' ST_X(closest_point) as centrelon,';
} else {
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
}
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) {
@@ -511,19 +539,21 @@ class PlaceLookup
if ($this->bIncludePolygonAsText) {
$sSQL .= ',ST_AsText(geometry) as astext';
}
$sSQL .= ' FROM (SELECT place_id';
if ($fLonReverse != null && $fLatReverse != null) {
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sFrom .=' ELSE centroid END AS closest_point';
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
$sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sSQL .=' ELSE centroid END AS centroid';
} else {
$sFrom = ' from placex where place_id = '.$iPlaceID;
$sSQL .= ',centroid';
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
$sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry';
} else {
$sSQL .= $sFrom;
$sSQL .= ',geometry';
}
$sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx';
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -32,10 +40,10 @@ class ReverseGeocode
9 => 12,
10 => 17, // City
11 => 17,
12 => 18, // Town / Village
13 => 18,
14 => 22, // Suburb
15 => 22,
12 => 18, // Town
13 => 19, // Village
14 => 22, // Neighbourhood
15 => 25, // Locality
16 => 26, // major street
17 => 27, // minor street
18 => 30, // or >, Building
@@ -56,12 +64,15 @@ class ReverseGeocode
{
Debug::newFunction('lookupInterpolation');
$sSQL = 'SELECT place_id, parent_place_id, 30 as rank_search,';
$sSQL .= ' ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
$sSQL .= ' startnumber, endnumber, interpolationtype,';
$sSQL .= ' (CASE WHEN endnumber != startnumber';
$sSQL .= ' THEN (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.')';
$sSQL .= ' ELSE startnumber END) as fhnr,';
$sSQL .= ' startnumber, endnumber, step,';
$sSQL .= ' ST_Distance(linegeo,'.$sPointSQL.') as distance';
$sSQL .= ' FROM location_property_osmline';
$sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')';
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
$sSQL .= ' and parent_place_id != 0';
$sSQL .= ' ORDER BY distance ASC limit 1';
Debug::printSQL($sSQL);
@@ -74,6 +85,11 @@ class ReverseGeocode
protected function lookupLargeArea($sPointSQL, $iMaxRank)
{
$sCountryCode = $this->getCountryCode($sPointSQL);
if (CONST_Search_WithinCountries and $sCountryCode == null) {
return null;
}
if ($iMaxRank > 4) {
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
if ($aPlace) {
@@ -83,12 +99,12 @@ class ReverseGeocode
// If no polygon which contains the searchpoint is found,
// searches in the country_osm_grid table for a polygon.
return $this->lookupInCountry($sPointSQL, $iMaxRank);
return $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
}
protected function lookupInCountry($sPointSQL, $iMaxRank)
protected function getCountryCode($sPointSQL)
{
Debug::newFunction('lookupInCountry');
Debug::newFunction('getCountryCode');
// searches for polygon in table country_osm_grid which contains the searchpoint
// and searches for the nearest place node to the searchpoint in this polygon
$sSQL = 'SELECT country_code FROM country_osm_grid';
@@ -100,8 +116,12 @@ class ReverseGeocode
null,
'Could not determine country polygon containing the point.'
);
Debug::printVar('Country code', $sCountryCode);
return $sCountryCode;
}
protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
{
Debug::newFunction('lookupInCountry');
if ($sCountryCode) {
if ($iMaxRank > 4) {
// look for place nodes with the given country code
@@ -111,12 +131,13 @@ class ReverseGeocode
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
$sSQL .= ' AND country_code = \''.$sCountryCode.'\'';
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND rank_search between 5 and ' .min(25, $iMaxRank);
$sSQL .= ' AND class = \'place\' AND type != \'postcode\'';
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, 1.8)) p ';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ') as a ';
$sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
@@ -178,14 +199,16 @@ class ReverseGeocode
$sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry';
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE ST_GeometryType(geometry) in (\'ST_Polygon\', \'ST_MultiPolygon\')';
$sSQL .= ' AND rank_address Between 5 AND ' .$iMaxRank;
// Ensure that query planner doesn't use the index on rank_search.
$sSQL .= ' AND coalesce(rank_search, 0) between 5 and ' .$iMaxRank;
$sSQL .= ' AND rank_address between 4 and 25'; // needed for index selection
$sSQL .= ' AND geometry && '.$sPointSQL;
$sSQL .= ' AND type != \'postcode\' ';
$sSQL .= ' AND name is not null';
$sSQL .= ' AND indexed_status = 0 and linked_place_id is null';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 50 ) as a';
$sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_address DESC LIMIT 1';
$sSQL .= ' ORDER BY rank_search DESC LIMIT 50 ) as a';
$sSQL .= ' WHERE ST_Contains(geometry, '.$sPointSQL.' )';
$sSQL .= ' ORDER BY rank_search DESC LIMIT 1';
Debug::printSQL($sSQL);
$aPoly = $this->oDB->getRow($sSQL, null, 'Could not determine polygon containing the point.');
@@ -197,29 +220,24 @@ class ReverseGeocode
$iRankSearch = $aPoly['rank_search'];
$iPlaceID = $aPoly['place_id'];
if ($iRankAddress != $iMaxRank) {
if ($iRankSearch != $iMaxRank) {
$sSQL = 'SELECT place_id FROM ';
$sSQL .= '(SELECT place_id, rank_search, country_code, geometry,';
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
// using rank_search because of a better differentiation
// for place nodes at rank_address 16
$sSQL .= ' AND rank_search > '.$iRankSearch;
$sSQL .= ' AND rank_search <= '.$iMaxRank;
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
$sSQL .= ' AND rank_address > 0';
$sSQL .= ' AND class = \'place\'';
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' AND indexed_status = 0 AND linked_place_id is null';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, reverse_place_diameter('.$iRankSearch.'::smallint))';
$sSQL .= ' ORDER BY distance ASC,';
$sSQL .= ' rank_address DESC';
$sSQL .= ' limit 500) as a';
$sSQL .= ' WHERE ST_CONTAINS((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' limit 100) as a';
$sSQL .= ' WHERE ST_Contains((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY distance ASC, rank_search DESC';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);
@@ -255,7 +273,7 @@ class ReverseGeocode
// starts if the search is on POI or street level,
// searches for the nearest POI or street,
// if a street is found and a POI is searched for,
// the nearest POI which the found street is a parent of is choosen.
// the nearest POI which the found street is a parent of is chosen.
$sSQL = 'select place_id,parent_place_id,rank_address,country_code,';
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
$sSQL .= ' FROM ';
@@ -319,9 +337,9 @@ class ReverseGeocode
&& $this->iMaxRank >= 28
) {
$sSQL = 'SELECT place_id,parent_place_id,30 as rank_search,';
$sSQL .= 'ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
$sSQL .= 'ST_distance('.$sPointSQL.', linegeo) as distance,';
$sSQL .= 'startnumber,endnumber,interpolationtype';
$sSQL .= ' (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fhnr,';
$sSQL .= ' startnumber, endnumber, step,';
$sSQL .= ' ST_Distance('.$sPointSQL.', linegeo) as distance';
$sSQL .= ' FROM location_property_tiger WHERE parent_place_id = '.$oResult->iId;
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, 0.001)';
$sSQL .= ' ORDER BY distance ASC limit 1';
@@ -333,7 +351,11 @@ class ReverseGeocode
if ($aPlaceTiger) {
$aPlace = $aPlaceTiger;
$oResult = new Result($aPlaceTiger['place_id'], Result::TABLE_TIGER);
$oResult->iHouseNumber = closestHouseNumber($aPlaceTiger);
$iRndNum = max(0, round($aPlaceTiger['fhnr'] / $aPlaceTiger['step']) * $aPlaceTiger['step']);
$oResult->iHouseNumber = $aPlaceTiger['startnumber'] + $iRndNum;
if ($oResult->iHouseNumber > $aPlaceTiger['endnumber']) {
$oResult->iHouseNumber = $aPlaceTiger['endnumber'];
}
$iRankAddress = 30;
}
}
@@ -345,7 +367,7 @@ class ReverseGeocode
// We can't reliably go from the closest street to an
// interpolation line because the closest interpolation
// may have a different street segments as a parent.
// Therefore allow an interpolation line to take precendence
// Therefore allow an interpolation line to take precedence
// even when the street is closer.
$fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
}
@@ -355,7 +377,11 @@ class ReverseGeocode
if ($aHouse) {
$oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
$oResult->iHouseNumber = closestHouseNumber($aHouse);
$iRndNum = max(0, round($aHouse['fhnr'] / $aHouse['step']) * $aHouse['step']);
$oResult->iHouseNumber = $aHouse['startnumber'] + $iRndNum;
if ($oResult->iHouseNumber > $aHouse['endnumber']) {
$oResult->iHouseNumber = $aHouse['endnumber'];
}
$aPlace = $aHouse;
}
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -228,7 +236,7 @@ class SearchDescription
* Add the given full-word token to the list of terms to search for in the
* name.
*
* @param interger iId ID of term to add.
* @param integer iId ID of term to add.
* @param bool bRareName True if the term is infrequent enough to not
* require other constraints for efficient search.
*/
@@ -256,8 +264,10 @@ class SearchDescription
{
if (empty($this->aName)) {
$this->bNameNeedsAddress = $bNeedsAddress;
} elseif ($bSearchable && count($this->aName) >= 2) {
$this->bNameNeedsAddress = false;
} else {
$this->bNameNeedsAddress |= $bNeedsAddress;
$this->bNameNeedsAddress &= $bNeedsAddress;
}
if ($bSearchable) {
$this->aName[$iId] = $iId;
@@ -377,7 +387,7 @@ class SearchDescription
*
* @return mixed[] An array with two fields: IDs contains the list of
* matching place IDs and houseNumber the houseNumber
* if appicable or -1 if not.
* if applicable or -1 if not.
*/
public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
{
@@ -412,28 +422,6 @@ class SearchDescription
$iLimit
);
// Now search for housenumber, if housenumber provided. Can be zero.
if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) {
$aHnResults = $this->queryHouseNumber($oDB, $aResults);
// Downgrade the rank of the street results, they are missing
// the housenumber. Also drop POI places (rank 30) here, they
// cannot be a parent place and therefore must not be shown
// as a result for a search with a missing housenumber.
foreach ($aResults as $oRes) {
if ($oRes->iAddressRank < 28) {
if ($oRes->iAddressRank >= 26) {
$oRes->iResultRank++;
} else {
$oRes->iResultRank += 2;
}
$aHnResults[$oRes->iId] = $oRes;
}
}
$aResults = $aHnResults;
}
// finally get POIs if requested
if ($this->sClass && !empty($aResults)) {
$aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit);
@@ -579,41 +567,6 @@ class SearchDescription
$aTerms = array();
$aOrder = array();
// Sort by existence of the requested house number but only if not
// too many results are expected for the street, i.e. if the result
// will be narrowed down by an address. Remember that with ordering
// every single result has to be checked.
if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
// Housenumbers on streets and places.
$sChildHnr = 'SELECT * FROM placex WHERE parent_place_id = search_name.place_id';
$sChildHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
// Interpolations on streets and places.
if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
$sIpolHnr = 'SELECT * FROM location_property_osmline ';
$sIpolHnr .= 'WHERE parent_place_id = search_name.place_id ';
$sIpolHnr .= ' AND startnumber is not NULL';
$sIpolHnr .= ' AND '.$this->sHouseNumber.'>=startnumber ';
$sIpolHnr .= ' AND '.$this->sHouseNumber.'<=endnumber ';
} else {
$sIpolHnr = false;
}
// Housenumbers on the object iteself for unlisted places.
$sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
$sSelfHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'";
$sSql = '(CASE WHEN address_rank = 30 THEN EXISTS('.$sSelfHnr.') ';
$sSql .= ' ELSE EXISTS('.$sChildHnr.') ';
if ($sIpolHnr) {
$sSql .= 'OR EXISTS('.$sIpolHnr.') ';
}
$sSql .= 'END) DESC';
$aOrder[] = $sSql;
}
if (!empty($this->aName)) {
$aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName);
}
@@ -659,10 +612,6 @@ class SearchDescription
$aTerms[] = 'centroid && '.$this->oContext->sqlViewboxSmall;
}
if ($this->oContext->hasNearPoint()) {
$aOrder[] = $this->oContext->distanceSQL('centroid');
}
if ($this->sHouseNumber) {
$sImportanceSQL = '- abs(26 - address_rank) + 3';
} else {
@@ -685,122 +634,128 @@ class SearchDescription
$sExactMatchSQL = '0::int as exactmatch';
}
if ($this->sHouseNumber || $this->sClass) {
$iLimit = 40;
if (empty($aTerms)) {
return array();
}
$aResults = array();
if ($this->hasHousenumber()) {
$sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
if (!empty($aTerms)) {
$sSQL = 'SELECT place_id, address_rank,'.$sExactMatchSQL;
// Housenumbers on streets and places.
$sPlacexSql = 'SELECT array_agg(place_id) FROM placex';
$sPlacexSql .= ' WHERE parent_place_id = sin.place_id AND sin.address_rank < 30';
$sPlacexSql .= $this->oContext->excludeSQL(' AND place_id');
$sPlacexSql .= ' and housenumber ~* E'.$sHouseNumberRegex;
// Interpolations on streets and places.
$sInterpolSql = 'null';
$sTigerSql = 'null';
if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
$sIpolHnr = 'WHERE parent_place_id = sin.place_id ';
$sIpolHnr .= ' AND startnumber is not NULL AND sin.address_rank < 30';
$sIpolHnr .= ' AND '.$this->sHouseNumber.' between startnumber and endnumber';
$sIpolHnr .= ' AND ('.$this->sHouseNumber.' - startnumber) % step = 0';
$sInterpolSql = 'SELECT array_agg(place_id) FROM location_property_osmline '.$sIpolHnr;
if (CONST_Use_US_Tiger_Data) {
$sTigerSql = 'SELECT array_agg(place_id) FROM location_property_tiger '.$sIpolHnr;
$sTigerSql .= " and sin.country_code = 'us'";
}
}
if ($this->sClass) {
$iLimit = 40;
}
$sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
$sSelfHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex;
$aTerms[] = '(address_rank < 30 or exists('.$sSelfHnr.'))';
$sSQL = 'SELECT sin.*, ';
$sSQL .= '('.$sPlacexSql.') as placex_hnr, ';
$sSQL .= '('.$sInterpolSql.') as interpol_hnr, ';
$sSQL .= '('.$sTigerSql.') as tiger_hnr ';
$sSQL .= ' FROM (';
$sSQL .= ' SELECT place_id, address_rank, country_code,'.$sExactMatchSQL.',';
$sSQL .= ' CASE WHEN importance = 0 OR importance IS NULL';
$sSQL .= ' THEN 0.75001-(search_rank::float/40) ELSE importance END as importance';
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT 40000';
$sSQL .= ') as sin';
$sSQL .= ' ORDER BY address_rank = 30 desc, placex_hnr, interpol_hnr, tiger_hnr,';
$sSQL .= ' importance';
$sSQL .= ' LIMIT '.$iLimit;
} else {
if ($this->sClass) {
$iLimit = 40;
}
$sSQL = 'SELECT place_id, address_rank, '.$sExactMatchSQL;
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT '.$iLimit;
Debug::printSQL($sSQL);
$aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
$oResult->iExactMatches = $aResult['exactmatch'];
$oResult->iAddressRank = $aResult['address_rank'];
$aResults[$aResult['place_id']] = $oResult;
}
}
return $aResults;
}
private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
{
$aResults = array();
$sRoadPlaceIDs = Result::joinIdsByTableMaxRank(
$aRoadPlaceIDs,
Result::TABLE_PLACEX,
27
);
$sPOIPlaceIDs = Result::joinIdsByTableMinRank(
$aRoadPlaceIDs,
Result::TABLE_PLACEX,
30
);
$aIDCondition = array();
if ($sRoadPlaceIDs) {
$aIDCondition[] = 'parent_place_id in ('.$sRoadPlaceIDs.')';
}
if ($sPOIPlaceIDs) {
$aIDCondition[] = 'place_id in ('.$sPOIPlaceIDs.')';
}
if (empty($aIDCondition)) {
return $aResults;
}
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$sSQL = 'SELECT place_id FROM placex WHERE';
$sSQL .= " housenumber ~* E'".$sHouseNumberRegex."'";
$sSQL .= ' AND ('.join(' OR ', $aIDCondition).')';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
// XXX should inherit the exactMatches from its parent
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
}
$aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
$bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber);
$iHousenumber = intval($this->sHouseNumber);
if ($bIsIntHouseNumber && $sRoadPlaceIDs && empty($aResults)) {
// if nothing found, search in the interpolation line table
$sSQL = 'SELECT distinct place_id FROM location_property_osmline';
$sSQL .= ' WHERE startnumber is not NULL';
$sSQL .= ' AND parent_place_id in ('.$sRoadPlaceIDs.') AND (';
if ($iHousenumber % 2 == 0) {
// If housenumber is even, look for housenumber in streets
// with interpolationtype even or all.
$sSQL .= "interpolationtype='even'";
} else {
// Else look for housenumber with interpolationtype odd or all.
$sSQL .= "interpolationtype='odd'";
$aResults = array();
foreach ($aDBResults as $aResult) {
$oResult = new Result($aResult['place_id']);
$oResult->iExactMatches = $aResult['exactmatch'];
$oResult->iAddressRank = $aResult['address_rank'];
$bNeedResult = true;
if ($this->hasHousenumber() && $aResult['address_rank'] < 30) {
if ($aResult['placex_hnr']) {
foreach (explode(',', substr($aResult['placex_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['interpol_hnr']) {
foreach (explode(',', substr($aResult['interpol_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID, Result::TABLE_OSMLINE);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$oHnrResult->iHouseNumber = intval($this->sHouseNumber);
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['tiger_hnr']) {
foreach (explode(',', substr($aResult['tiger_hnr'], 1, -1)) as $sPlaceID) {
$iPlaceID = intval($sPlaceID);
$oHnrResult = new Result($iPlaceID, Result::TABLE_TIGER);
$oHnrResult->iExactMatches = $aResult['exactmatch'];
$oHnrResult->iAddressRank = 30;
$oHnrResult->iHouseNumber = intval($this->sHouseNumber);
$aResults[$iPlaceID] = $oHnrResult;
$bNeedResult = false;
}
}
if ($aResult['address_rank'] < 26) {
$oResult->iResultRank += 2;
} else {
$oResult->iResultRank++;
}
}
$sSQL .= " or interpolationtype='all') and ";
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
}
}
// If nothing found then search in Tiger data (location_property_tiger)
if (CONST_Use_US_Tiger_Data && $sRoadPlaceIDs && $bIsIntHouseNumber && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_tiger';
$sSQL .= ' WHERE parent_place_id in ('.$sRoadPlaceIDs.') and (';
if ($iHousenumber % 2 == 0) {
$sSQL .= "interpolationtype='even'";
} else {
$sSQL .= "interpolationtype='odd'";
}
$sSQL .= " or interpolationtype='all') and ";
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
Debug::printSQL($sSQL);
foreach ($oDB->getCol($sSQL) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_TIGER);
$oResult->iHouseNumber = $iHousenumber;
$aResults[$iPlaceId] = $oResult;
if ($bNeedResult) {
$aResults[$aResult['place_id']] = $oResult;
}
}
@@ -852,6 +807,7 @@ class SearchDescription
$sSQL = 'SELECT geometry FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs)";
$sSQL .= " AND rank_search < $iMaxRank + 5";
$sSQL .= ' AND ST_Area(Box2d(geometry)) < 20';
$sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')";
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= ' LIMIT 1';

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;
@@ -112,13 +120,18 @@ class SimpleWordList
return array_slice($aWordSets, 0, SimpleWordList::MAX_WORDSETS);
}
/**
* Custom search routine which takes two arrays. The array with the fewest
* items wins. If same number of items then the one with the longest first
* element wins.
*/
public static function cmpByArraylen($aA, $aB)
{
$iALen = count($aA);
$iBLen = count($aB);
if ($iALen == $iBLen) {
return 0;
return strlen($aB[0]) <=> strlen($aA[0]);
}
return ($iALen < $iBLen) ? -1 : 1;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;
@@ -58,7 +66,7 @@ class HouseNumber
// up of numbers, add a penalty
$iSearchCost = 1;
if (preg_match('/\\d/', $this->sToken) === 0
|| preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) {
|| preg_match_all('/[^0-9 ]/', $this->sToken, $aMatches) > 3) {
$iSearchCost += strlen($this->sToken) - 1;
}
if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;
@@ -9,7 +17,7 @@ class Postcode
{
/// Database word id, if available.
private $iId;
/// Full nomralized postcode (upper cased).
/// Full normalized postcode (upper cased).
private $sPostcode;
// Optional country code the postcode belongs to (currently unused).
private $sCountryCode;
@@ -17,7 +25,12 @@ class Postcode
public function __construct($iId, $sPostcode, $sCountryCode = '')
{
$this->iId = $iId;
$this->sPostcode = $sPostcode;
$iSplitPos = strpos($sPostcode, '@');
if ($iSplitPos === false) {
$this->sPostcode = $sPostcode;
} else {
$this->sPostcode = substr($sPostcode, 0, $iSplitPos);
}
$this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;
@@ -61,19 +69,31 @@ class SpecialTerm
*/
public function extendSearch($oSearch, $oPosition)
{
$iSearchCost = 2;
$iSearchCost = 0;
$iOp = $this->iOperator;
if ($iOp == \Nominatim\Operator::NONE) {
if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) {
if ($oPosition->isFirstToken()
|| $oSearch->hasName()
|| $oSearch->getContext()->isBoundedSearch()
) {
$iOp = \Nominatim\Operator::NAME;
$iSearchCost += 3;
} else {
$iOp = \Nominatim\Operator::NEAR;
$iSearchCost += 2;
$iSearchCost += 4;
if (!$oPosition->isFirstToken()) {
$iSearchCost += 3;
}
}
} elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
} elseif ($oPosition->isFirstToken()) {
$iSearchCost += 2;
} elseif ($oPosition->isLastToken()) {
$iSearchCost += 4;
} else {
$iSearchCost += 6;
}
if ($oSearch->hasHousenumber()) {
$iSearchCost ++;
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
namespace Nominatim\Token;
@@ -54,7 +62,7 @@ class Word
public function extendSearch($oSearch, $oPosition)
{
// Full words can only be a name if they appear at the beginning
// of the phrase. In structured search the name must forcably in
// of the phrase. In structured search the name must forcibly in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
if ($oSearch->hasName()

View File

@@ -1,182 +0,0 @@
<?php
// Command-line tool that exports structured address data from a Nominatim
// database as CSV.
// The library directory is the parent of the directory holding this script.
@define('CONST_LibDir', dirname(dirname(__FILE__)));
// Script to extract structured city and street data
// from a running nominatim instance as CSV data
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/ParameterParser.php');
ini_set('memory_limit', '800M');
// Option specification for getCmdOpt(). The first entry is the tool
// description, each array entry describes one option (long name, short name,
// count/argument limits, value type, help text) and the remaining plain
// strings are free help text (presumably printed with the usage message).
$aCMDOptions = array(
'Export addresses as CSV file from a Nominatim database',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
'Additional output types: postcode, placeid (placeid for each object)',
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
'can be merged into one column by simply using a comma-separated list.',
"\nDefault output-type: street",
'Default output format: street;suburb;city;county;state;country'
);
// Parse the command line into $aCMDResult; the two trailing flags ask
// getCmdOpt() to exit on errors and on unknown options.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
// Load the settings of the requested project directory, falling back to the
// current working directory when --project-dir was not given.
loadSettings($aCMDResult['project-dir'] ?? getcwd());
// Address rank that belongs to each named address level.
$aRankmap = array(
    'continent' => 1,
    'country' => 4,
    'state' => 8,
    'county' => 12,
    'city' => 16,
    'suburb' => 20,
    'street' => 26,
    'path' => 27
);

$oDB = new Nominatim\DB();
$oDB->connect();

// Rank of the objects to export. Streets are exported when no output type
// was requested; an unknown type is reported through fail().
$sOutputType = $aCMDResult['output-type'] ?? 'street';
if (!isset($aRankmap[$sOutputType])) {
    fail('unknown output-type: '.$sOutputType);
}
$iOutputRank = $aRankmap[$sOutputType];
// Language preference for the exported names. 'xx' is used when no language
// was requested on the command line.
$aCMDResult['language'] = $aCMDResult['language'] ?? 'xx';
$oParams = new Nominatim\ParameterParser();
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
// SQL array literal of the quoted language list, embedded into the
// per-place name query further down.
$aQuotedLanguages = $oDB->getDBQuotedList($aLangPrefOrder);
$sLanguagePrefArraySQL = $oDB->getArraySQL($aQuotedLanguages);
// Output formatting: build the lookup table from address rank (or the
// special names 'postcode' and 'placeid') to the CSV column index.
// Columns are separated by ';', several ranks sharing a column by ','.
$aColumnMapping = array();
$iNumCol = 0;
$aCMDResult['output-format'] = $aCMDResult['output-format']
    ?? 'street;suburb;city;county;state;country';

foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumnSpec) {
    $bColumnUsed = false;
    foreach (preg_split('/\s*,\s*/', $sColumnSpec) as $sLevel) {
        if (in_array($sLevel, array('postcode', 'placeid'), true)) {
            $mKey = $sLevel;
        } elseif (isset($aRankmap[$sLevel]) && $aRankmap[$sLevel] <= $iOutputRank) {
            // Only ranks at or above the exported level can be part of
            // the address of an exported object.
            $mKey = (string)$aRankmap[$sLevel];
        } else {
            continue;
        }
        $aColumnMapping[$mKey] = $iNumCol;
        $bColumnUsed = true;
    }
    // A column index is only consumed when something was mapped to it.
    if ($bColumnUsed) {
        $iNumCol++;
    }
}
// Build the query that selects the objects to export together with the
// place ids of their address parts.
$sPlacexSQL = 'select min(place_id) as place_id, '
    .'array_agg(place_id) as place_ids, '
    .'country_code as cc, '
    .'postcode, '
    // address places, leaving out the postcode ranks 5 and 11
    .'array(select address_place_id from place_addressline a'
    .' where a.place_id = placex.place_id and isaddress'
    .' and address_place_id != placex.place_id'
    .' and not cached_rank_address in (5,11)'
    .' and cached_rank_address > 2 order by cached_rank_address)'
    .' as address'
    .' from placex where name is not null and linked_place_id is null'
    .' and rank_address = '.$iOutputRank;

if (isset($aCMDResult['restrict-to-country'])) {
    $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
}

// Optional restriction to the children of one OSM object. When several of
// the options are given, the last one in this table wins.
$sParentId = false;
$sOsmType = false;
$aOsmRestrictions = array(
    'restrict-to-osm-node' => 'N',
    'restrict-to-osm-way' => 'W',
    'restrict-to-osm-relation' => 'R'
);
foreach ($aOsmRestrictions as $sOption => $sTypeLetter) {
    if (isset($aCMDResult[$sOption])) {
        $sOsmType = $sTypeLetter;
        $sOsmId = $aCMDResult[$sOption];
    }
}

if ($sOsmType) {
    // Resolve the OSM object to its place id.
    $sParentId = $oDB->getOne(
        'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id',
        array('osm_type' => $sOsmType, 'osm_id' => $sOsmId)
    );
    if (!$sParentId) {
        fail('Could not find place '.$sOsmType.' '.$sOsmId);
    } else {
        $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
    }
}

$sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
// Walk over the selected places, look up the address hierarchy of each one
// and write one CSV row per place to standard output.
$oResults = $oDB->getQueryStatement($sPlacexSQL);
$fOutstream = fopen('php://output', 'w');

// Both lookup statements are the same for every row, so build them once.
$sAddressSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)"
    .' WHERE isaddress'
    .' order by rank_address desc,isaddress desc';
$sPostcodeSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa '
    .'on px.place_id = pa.address_place_id '
    .'where pa.cached_rank_address in (5,11) '
    .'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) '
    .'group by postcode order by count(*) desc limit 1';

while ($aRow = $oResults->fetch()) {
    $aOutput = array_fill(0, $iNumCol, '');

    // Address parts: fill every column that has a rank mapped to it.
    $aAddressLines = $oDB->getAll($sAddressSQL, array('place_id' => $aRow['place_id']));
    foreach ($aAddressLines as $aAddress) {
        $iRankOfLine = $aAddress['rank_address'];
        if (isset($aColumnMapping[$iRankOfLine])) {
            $aOutput[$aColumnMapping[$iRankOfLine]] = $aAddress['localname'];
        }
    }

    // Postcode column. place_ids and the aggregated postcodes arrive as
    // array literals in braces; substr(…, 1, -1) strips the braces.
    if (isset($aColumnMapping['postcode'])) {
        if (!$aCMDResult['output-all-postcodes']) {
            $aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
        } else {
            $sRes = $oDB->getOne(
                $sPostcodeSQL,
                array('first_place_id' => substr($aRow['place_ids'], 1, -1))
            );
            $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
        }
    }

    // Place id column.
    if (isset($aColumnMapping['placeid'])) {
        $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
    }

    fputcsv($fOutstream, $aOutput);
}

fclose($fOutstream);

View File

@@ -1,106 +0,0 @@
<?php
// Command-line tool that warms up a Nominatim database by running reverse
// and search queries against it.
// The library directory is the parent of the directory holding this script.
@define('CONST_LibDir', dirname(dirname(__FILE__)));

require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/log.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');

ini_set('memory_limit', '800M');

// Option specification for getCmdOpt(): tool description followed by one
// array per supported option.
$aCMDOptions = array(
    'Tools to warm nominatim db',
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
    array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
    array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);

// Parse the command line into $aResult; the two trailing flags ask
// getCmdOpt() to exit on errors and on unknown options.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);

// The parsed options live in $aResult. Reading them from any other variable
// would silently ignore --project-dir and always use the working directory.
loadSettings($aResult['project-dir'] ?? getcwd());

// Configuration constants taken from the project settings.
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');

// Geocode.php is loaded only after the constants above have been defined.
require_once(CONST_LibDir.'/Geocode.php');

$oDB = new Nominatim\DB();
$oDB->connect();

$bVerbose = $aResult['verbose'];
/**
 * Print the progress output for one warm-up query.
 *
 * In verbose mode the 'langaddress' of the first result is printed on a
 * line of its own, or "<not found>" when there is no usable result. In
 * non-verbose mode a single dot is printed per query.
 *
 * @param array|null|false $aResults Result rows; may be a plain list or an
 *                                   array with arbitrary keys.
 * @param bool             $bVerbose Whether to print the address or a dot.
 *
 * @return void
 */
function print_results($aResults, $bVerbose)
{
    if (!$bVerbose) {
        echo '.';
        return;
    }

    // Take the first element instead of index 0, so results that are not
    // keyed as a 0-based list do not trigger an undefined-index access.
    $aFirst = (is_array($aResults) && !empty($aResults)) ? reset($aResults) : null;

    if (is_array($aFirst) && isset($aFirst['langaddress'])) {
        echo $aFirst['langaddress']."\n";
    } else {
        echo "<not found>\n";
    }
}
// Reverse warm-up: run 1000 reverse lookups on random coordinates, unless
// the tool was started with --search-only.
if (!$aResult['search-only']) {
    $oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
    $oReverseGeocode->setZoom(20);

    $oPlaceLookup = new Nominatim\PlaceLookup($oDB);
    $oPlaceLookup->setIncludeAddressDetails(true);
    $oPlaceLookup->setLanguagePreference(array('en'));

    // In verbose mode every lookup gets a line of its own.
    echo 'Warm reverse: '.($bVerbose ? "\n" : '');

    for ($iRun = 1; $iRun <= 1000; $iRun++) {
        // Random position with two decimal places.
        $fLat = rand(-9000, 9000) / 100;
        $fLon = rand(-18000, 18000) / 100;
        if ($bVerbose) {
            echo "$fLat, $fLon = ";
        }

        $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
        if ($oLookup) {
            // Fetch the details of the place that was found.
            $aSearchResults = $oPlaceLookup->lookup(array($oLookup->iId => $oLookup));
        } else {
            $aSearchResults = null;
        }
        print_results($aSearchResults, $bVerbose);
    }

    echo "\n";
}
// Search warm-up: run a search for each of the 1000 most frequent words
// reported by the tokenizer, unless the tool was started with --reverse-only.
if (!$aResult['reverse-only']) {
    $oGeocode = new Nominatim\Geocode($oDB);

    echo 'Warm search: ';
    if ($bVerbose) {
        // In verbose mode every query gets a line of its own.
        echo "\n";
    }

    $oTokenizer = new \Nominatim\Tokenizer($oDB);
    $aWords = $oTokenizer->mostFrequentWords(1000);

    foreach ($aWords as $sWord) {
        if ($bVerbose) {
            echo "$sWord = ";
        }

        $oGeocode->setLanguagePreference(array('en'));
        $oGeocode->setQuery($sWord);
        $aSearchResults = $oGeocode->lookup();
        print_results($aSearchResults, $bVerbose);
    }

    echo "\n";
}

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
require_once(CONST_LibDir.'/Shell.php');
@@ -98,7 +106,7 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn
showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing');
}
if ($aCounts[$aLine[0]] > $aLine[3]) {
showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times');
showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is present too many times');
}
if ($aLine[6] == 'bool' && !array_key_exists($aLine[0], $aResult)) {
$aResult[$aLine[0]] = false;

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
require('Symfony/Component/Dotenv/autoload.php');

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
require_once('init.php');
require_once('cmd.php');

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
require_once('init.php');
require_once('ParameterParser.php');
@@ -18,7 +26,7 @@ function userError($sMsg)
function exception_handler_json($exception)
{
http_response_code($exception->getCode());
http_response_code($exception->getCode() == 0 ? 500 : $exception->getCode());
header('Content-type: application/json; charset=utf-8');
include(CONST_LibDir.'/template/error-json.php');
exit();
@@ -26,7 +34,7 @@ function exception_handler_json($exception)
function exception_handler_xml($exception)
{
http_response_code($exception->getCode());
http_response_code($exception->getCode() == 0 ? 500 : $exception->getCode());
header('Content-type: text/xml; charset=utf-8');
echo '<?xml version="1.0" encoding="UTF-8" ?>'."\n";
include(CONST_LibDir.'/template/error-xml.php');

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
require_once(CONST_LibDir.'/lib.php');
require_once(CONST_LibDir.'/DB.php');

View File

@@ -1,9 +1,17 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
function loadSettings($sProjectDir)
{
@define('CONST_InstallDir', $sProjectDir);
// Temporary hack to set the direcory via environment instead of
// Temporary hack to set the directory via environment instead of
// the installed scripts. Neither setting is part of the official
// set of settings.
defined('CONST_ConfigDir') or define('CONST_ConfigDir', $_SERVER['NOMINATIM_CONFIGDIR']);
@@ -198,24 +206,34 @@ function parseLatLon($sQuery)
return array($sFound, $fQueryLat, $fQueryLon);
}
function closestHouseNumber($aRow)
function addressRankToGeocodeJsonType($iAddressRank)
{
$fHouse = $aRow['startnumber']
+ ($aRow['endnumber'] - $aRow['startnumber']) * $aRow['fraction'];
switch ($aRow['interpolationtype']) {
case 'odd':
$iHn = (int)($fHouse/2) * 2 + 1;
break;
case 'even':
$iHn = (int)(round($fHouse/2)) * 2;
break;
default:
$iHn = (int)(round($fHouse));
break;
if ($iAddressRank >= 29 && $iAddressRank <= 30) {
return 'house';
}
if ($iAddressRank >= 26 && $iAddressRank < 28) {
return 'street';
}
if ($iAddressRank >= 22 && $iAddressRank < 26) {
return 'locality';
}
if ($iAddressRank >= 17 && $iAddressRank < 22) {
return 'district';
}
if ($iAddressRank >= 13 && $iAddressRank < 17) {
return 'city';
}
if ($iAddressRank >= 10 && $iAddressRank < 13) {
return 'county';
}
if ($iAddressRank >= 5 && $iAddressRank < 10) {
return 'state';
}
if ($iAddressRank >= 4 && $iAddressRank < 5) {
return 'country';
}
return max(min($aRow['endnumber'], $iHn), $aRow['startnumber']);
return 'locality';
}
if (!function_exists('array_key_last')) {

View File

@@ -1,4 +1,12 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array())
@@ -70,7 +78,7 @@ function logEnd(&$oDB, $hLog, $iNumResults)
if (CONST_Log_DB) {
$aEndTime = explode('.', $fEndTime);
if (!$aEndTime[1]) {
if (!isset($aEndTime[1])) {
$aEndTime[1] = '0';
}
$sEndTime = date('Y-m-d H:i:s', $aEndTime[0]).'.'.$aEndTime[1];

Some files were not shown because too many files have changed in this diff Show More