Compare commits

...

234 Commits

Author SHA1 Message Date
Sarah Hoffmann
896a40b7d5 prepare release 4.4.1 2024-08-20 13:54:37 +02:00
Sarah Hoffmann
1e71085004 update CI scripts 2024-08-20 13:54:37 +02:00
Sarah Hoffmann
3c05d98b80 make sure SQLAlchemy can handle the loaded dialect
The psycopg dialect was only added in SQLAlchemy 2.0. To avoid loading
errors when SQLAlchemy 1.4 is installed together with psycopg3,
check that the dialect is really available.
2024-08-20 10:10:09 +02:00
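A minimal sketch of such a guard (not the actual Nominatim code; the helper name is hypothetical). The `postgresql+psycopg` dialect only exists from SQLAlchemy 2.0 on, so on SQLAlchemy 1.4 the code must fall back to psycopg2:

```python
# Hypothetical sketch: choose a driver the installed SQLAlchemy can serve.
import sqlalchemy as sa

def make_url(**dsn) -> sa.engine.URL:
    if sa.__version__.startswith('1.'):
        # SQLAlchemy 1.4 ships no psycopg3 dialect; use psycopg2 instead.
        drivername = 'postgresql+psycopg2'
    else:
        drivername = 'postgresql+psycopg'
    return sa.engine.URL.create(drivername, **dsn)
```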
Sarah Hoffmann
7bbdf57b08 restrict interpolation housenumbers to 0-999999
Ensures that the numbers fit into integers.
2024-08-20 09:45:01 +02:00
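As an illustration (names are made up, not the importer's actual code), the restriction amounts to a simple range check so that every accepted number fits into a plain integer column:

```python
from typing import Optional

def parse_interpolation_housenumber(value: str) -> Optional[int]:
    """Return the housenumber as int, or None if it is unusable."""
    try:
        num = int(value.strip())
    except ValueError:
        return None
    # Numbers outside 0-999999 are discarded per the restriction above.
    return num if 0 <= num <= 999999 else None
```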
Markus Döring
5e91b78ff4 update Search.md, fix typo (#3454) 2024-08-20 09:43:43 +02:00
Sarah Hoffmann
ad72641424 restrict invalidation of child objects on large street features
When streets become very large, it is more likely a mapping error,
so ignore such changes.
2024-08-20 09:43:20 +02:00
marc tobias
75130e4332 docs: use nominatim-project everywhere 2024-08-20 09:41:42 +02:00
marc tobias
47b41ed510 Import documentation: reverse-only import is only a little faster [skip ci] 2024-08-20 09:40:52 +02:00
marc tobias
b4e2e7de16 geocodejson: admin level output should only print boundaries 2024-08-20 09:40:21 +02:00
marc tobias
1c3ed66ca1 docs: correct URL of an internal link 2024-08-20 09:39:06 +02:00
Sarah Hoffmann
e5a5f02666 prepare release 4.4.0 2024-03-07 11:43:01 +01:00
Sarah Hoffmann
11ced26025 Merge pull request #3358 from lonvia/pg-module-for-pg16
Fix compilation of legacy module for PostgreSQL 16
2024-03-07 11:39:24 +01:00
Sarah Hoffmann
edb1eec46d actions: run legacy test against newest postgresql 16 2024-03-05 19:38:06 +01:00
Sarah Hoffmann
63eacc5589 fix compilation of PG module for PostgreSQL 16
This version requires an additional include for the macros used.
2024-03-05 16:31:02 +01:00
Sarah Hoffmann
e929693cae Merge pull request #3356 from lonvia/use-date-from-osm2pgsql-prop
Use import date from osm2pgsql property table if available
2024-03-05 15:32:16 +01:00
Sarah Hoffmann
ae7c584e28 use import date from osm2pgsql property table if available 2024-03-05 11:33:32 +01:00
Sarah Hoffmann
4d5faf9423 Merge pull request #3353 from mtmail/add-codespell
Github Actions: add codespell linter, warn only
2024-03-04 14:02:00 +01:00
marc tobias
b7eea4d53a Github Actions: add codespell linter, warn only 2024-03-04 00:22:24 +01:00
Sarah Hoffmann
dd2c794de5 Merge pull request #3350 from lonvia/improve-postcode-handling
Improve handling of postcode areas
2024-02-28 18:45:31 +01:00
Sarah Hoffmann
3b6d35fc12 Merge pull request #3349 from lonvia/remove-way-geometry-table-after-import
Drop lower-rank-ways index after import
2024-02-28 17:47:34 +01:00
Sarah Hoffmann
9fa73cfb15 improve display name for postcodes
Don't add the postcode again in the list of address details and
make sure that the result proper always comes before anything else,
independently of the address rank.
2024-02-28 16:50:40 +01:00
Sarah Hoffmann
62b7670e0c for postcodes use rank_search as base rank for finding addresses
The rank_address reflects the position in the address, which is
usually lower than what one would expect for a postcode area.
2024-02-28 14:40:36 +01:00
Sarah Hoffmann
d7bb449e74 drop lower-rank-ways index after import
The index becomes quite big and is only needed during import
because the full geometry import does not exist yet.
2024-02-28 14:35:56 +01:00
Sarah Hoffmann
247065ff6f Merge pull request #3342 from mtmail/tyops
Correct some typos
2024-02-28 14:25:16 +01:00
Sarah Hoffmann
9a84adef59 Merge pull request #3347 from lonvia/tweak-boundary-imports
Assorted style tweaks
2024-02-28 14:22:08 +01:00
Sarah Hoffmann
1879cf902c Merge pull request #3346 from lonvia/reduce-artificial-importance
Reduce default importance
2024-02-28 14:21:46 +01:00
Sarah Hoffmann
019a68a4bb Merge pull request #3345 from lonvia/simplify-large-geometries
Simplify very large polygons that are not used in addresses
2024-02-28 12:06:49 +01:00
Sarah Hoffmann
110491011f Merge pull request #3344 from lonvia/osm2pgsql-new-middle
Update osm2pgsql to latest 1.11.0 and add support for new middle format
2024-02-28 12:06:33 +01:00
Sarah Hoffmann
36b1660121 add support for new middle table format of osm2pgsql
Functions are adapted according to the format detected from the
osm2pgsql property table.
2024-02-27 18:18:19 +01:00
Sarah Hoffmann
56201feb28 simplify very large polygons not used in addresses
Polygons with rank_address = 0 are only used in search and (rarely)
for reverse lookup. Geometries do not need to be precise for that
because topology does not matter. OSM has some very large polygons
of natural features with sizes of more than 10MB. Simplify these
polygons to keep the database and indexes smaller.
2024-02-27 10:16:18 +01:00
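The idea can be sketched as a single SQL statement run from Python (table and column names follow the placex schema; the tolerance and size threshold here are illustrative, not the values the importer actually uses):

```python
# Hypothetical sketch, executed with a psycopg-style connection 'conn'.
SIMPLIFY_SQL = """
    UPDATE placex
       SET geometry = ST_SimplifyPreserveTopology(geometry, 0.0001)
     WHERE rank_address = 0
       AND ST_NPoints(geometry) > 10000
"""

def simplify_large_polygons(conn):
    with conn.cursor() as cur:
        cur.execute(SIMPLIFY_SQL)
    conn.commit()
```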
Sarah Hoffmann
c6d40d4bf4 reduce importance when computed from search rank 2024-02-27 10:15:54 +01:00
Sarah Hoffmann
a4f2e6a893 do not send outdated parameters to osm2pgsql flex 2024-02-27 10:15:36 +01:00
Sarah Hoffmann
b427fc7965 update osm2pgsql to 1.11.0 2024-02-27 10:15:36 +01:00
Sarah Hoffmann
e264604894 drop more railway tags 2024-02-27 10:15:08 +01:00
Sarah Hoffmann
3a5d9f0377 drop amenity=parking_space/entrance objects
Parking is sufficiently covered with amenity=parking.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
8be27015b2 drop boundary=land_area
Usually a version of administrative boundaries without ocean area.
Resulting polygons are pretty large and having the boundaries
should be enough.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
100391fb8e import leisure=natural_reserve as fallback only
About half of the nature reserves are also tagged with
boundary=protected_area. Avoid importing these objects twice.
2024-02-27 10:15:08 +01:00
Sarah Hoffmann
dc1baaa0af prefer min() function over if construct
Fixes a linter complaint.
2024-02-27 09:26:50 +01:00
marc tobias
7205491b84 Correct some typos 2024-02-26 18:13:30 +01:00
Sarah Hoffmann
918fec73c6 Merge pull request #3341 from mtmail/remove-php-faq-entries
PHP related FAQ entries are no longer needed
2024-02-21 08:56:18 +01:00
marc tobias
b6df486525 PHP related FAQ entries are no longer needed 2024-02-20 18:55:02 +01:00
Sarah Hoffmann
8bd8a040e0 Merge pull request #3340 from lonvia/fix-lua-liniting-issues
Fix some issues in the style files found by luacheck
2024-02-20 12:00:01 +01:00
Sarah Hoffmann
781e83ddc3 fix issues found by luacheck
The variable shadowing causes bad results when used with LuaJIT.
2024-02-20 10:43:51 +01:00
Sarah Hoffmann
5afd96d210 Merge pull request #3339 from lonvia/python-frontend-as-default
Switch to Python frontend as the default
2024-02-20 10:17:21 +01:00
Sarah Hoffmann
cf49a070fd switch Ubuntu installation scripts to Python frontend 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
4aba36c5ac API debug: properly escape non-highlighted code 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
ca6e65fff1 bdd: be more verbose on HTML parsing error 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
1e0025b095 also switch unit tests for cli 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
173e85c9e6 actions: make php the legacy tests 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
ffb467028e update documentation to recommend Python frontend 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
05fad607ff make Python frontend default and PHP optional 2024-02-19 18:39:01 +01:00
Sarah Hoffmann
19360a9552 Merge pull request #3338 from lonvia/remove-nested-cli-groups
Add documentation for importing without superuser rights
2024-02-16 19:27:25 +01:00
Sarah Hoffmann
b087f3ab7b actions: switch no-superuser test to PostgreSQL 16
The new permission restrictions on the public schema only took
effect in PG15, so the previously used PG14 does not show any issues.
2024-02-16 17:14:47 +01:00
Sarah Hoffmann
2c8fb31381 add documentation for non-superuser import 2024-02-16 17:14:47 +01:00
Sarah Hoffmann
b2d3f0a8b3 remove unnecessary nested group in CLI import command 2024-02-16 11:32:50 +01:00
Sarah Hoffmann
bd8025feab Merge pull request #3333 from lonvia/set-empty-extratags-to-null
Extratags should become null when empty
2024-02-08 14:26:49 +01:00
Sarah Hoffmann
4c19762e33 extratags should become null when empty
Removing the artificial entries in the extratags may lead to
an empty hstore. Set it to null in that case.

Fixes #3055.
2024-02-08 10:21:48 +01:00
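A minimal sketch of the invariant, assuming a PostgreSQL hstore column named extratags (the statement is illustrative; the real fix happens during indexing):

```python
# An empty hstore left over after tag cleaning becomes NULL.
EMPTY_EXTRATAGS_SQL = """
    UPDATE placex
       SET extratags = NULL
     WHERE extratags = ''::hstore
"""
```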
Sarah Hoffmann
1015ac40ae Merge pull request #3332 from lonvia/improve-cutting-of-result-list
Prefilter bad results before adding details and reranking
2024-02-07 10:50:32 +01:00
Sarah Hoffmann
4ce13f5c1f prefilter bad results before adding details and reranking
Move the first cutting of the result list to before the reranking
by result match. This means that results with significantly
less importance are removed early, independently of how well
they match the original query.

Fixes #3266.
2024-02-06 20:29:48 +01:00
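The reordering can be illustrated with a small sketch (the result type and threshold are hypothetical): drop results whose importance falls far behind the best one before any expensive detail lookup or reranking happens.

```python
def prefilter_results(results, min_ratio=0.5):
    """Cut obviously inferior results on importance alone."""
    if not results:
        return results
    best = max(r.importance for r in results)
    return [r for r in results if r.importance >= best * min_ratio]
```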
Sarah Hoffmann
2833362cf6 Merge pull request #3331 from lonvia/fix-word-table-rights
Properly grant rights to read-only user when switching out word table
2024-02-06 20:21:25 +01:00
Sarah Hoffmann
bc51378aee properly grant rights to read-only user when switching out word table 2024-02-06 17:30:01 +01:00
Sarah Hoffmann
39039e2a55 docs: mark layer parameter as python-only 2024-02-06 15:59:04 +01:00
Sarah Hoffmann
f523c01571 Merge pull request #3328 from lonvia/word-count-into-new-table
Recreate word table when refreshing counts
2024-02-05 11:58:11 +01:00
Sarah Hoffmann
81eed0680c recreate word table when refreshing counts
The counting touches a large part of the word table, leaving
bloated tables and indexes. Thus recreate the table instead and
swap it in.
2024-02-04 21:35:10 +01:00
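A sketch of the swap, assuming a psycopg-style connection and the tokenizer's word table (statements are illustrative): build a fresh copy with updated counts and rename it into place, which leaves no bloat behind.

```python
def recreate_word_table(conn):
    with conn.cursor() as cur:
        # Copy structure and content into a fresh, unbloated table.
        cur.execute("CREATE TABLE word_new (LIKE word INCLUDING ALL)")
        cur.execute("INSERT INTO word_new SELECT * FROM word")
        # ... recompute the token counts on word_new here ...
        cur.execute("DROP TABLE word")
        cur.execute("ALTER TABLE word_new RENAME TO word")
    conn.commit()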
Sarah Hoffmann
33c0f249b1 avoid LookupAny with address and too many name tokens
The index for nameaddress_vector has grown so large that PostgreSQL
will resort to a sequential scan if there are too many items
in the LookupAny list.
2024-01-29 16:52:14 +01:00
Sarah Hoffmann
76eadc562c print any collected debug output when returning a timeout error 2024-01-28 22:30:34 +01:00
Sarah Hoffmann
3cc3e3b2e3 Merge pull request #3321 from lonvia/remove-duplicate-partials
Improvements to query parsing
2024-01-28 20:32:58 +01:00
Sarah Hoffmann
f07f8530a8 housenumber-only searches cannot be combined with qualifiers 2024-01-28 19:03:11 +01:00
Sarah Hoffmann
103800a732 adjust rankings for housenumber-only searches
A normal address search with housenumber will use name rankings for
the street name. This is slightly different than weighing for
address parts. Use the same ranking for the first part of the
address for housenumber-only searches to make sure that penalties
remain comparable.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
f9ba7a465a always add a penalty for name + address search fallback
If there already was a search by full names, the search is likely
a repetition that yields the same results, only running slower.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
fed46240d5 disallow category tokens in the middle of a query string
This already worked for left-to-right reading and is now also
implemented for right-to-left reading. A qualifier must always be
before or after the name.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
2703442fd2 protect against very frequent bad partials 2024-01-28 19:03:11 +01:00
Sarah Hoffmann
2813bf18e6 avoid duplicates in the list of partial tokens for a query
This messes with the estimates for expected results.
2024-01-28 19:03:11 +01:00
Sarah Hoffmann
dcebea376d Merge pull request #3320 from lonvia/fix-timeout-return-code
Fix returned HTTP error when query runs too long
2024-01-28 10:37:37 +01:00
Sarah Hoffmann
b3a2b3d484 catch special async timeout error in servers
In Python <= 3.10 this is not yet the same as TimeoutError.

Fixes #3303.
2024-01-27 20:57:23 +01:00
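The incompatibility is easy to reproduce: on Python 3.10 and earlier, asyncio raises asyncio.TimeoutError, which is a different class from the builtin TimeoutError (the two were only unified in 3.11). A handler along these lines works on both (a sketch, not the server code itself):

```python
import asyncio

async def call_with_timeout(coro, seconds: float):
    try:
        return await asyncio.wait_for(coro, timeout=seconds)
    except (TimeoutError, asyncio.TimeoutError):
        # Catch both spellings: only from Python 3.11 on are they the same.
        return None
```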
Sarah Hoffmann
7321e66d08 Merge pull request #3317 from lonvia/postcodes-for-highway-areas
Search postcodes for highway areas around the area
2024-01-26 19:51:02 +01:00
Sarah Hoffmann
9627352ee4 search postcodes for highway areas around the area
So far the code would only accept postcodes that are inside the area.

Fixes #3304.
2024-01-26 18:14:11 +01:00
Sarah Hoffmann
bfc7acbb18 Merge pull request #3301 from lonvia/fix-class-search-regression
Interpret stand-alone special terms always as near term
2024-01-17 10:47:35 +01:00
Sarah Hoffmann
e0ca2ce6ec interpret stand-alone special terms always as near term
Fixes #3298.
2024-01-16 17:19:21 +01:00
Sarah Hoffmann
b969c5a62f Merge pull request #3293 from lonvia/rematch-against-country-code
Add country code to words to be rematched
2024-01-08 18:48:32 +01:00
Sarah Hoffmann
28f7e51279 add country code to words to be rematched 2024-01-08 12:23:23 +01:00
Sarah Hoffmann
d35eb4105e Merge pull request #3292 from lonvia/faster-country-search
Speed up country search
2024-01-07 20:42:20 +01:00
Sarah Hoffmann
b2afe3ce3e when a country is in the results, restrict further searches to places
A country search result usually comes with a very high importance.
As a result, only other very well-known places will show up together
with country results, and that means only places with lower address
ranks. Name searches for country names tend to yield a lot of POI
results because the country name is part of the name
(think "embassy of Sweden"). By excluding POIs from further searches,
the search is sped up quite a bit.
2024-01-07 17:29:12 +01:00
Sarah Hoffmann
7337898b84 dump params in log view 2024-01-07 15:37:53 +01:00
Sarah Hoffmann
4305160c91 prioritize country searches when penalty is equal 2024-01-07 15:28:37 +01:00
Sarah Hoffmann
dc52d0954e Merge pull request #3238 from mtmail/check-database-for-version-match
admin --check-database also checks database vs nominatim version
2024-01-07 15:24:00 +01:00
Sarah Hoffmann
d3a575319f Merge pull request #3289 from lonvia/viewbox-and-housenumbers
Do not restrict by viewbox when housenumber or postcode is available
2024-01-07 15:23:14 +01:00
Sarah Hoffmann
2592bf1954 Merge pull request #3290 from lonvia/near-vs-quaifier-words
Do not run near queries on qualifier words
2024-01-07 15:23:00 +01:00
Sarah Hoffmann
88d7ffa274 Merge pull request #3291 from lonvia/fix-timezone-handling
Fix timezone handling for timestamps from the database
2024-01-07 15:22:42 +01:00
Sarah Hoffmann
474d4230b8 fix timezone handling for timestamps from the database
SQLite is not timezone-aware, so make sure to convert to UTC
before inserting any data.
2024-01-07 11:37:40 +01:00
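A sketch of the convention (the helper names are made up): convert timezone-aware datetimes to UTC before writing and re-attach UTC on the way out, since SQLite itself stores naive timestamps.

```python
from datetime import datetime, timezone

def to_sqlite(ts: datetime) -> datetime:
    """Normalise to naive UTC before inserting into SQLite."""
    if ts.tzinfo is not None:
        ts = ts.astimezone(timezone.utc).replace(tzinfo=None)
    return ts

def from_sqlite(ts: datetime) -> datetime:
    """Interpret a timestamp read from SQLite as UTC."""
    return ts.replace(tzinfo=timezone.utc)
```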
Sarah Hoffmann
10a5424a71 do not run near queries on qualifier words
There is too much potential for confusion (e.g. 'Rio Grande' read
as 'river near Grande') for too little gain. Use near phrases
instead.
2024-01-07 11:33:11 +01:00
Sarah Hoffmann
7eb04f67e2 do not restrict by viewbox when housenumber or postcode is available
Fixes #3274.
2024-01-07 11:29:26 +01:00
Marc Tobias
1d7e078a2c check-database also checks database vs nominatim version 2024-01-06 20:56:56 +01:00
Sarah Hoffmann
f03ec3ea12 Merge pull request #3286 from lonvia/avoid-bind-parameters-in-lambdas
Avoid closure variables in lambda statements
2024-01-05 21:24:48 +01:00
Sarah Hoffmann
8e90fa3395 avoid closure variables in lambda statements
There is a bug in SQLAlchemy that assigns the wrong value to bind
parameters from closure variables when reusing lambda statements
that are later extended with other non-lambda expressions.

Thus either avoid lambda statements with closure variables or avoid
extending them with non-lambda expressions.
2024-01-05 17:49:28 +01:00
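Sketched with SQLAlchemy's lambda-statement API (the table and parameter names are illustrative): passing the value as an explicit bind parameter instead of capturing the Python variable in the lambda closure sidesteps the caching bug described above.

```python
import sqlalchemy as sa

def lookup_stmt(table):
    # The lambda captures no plain Python value, only a bindparam
    # placeholder; the actual value is supplied at execution time.
    stmt = sa.lambda_stmt(lambda: sa.select(table))
    stmt += lambda s: s.where(table.c.place_id == sa.bindparam('pid'))
    return stmt

# usage: conn.execute(lookup_stmt(placex), {'pid': 12345})
```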
Sarah Hoffmann
02af0a2c87 use correct SQLAlchemy pool for asynchronous connections
See https://github.com/sqlalchemy/sqlalchemy/issues/8771
2024-01-02 16:15:44 +01:00
Sarah Hoffmann
fa4e5513d1 API: avoid engine disposal on startup 2024-01-02 16:10:30 +01:00
Sarah Hoffmann
93afe5a7c3 update typing for latest changes in SQLAlchemy 2023-12-29 20:55:33 +01:00
Sarah Hoffmann
af85ad390f Merge pull request #3273 from lonvia/search-with-sqlite
Add forward search capability for SQLite databases
2023-12-12 12:15:22 +01:00
Sarah Hoffmann
ab45db5360 add minimal documentation for the SQLite usage 2023-12-09 16:30:31 +01:00
Sarah Hoffmann
89094cf92e error out when a SQLite database does not exist
Requires marking the database r/w when it is newly created in the
convert function.
2023-12-07 10:24:53 +01:00
Sarah Hoffmann
3f5484f48f enable search for sqlite conversion by default 2023-12-07 09:33:42 +01:00
Sarah Hoffmann
ff06b64329 enable all BDD API tests for sqlite 2023-12-07 09:32:02 +01:00
Sarah Hoffmann
6d39563b87 enable all API tests for sqlite and port missing features 2023-12-07 09:32:02 +01:00
Sarah Hoffmann
0d840c8d4e extend sqlite converter for search tables 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
381bd0b576 remove unused function 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b5c61e0b5b improve typing for @compiles constructs
The first parameter is in fact the self parameter referring to
the function class.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
df6eddebcd avoid unnecessary aliases 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b6c8c0e72b factor out SQL for filtering by location
Also improves the decision whether an index is used or not.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
b06f5fddcb simplify handling of SQL lookup code for search_name
Use function classes which can be instantiated directly.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
8791c6cb69 correctly close API objects during testing 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
615b166c68 clean up ST_DWithin and intersects() functions
A non-index version of ST_DWithin is not necessary. ST_Distance
can be used for that purpose. Index use for intersects can be
covered with a simple parameter.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
c41f2fed21 simplify weigh_search() function
Use JSON arrays which can have mixed types and therefore have
a more logical structure than separate arrays. Avoid JSON dicts
because of their verbosity.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
05e47fbb28 fix parameter formatting in sqlite debug output 2023-12-07 09:31:00 +01:00
Sarah Hoffmann
1b7c8240ba enable connection pools for sqlite
Connecting is reasonably expensive because the spatialite extension
needs to be loaded. Disable pooling for tests because there is some
memory leak when quickly opening and closing QueuePools with sqlite
connections.
2023-12-07 09:31:00 +01:00
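A sketch of the pooling setup with a synchronous engine (the real frontend uses async connections; pool size and paths here are illustrative): the spatialite extension is loaded once per pooled connection, while tests would pass poolclass=NullPool instead.

```python
import sqlalchemy as sa
from sqlalchemy.pool import QueuePool

engine = sa.create_engine('sqlite:///nominatim.sqlite',
                          poolclass=QueuePool, pool_size=5)

@sa.event.listens_for(engine, 'connect')
def load_spatialite(dbapi_conn, _connection_record):
    # Paying this cost once per pooled connection is the whole point.
    dbapi_conn.enable_load_extension(True)
    dbapi_conn.load_extension('mod_spatialite')
    dbapi_conn.enable_load_extension(False)
```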
Sarah Hoffmann
c4fd3ab97f hide type differences between Postgres and Sqlite in custom types
Also define a custom set of operators in preparation for differences
in implementation.
2023-12-07 09:31:00 +01:00
Sarah Hoffmann
8c7140d92b Merge pull request #3108 from mtmail/remove-legacy-wikipedia-tag-syntax
These days the OSM wikipedia tag no longer contains URLs
2023-12-07 09:24:32 +01:00
Sarah Hoffmann
3969ce0f55 Merge pull request #3268 from mtmail/wikipedia-file-path-warning
Improve error message when Wikipedia importance file is not found
2023-12-07 09:21:59 +01:00
Robbe Haesendonck
4f5f5ea8fc Removed unnecessary check for --prepare-database flag 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
5f7cc91cf9 Connect using localhost instead of socket 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
424c1f0d41 Added changing permissions of nominatim-project dir 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
cff05394a1 Added osm-import to sudoers file for tokenizer setup 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
638b40c3ec Fixing CI tests for install-no-superuser 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
53d2050dc5 Fixed typechecking error 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
97ac036df5 Added missing return types to functions 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
482f7fe3ba CI tests: Removed creation of user www-data.
Removed creation of user www-data as it already exists
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
567c31ab6a Fixed legacy import command
Check whether prepare-database is true, if so exit early
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
7d28fc35d1 Disabled pylint too-many-branches 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
c06f902398 Fixed setting tokenizer property 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
59ae63e6f5 Updated no-superuser install ci test
Added nominatim_database_webuser (www-data).
Set non-superuser password for importing
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
9c7d947fd1 Updated check to see if osm_file is set 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
58db0ad6d8 Fixed formatting 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
3b09c39dbf Updated ci tests to reflect changes in Nominatim CLI 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
db917cb0d4 Made sure legacy import command still works 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
ba6cdd875d Removed unused variable, fixed connection 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
d231ff60ed Removed _is_complete_import check 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
c74904d075 Improved logic.
Fixed small oversight in the mutual exclusiveness of arguments
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
22204050f2 Added new psql user for importing the data 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
667197a47e Changed naming of flags.
Made sure legacy import is also still ok
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
e8b866aa88 Added check to see if hstore is loaded 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
e7b8e1a2c2 Fixed ci-tests, osm-file flag
Preparing database should work without osm-file
2023-12-07 09:04:33 +01:00
Robbe Haesendonck
279b4fd6d2 Renamed flags 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
b7c83d3580 Fixed pylint warnings 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
d4018f2e3b Added check for hstore extension 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
38369ca3cf Fixed typo 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
cc0bdd34e9 Fixed linting errors 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
8e71ff329c Added version check for PostGis and Postgres 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
b4e3d0ea44 Updated CI tests to check new import flags 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
992703b15e Added --prepare-database flag 2023-12-07 09:04:33 +01:00
Robbe Haesendonck
ba5ec80611 Added --no-superuser flag
To allow import into already existing databases.
Refs: #2719
2023-12-07 09:04:33 +01:00
marc tobias
1c1447e709 Improve error message when Wikipedia importance file is not found 2023-12-06 14:31:07 +01:00
Sarah Hoffmann
3c32c0354a actions: pin pytest-asyncio version for Ubuntu 20
Newest versions are no longer compatible with older pytest.
2023-12-04 14:32:03 +01:00
Sarah Hoffmann
8a2c6067a2 skip lookup with full names when there are none 2023-12-01 12:11:58 +01:00
Sarah Hoffmann
d60a45715a Merge pull request #3263 from lonvia/near-search-penalty-inheritance
More tweaks for Python frontend search
2023-11-29 14:02:24 +01:00
Sarah Hoffmann
3c7a28dab0 further restrict stop search criterion 2023-11-29 11:28:54 +01:00
Sarah Hoffmann
0c72a434e0 use restrict for housenumber lookups with few numbers 2023-11-29 11:28:54 +01:00
Sarah Hoffmann
32e7b59b1f NearSearch needs to inherit penalty from inner search 2023-11-29 11:28:52 +01:00
Sarah Hoffmann
f448423727 Merge pull request #3262 from lonvia/fix-category-search
Fix use of category parameter for search() function
2023-11-28 20:02:36 +01:00
Sarah Hoffmann
b2319e52ff correctly exclude streets with housenumber searches
Street results are not subject to the full filtering in the SQL
query, so recheck.
2023-11-28 17:53:37 +01:00
Sarah Hoffmann
25279d009a add tests for interaction of category parameter with category terms 2023-11-28 16:56:08 +01:00
Sarah Hoffmann
3f72ca4bca rename use of category as POI search to near_item
Use the term category only as a short-cut for "tuple of key and value".
2023-11-28 16:27:05 +01:00
Sarah Hoffmann
70dc4957dc the category parameter in search should result in a qualifier 2023-11-28 12:01:49 +01:00
Sarah Hoffmann
d8ed565bce Merge pull request #3260 from lonvia/improve-catgeory-search
Various improvements to search with special phrases for Python frontend
2023-11-27 12:02:11 +01:00
Sarah Hoffmann
a7f5c6c8f5 drop category tokens when they make up a full phrase 2023-11-26 20:58:50 +01:00
Sarah Hoffmann
a8b023e57e restrict base results in near search by rank
In particular, this avoids roads or POIs being used as the base
for the near search when a place result is present.
2023-11-26 17:41:29 +01:00
Sarah Hoffmann
47ca56f21b deduplicate categories/qualifiers 2023-11-26 17:11:15 +01:00
Sarah Hoffmann
580a7b032f order near searches by distance instead of importance 2023-11-26 16:48:04 +01:00
Sarah Hoffmann
8fcc2bb7f5 avoid duplicate lines during category search 2023-11-26 14:53:20 +01:00
Sarah Hoffmann
d6fe58f84e fix polygon selection for classtable lookups
Polygons should preferably be used with higher address ranks,
where the areas are smaller.
2023-11-25 21:01:27 +01:00
Sarah Hoffmann
2d54de09bb Merge pull request #3257 from lonvia/slow-queries
Performance tweaks for search
2023-11-23 12:05:50 +01:00
Sarah Hoffmann
4e4d29f653 increase penalty for one-letter words 2023-11-23 10:51:58 +01:00
Sarah Hoffmann
195c13ee8a more preference for name-only queries in search 2023-11-22 23:57:23 +01:00
Sarah Hoffmann
ac5ef64701 avoid index use when filtering by layer 2023-11-22 20:54:04 +01:00
Sarah Hoffmann
e7dc24c026 add timestamps to text logging 2023-11-22 17:38:32 +01:00
Sarah Hoffmann
155f26060d avoid index on rank_address in near search 2023-11-22 17:33:17 +01:00
Sarah Hoffmann
a87fe8d8bf exclude country-level searches with non-address layers 2023-11-22 17:01:41 +01:00
Sarah Hoffmann
158df6b2e8 Merge pull request #3252 from kumarUjjawal/patch-1
minor typo fix
2023-11-22 12:51:37 +01:00
Kumar Ujjawal
b8db76c925 minor typo fix
fixed a minor typo, from 'wit' to 'with'.
2023-11-20 17:23:42 +05:30
Sarah Hoffmann
fffdfc9b88 add report on newest vulnerability 2023-11-20 10:44:48 +01:00
Sarah Hoffmann
6478409b05 improve code to collect the PostGIS version
The SQL contained an unchecked string literal, which may in theory be
used to attack the database.
2023-11-16 11:14:29 +01:00
Sarah Hoffmann
ee556fd42e Merge pull request #3248 from lonvia/fix-postcode-area-computation
PHP frontend: fix postcode area computation for address points
2023-11-15 22:10:21 +01:00
Sarah Hoffmann
9a1b8a67d6 adapt typing to newest version of SQLAlchemy 2023-11-15 19:59:26 +01:00
Sarah Hoffmann
383e3ccd25 php frontend: fix on-the-fly lookup of postcode areas 2023-11-15 17:45:12 +01:00
Sarah Hoffmann
b4ce1fb599 remove now unnecessary type ignore comment 2023-11-01 11:57:57 +01:00
Sarah Hoffmann
2bf8e62580 fix assertion on address list, it may be empty
Fixes #3237.
2023-10-31 21:10:54 +01:00
Sarah Hoffmann
afb439b089 Merge pull request #3235 from lonvia/fix-python-deploy
Python frontend deployment: add systemd service for the socket
2023-10-25 14:54:13 +02:00
Sarah Hoffmann
78a87ad16b python deployment: add systemd service for the socket 2023-10-25 11:31:42 +02:00
Sarah Hoffmann
5bf55a69a5 Merge pull request #3234 from lonvia/reduce-admin-style
Streamline admin style
2023-10-25 09:38:17 +02:00
Sarah Hoffmann
ca782e2f20 Merge pull request #3233 from lonvia/support-for-sqlite
Add support for SQLite DBs in frontend: reverse
2023-10-24 11:54:35 +02:00
Sarah Hoffmann
308de35802 further reduce admin style import to admin-only objects 2023-10-24 09:50:43 +02:00
Sarah Hoffmann
a9ac68a729 restrict geometry size for SQLite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
84d6b481ae convert sqlite: add index on parent_place_id 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
613c8635a8 remove type info from SQLAlchemy condition functions
A boolean type makes the SQLite dialect produce a construct like
'func() = 1' in WHERE conditions. While syntactically correct, it tends
to confuse the query planner.
2023-10-23 17:19:12 +02:00
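An illustrative sketch of the two renderings (function and column names are made up): with a Boolean return type attached, SQLite gets the `= 1` comparison; with the type left unspecified, the bare function call lands in the WHERE clause.

```python
import sqlalchemy as sa

placex = sa.table('placex', sa.column('place_id'), sa.column('class'))

# typed variant: renders roughly as "WHERE is_address_point(class) = 1"
typed = sa.func.is_address_point(placex.c['class'], type_=sa.Boolean)
# untyped variant: renders as "WHERE is_address_point(class)"
untyped = sa.func.is_address_point(placex.c['class'])

stmt = sa.select(placex.c.place_id).where(untyped)
```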
Sarah Hoffmann
899a04ad26 make code work with Spatialite 4.3
Transfer is_address_point into a SQLAlchemy function, so that
json.has_key() can use the older json_extract() function.
Also work around the broken Distance function.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d8dca2a3a9 enable BDD tests for sqlite databases
The database must currently be created by hand and the name handed
in via -DAPI_TEST_DB='sqlite:...'.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
8216899a9a trim all coordinate output to 7 digits 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
b1d419f458 add indexing support for DWithin and intersects for sqlite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
0417946153 make reverse API work with sqlite 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
1149578e8f actions: enable sqlite testing 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
37488ee82b try future annotations to resolve Generics compatibility 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
06bbd501fd make status API work with sqlite incl. unit tests 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
07e6c5cf69 make details API work with sqlite incl. unit tests 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d0c91e4acf make lookup call work with sqlite
Includes porting unit tests.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
114cdafe7e add exporting of SQLite table 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
837bdecde8 add skeleton code for convert function 2023-10-23 17:19:12 +02:00
Sarah Hoffmann
d9d0e70e5b streamline SQLAlchemy DB schema
Remove all information not strictly used by the frontend as well as
any index information. This will make it easier to create a SQLite
database from the schema.
2023-10-23 17:19:12 +02:00
Sarah Hoffmann
1255efba7f remove unused code 2023-10-23 11:55:18 +02:00
Sarah Hoffmann
6ad397d4a9 Merge branch 'clean-deleted-relations' of https://github.com/lujoh/Nominatim into lujoh-clean-deleted-relations 2023-10-23 10:47:31 +02:00
Sarah Hoffmann
570ca22d71 Merge pull request #3228 from pawel-wroniszewski/fix/postcode-validation
Properly validate postcodes with country code
2023-10-23 10:35:42 +02:00
lujoh
418f381b49 made age a required argument for the --clean-deleted command 2023-10-20 15:31:55 -04:00
Paweł Wroniszewski
2cae37ccde Revert country settings 2023-10-20 12:50:28 +02:00
lujoh
650fbc2563 added the --clean-deleted command to the documentation 2023-10-18 02:05:46 -04:00
lujoh
9ec26c60ff adjusted tests for --clean-deleted-relations command 2023-10-17 23:03:37 -04:00
lujoh
06204dfcd8 moved sql function flush_deleted_places() to utils 2023-10-17 18:22:27 -04:00
Paweł Wroniszewski
fbe40e005d Properly validate postcodes with country code
Include the postcode pattern in the postcode normalisation regex,
instead of removing it from the postcode pattern in the config.

This properly handles postcode validation and normalization when the
country code is part of the postcode, e.g. for the Isle of Man, Jersey,
Anguilla, Andorra, the Cayman Islands and more.

Fixes #3227.
2023-10-17 01:04:07 +02:00
lujoh
e9efef9095 added subcommand to clean deleted relations for issue # 2444 2023-10-16 11:30:58 -04:00
Sarah Hoffmann
95c3181a35 adapt typing for newer version of mypy 2023-10-16 17:03:48 +02:00
Sarah Hoffmann
12dbfb0777 Merge pull request #3222 from lonvia/fix-river-output
Make sure the place name always comes first in output
2023-10-10 11:20:12 +02:00
Sarah Hoffmann
b62dbd1f92 reduce influence of viewbox
Perfectly matching city names should still get priority.
2023-10-07 22:00:52 +02:00
Sarah Hoffmann
5011fde176 make sure the place name always comes first in output
Also deleted some now unused code.
2023-10-05 16:37:28 +02:00
Sarah Hoffmann
54cb9a33b1 Merge pull request #3212 from lonvia/more-tests
Add more unit tests for search
2023-10-05 16:36:53 +02:00
Sarah Hoffmann
f1fbcd863d switch back meaning of reverse and search in warm
Also don't try to warm up searches on reverse-only databases.

Fixes #3213.
2023-10-04 15:19:24 +02:00
Sarah Hoffmann
b00b16aa3a more unit tests for search 2023-09-27 15:00:05 +02:00
Sarah Hoffmann
0f19695225 Merge pull request #3211 from lonvia/replace-get-addressdata
Move get_addressdata() implementation to Python
2023-09-26 21:29:15 +02:00
Sarah Hoffmann
7fcbe13669 move get_addressdata() implementation to Python
The pgsql function get_addressdata() looks up a lot of data
that is already available in Python.
2023-09-26 11:21:36 +02:00
Sarah Hoffmann
87c91ec5c4 Merge pull request #3206 from lonvia/rerank-by-locale
Reintroduce result reranking
2023-09-21 09:24:18 +02:00
Sarah Hoffmann
0e10916b07 adapt tests to improved search 2023-09-20 14:58:54 +02:00
Sarah Hoffmann
21df87dedc filter duplicate results after DB query 2023-09-20 14:58:54 +02:00
Sarah Hoffmann
fd26310d6a rerank results by query
The algorithm is similar to the PHP reranking and checks the terms from
the display name against the query terms. However, instead of
exact matching it uses a per-word edit distance, so that it is less
strict about mismatched accents or other one-letter
differences.

Country names get a higher penalty because they don't receive a
penalty during token matching right now.

This will work badly with the legacy tokenizer. Given that it is
marked for removal, it is simply not worth optimising for it.
2023-09-20 14:52:05 +02:00
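The approach can be sketched as follows (a simplification that uses difflib's similarity ratio in place of a real per-word edit distance; the penalty weighting is invented):

```python
from difflib import SequenceMatcher

def rerank_penalty(query: str, display_name: str) -> float:
    """Penalty grows with how badly the query words match the display name."""
    result_words = display_name.lower().replace(',', ' ').split()
    penalty = 0.0
    for qword in query.lower().split():
        # Per-word score: best similarity against any word of the result.
        best = max((SequenceMatcher(None, qword, rword).ratio()
                    for rword in result_words), default=0.0)
        penalty += 1.0 - best   # one-letter differences cost very little
    return penalty
```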
Sarah Hoffmann
5762a5bc80 move localization into add_result_details
This means that the locale now needs to be handed into the search
functions already. At least search needs it for reranking.
2023-09-19 11:17:04 +02:00
Sarah Hoffmann
8106e67f14 Merge pull request #3201 from lonvia/tweak-expected-count
Reduce expected count for multi-part words
2023-09-17 17:04:27 +02:00
Sarah Hoffmann
f029fb3c65 'fix' issues with recent changes to psycopg2 typing
Some of the changes just make the warnings go away. The typing info
is still incorrect on the stub side, as far as I can determine.
2023-09-17 15:09:34 +02:00
Sarah Hoffmann
44da684d1d reduce expected count for multi-part words
Fixes #3196.
2023-09-11 17:45:34 +02:00
Marc Tobias
f59a072aa6 These days the OSM wikipedia tag no longer contains URLs 2023-07-07 17:36:11 +02:00
171 changed files with 4947 additions and 1787 deletions

.codespellrc (new file)

@@ -0,0 +1,7 @@
# https://github.com/codespell-project/codespell
[codespell]
skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
# Need to be lowercase in the list
# Unter = Unter den Linden (an example address)
ignore-words-list = inout,unter


@@ -25,12 +25,12 @@ runs:
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
pip3 install sqlalchemy psycopg
pip3 install sqlalchemy psycopg aiosqlite
fi
shell: bash
env:


@@ -11,7 +11,7 @@ jobs:
with:
submodules: true
- uses: actions/cache@v3
- uses: actions/cache@v4
with:
path: |
data/country_osm_grid.sql.gz
@@ -27,7 +27,7 @@ jobs:
mv nominatim-src.tar.bz2 Nominatim
- name: 'Upload Artifact'
uses: actions/upload-artifact@v3
uses: actions/upload-artifact@v4
with:
name: full-source
path: nominatim-src.tar.bz2
@@ -43,41 +43,29 @@ jobs:
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
php: '7.3'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
postgresql: 13
postgis: 3
php: '7.4'
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
postgresql: 15
postgis: 3
php: '8.1'
lua: '5.3'
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
tools: phpunit:9, phpcs, composer
ini-values: opcache.jit=disable
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: 3.7
if: matrix.flavour == 'oldstuff'
@@ -113,22 +101,16 @@ jobs:
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
run: pip3 install falcon starlette
run: pip3 install falcon starlette asgi_lifespan
- name: Install latest pylint
run: pip3 install -U pylint asgi_lifespan
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
run: pip3 install -U pylint
if: matrix.flavour == 'ubuntu-22'
- name: Python linting
run: python3 -m pylint nominatim
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
if: matrix.flavour == 'ubuntu-22'
- name: Python unit tests
run: python3 -m pytest test/python
@@ -152,8 +134,12 @@ jobs:
needs: create-archive
runs-on: ubuntu-20.04
strategy:
matrix:
postgresql: ["13", "16"]
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: full-source
@@ -167,11 +153,13 @@ jobs:
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 13
postgresql-version: ${{ matrix.postgresql }}
postgis-version: 3
- name: Install Postgresql server dev
run: sudo apt-get install postgresql-server-dev-13
run: sudo apt-get install postgresql-server-dev-$PGVER
env:
PGVER: ${{ matrix.postgresql }}
- uses: ./Nominatim/.github/actions/build-nominatim
with:
@@ -182,16 +170,16 @@ jobs:
- name: BDD tests (legacy tokenizer)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
python-api-test:
php-test:
needs: create-archive
runs-on: ubuntu-22.04
steps:
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: full-source
@@ -203,6 +191,23 @@ jobs:
postgresql-version: 15
postgis-version: 3
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.1
tools: phpunit:9, phpcs, composer
ini-values: opcache.jit=disable
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: 'ubuntu-22'
@@ -210,12 +215,9 @@ jobs:
- name: Install test prerequsites
run: sudo apt-get install -y -qq python3-behave
- name: Install Python webservers
run: pip3 install starlette asgi_lifespan httpx
- name: BDD tests (starlette)
- name: BDD tests (php)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=starlette --format=progress3
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php --format=progress3
working-directory: Nominatim/test/bdd
@@ -265,7 +267,7 @@ jobs:
OS: ${{ matrix.name }}
INSTALL_MODE: ${{ matrix.install_mode }}
- uses: actions/download-artifact@v3
- uses: actions/download-artifact@v4
with:
name: full-source
path: /home/nominatim
@@ -346,3 +348,54 @@ jobs:
- name: Clean up database (reverse-only import)
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
install-no-superuser:
runs-on: ubuntu-latest
needs: create-archive
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 16
postgis-version: 3
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ubuntu-22
lua: 5.3
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
rm -rf Nominatim
- name: Prepare Database
run: |
nominatim import --prepare-database
- name: Create import user
run: |
sudo -u postgres createuser osm-import
psql -d nominatim -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import'"
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "osm-import"'
- name: Run import
run: |
NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file test.pbf
- name: Check full import
run: nominatim admin --check-database
codespell:
runs-on: ubuntu-latest
steps:
- uses: codespell-project/actions-codespell@v2
with:
only_warn: 1


@@ -19,8 +19,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
set(NOMINATIM_VERSION_MINOR 3)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION_MINOR 4)
set(NOMINATIM_VERSION_PATCH 1)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
@@ -82,13 +82,14 @@ endif()
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (BUILD_API OR BUILD_IMPORTER)
if (BUILD_API)
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
message(WARNING "PHP binary not found. Only Python frontend can be used.")
set(PHP_BIN "")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
@@ -226,7 +227,11 @@ if (BUILD_IMPORTER)
PATTERN "paths.py" EXCLUDE
PATTERN __pycache__ EXCLUDE)
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
if (EXISTS ${PHP_BIN})
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
else()
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py-no-php.tmpl paths-py.installed)
endif()
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
RENAME paths.py)
@@ -254,7 +259,7 @@ if (BUILD_MODULE)
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
if (BUILD_API)
if (BUILD_API AND EXISTS ${PHP_BIN})
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
endif()


@@ -69,7 +69,7 @@ Before submitting a pull request make sure that the tests pass:
Nominatim follows semantic versioning. Major releases are done for large changes
that require (or at least strongly recommend) a reimport of the databases.
Minor releases can usually be applied to exisiting databases. Patch releases
Minor releases can usually be applied to existing databases. Patch releases
contain bug fixes only and are released from a separate branch where the
relevant changes are cherry-picked from the master branch.


@@ -1,3 +1,54 @@
4.4.1
* fix geocodejson output: admin level output should only print boundaries
* updating: restrict invalidation of child objects on large street features
* restrict valid interpolation house numbers to 0-999999
* fix import error when SQLAlchemy 1.4 and psycopg3 are installed
* various typo fixes in the documentation
4.4.0
* add export to SQLite database and SQLite support for the frontend
* switch to Python frontend as the default frontend
* update to osm2pgsql 1.11.0
* add support for new osm2pgsql middle table format
* simplify geometry for large polygon objects not used in addresses
* various performance tweaks for search in Python frontend
* fix regression in search with categories where it was confused with near
search
* partially roll back use of SQLAlchemy lambda statements due to bugs
in SQLAlchemy
* fix handling of timezones for timestamps from the database
* fix handling of full address searches in connection with a viewbox
* fix postcode computation of highway areas
* fix handling of timeout errors for Python <= 3.10
* fix address computation for postcode areas
* fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
* make sure extratags are always null when empty
* reduce importance of places without wikipedia reference
* improve performance of word count computations
* drop support for wikipedia tags with full URLs
* replace get_addressdata() SQL implementation with a Python function
* improve display name for non-address features
* fix postcode validation for postcodes with country code
(thanks @pawel-wroniszewski)
* add possibility to run imports without superuser database rights
(thanks @robbe-haesendonck)
* new CLI command for cleaning deleted relations (thanks @lujoh)
* add check for database version in the CLI check command
* updates to import styles ignoring more unused objects
* various typo fixes (thanks @kumarUjjawal)
4.3.2
* fix potential SQL injection issue for 'nominatim admin --collect-os-info'
* PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
* Python frontend: improve handling of viewbox
* Python frontend: correct deployment instructions
4.3.1
* reintroduce result rematching
* improve search of multi-part names
* fix accidentally switched meaning of --reverse-only and --search-only in
warm command
4.3.0
* fix failing importance recalculation command
* fix merging of linked names into unnamed boundaries
@@ -23,7 +74,7 @@
* new documentation section for library
* various smaller fixes to existing documentation
(thanks @woodpeck, @bloom256, @biswajit-k)
* updates to vagrant install scripts, drop support for Ubunut 18
* updates to vagrant install scripts, drop support for Ubuntu 18
(thanks @n-timofeev)
* removed obsolete configuration variables from env.defaults
* add script for generating a taginfo description (thanks @biswajit-k)
@@ -240,7 +291,7 @@
* increase splitting for large geometries to improve indexing speed
* remove deprecated get_magic_quotes_gpc() function
* make sure that all postcodes have an entry in word and are thus searchable
* remove use of ST_Covers in conjunction woth ST_Intersects,
* remove use of ST_Covers in conjunction with ST_Intersects,
causes bad query planning and slow updates in Postgis3
* update osm2pgsql
@@ -297,7 +348,7 @@
* exclude postcode ranges separated by colon from centre point calculation
* update osm2pgsql, better handling of imports without flatnode file
* switch to more efficient algorithm for word set computation
* use only boundries for country and state parts of addresses
* use only boundaries for country and state parts of addresses
* improve updates of addresses with housenumbers and interpolations
* remove country from place_addressline table and use country_code instead
* optimise indexes on search_name partition tables
@@ -336,7 +387,7 @@
* complete rewrite of reverse search algorithm
* add new geojson and geocodejson output formats
* add simple export script to exprot addresses to CSV
* add simple export script to export addresses to CSV
* remove is_in terms from address computation
* remove unused search_name_country tables
* various smaller fixes to query parsing
@@ -401,7 +452,7 @@
* move installation documentation into this repo
* add self-documenting vagrant scripts
* remove --create-website, recommend to use website directory in build
* add accessor functions for URL parameters and improve erro checking
* add accessor functions for URL parameters and improve error checking
* remove IP blocking and rate-limiting code
* enable CI via travis
* reformatting for more consistent coding style
@@ -412,7 +463,7 @@
* update to refactored osm2pgsql which use libosmium based types
* switch from osmosis to pyosmium for updates
* be more strict when matching against special search terms
* handle postcode entries with mutliple values correctly
* handle postcode entries with multiple values correctly
2.5


@@ -9,10 +9,10 @@ versions.
| Version | End of support for security updates |
| ------- | ----------------------------------- |
| 4.4.x | 2026-03-07 |
| 4.3.x | 2025-09-07 |
| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
| 4.0.x | 2023-11-02 |
## Reporting a Vulnerability
@@ -36,5 +36,6 @@ incident. Announcements will also be published at the
## List of Previous Incidents
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)

Vagrantfile

@@ -38,7 +38,7 @@ Vagrant.configure("2") do |config|
lv.memory = 2048
lv.nested = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs', nfs_udp: false
end
end


@@ -0,0 +1,15 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = None
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()


@@ -5,6 +5,35 @@ your Nominatim database. It is assumed that you have already successfully
installed the Nominatim software itself; if not, return to the
[installation page](Installation.md).
## Importing with a database user without superuser rights
Nominatim usually creates its own PostgreSQL database at the beginning of the
import process. This makes usage easier for the user but means that the
database user doing the import needs the appropriate rights.
If you prefer to run the import with a database user with limited rights,
you can do so by changing the import process as follows:
1. Run the command for database preparation with a database user with
superuser rights. For example, to use a db user 'dbadmin' for a
database 'nominatim', execute:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=dbadmin" nominatim import --prepare-database
```
2. Grant the import user the right to create tables. For example, for user 'import-user':
```
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "import-user"'
```
3. Now run the remainder of the import with the import user:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=import-user" nominatim import --continue import-from-file --osm-file file.pbf
```
## Importing multiple regions (without updates)
To import multiple regions in your database you can simply give multiple


@@ -43,6 +43,22 @@ virtualenv /srv/nominatim-venv
Next you need to set up the service that runs the Nominatim frontend. This is
easiest done with a systemd job.
First you need to tell systemd to create a socket file to be used by
gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
``` systemd
[Unit]
Description=Gunicorn socket for Nominatim
[Socket]
ListenStream=/run/nominatim.sock
SocketUser=www-data
[Install]
WantedBy=multi-user.target
```
Now you can add the systemd service for Nominatim itself.
Create the following file `/etc/systemd/system/nominatim.service`:
``` systemd
@@ -74,12 +90,14 @@ its own Python process using
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
connections to the database to serve requests in parallel.
Make the new service known to systemd and start it:
Make the new services known to systemd and start them:
``` sh
sudo systemctl daemon-reload
sudo systemctl enable nominatim
sudo systemctl start nominatim
sudo systemctl enable nominatim.socket
sudo systemctl start nominatim.socket
sudo systemctl enable nominatim.service
sudo systemctl start nominatim.service
```
This sets the service up, so that Nominatim is automatically started


@@ -37,40 +37,6 @@ nominatim import --continue indexing
Otherwise it's best to start the full setup from the beginning.
### PHP "open_basedir restriction in effect" warnings
PHP Warning: file_get_contents(): open_basedir restriction in effect.
You need to adjust the
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
setting in your PHP configuration (`php.ini` file). By default this setting may
look like this:
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
Either add reported directories to the list or disable this setting temporarily
by adding ";" at the beginning of the line. Don't forget to enable this setting
again once you are done with the PHP command line operations.
### PHP timezone warnings
The Apache log may contain lots of PHP warnings like this:
`PHP Warning: date_default_timezone_set() function.`
You should set the default time zone as instructed in the warning in
your `php.ini` file. Find the entry about timezone and set it to
something like this:
; Defines the default timezone used by the date functions
; https://php.net/date.timezone
date.timezone = 'America/Denver'
Or
```
echo "date.timezone = 'America/Denver'" > /etc/php.d/timezone.ini
```
### nominatim.so version mismatch
@@ -170,7 +136,7 @@ recreate `nominatim.so`. Try
cmake $main_Nominatim_path && make
```
### Setup.php fails with "DB Error: extension not found"
### Setup fails with "DB Error: extension not found"
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
See the installation instructions for a full list of required packages.


@@ -14,15 +14,15 @@ to a single Nominatim setup: configuration, extra data, etc. Create a project
directory apart from the Nominatim software and change into the directory:
```
mkdir ~/nominatim-planet
cd ~/nominatim-planet
mkdir ~/nominatim-project
cd ~/nominatim-project
```
In the following, we refer to the project directory as `$PROJECT_DIR`. To be
able to copy&paste instructions, you can export the appropriate variable:
```
export PROJECT_DIR=~/nominatim-planet
export PROJECT_DIR=~/nominatim-project
```
The Nominatim tool assumes by default that the current working directory is
@@ -153,7 +153,7 @@ if you plan to use the installation only for exports to a
[photon](https://photon.komoot.io/) database, then you can set up a database
without search indexes. Add `--reverse-only` to your setup command above.
This saves about 5% of disk space.
This saves about 5% of disk space, but the import won't be significantly faster.
### Filtering Imported Data
@@ -228,7 +228,7 @@ to load the OSM data into the PostgreSQL database. This step is very demanding
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
this point. PostgreSQL blocks at least the part of RAM that has been configured
with the `shared_buffers` parameter during
[PostgreSQL tuning](Installation.md#postgresql-tuning)
[PostgreSQL tuning](Installation.md#tuning-the-postgresql-database)
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
its internal data structures, potentially more when it has to process very large
relations. In addition it needs to maintain a cache for node locations. The size
@@ -268,27 +268,10 @@ nominatim reverse --lat 51 --lon 45
```
If you want to run Nominatim as a service, you need to make a choice between
running the traditional PHP frontend or the new experimental Python frontend.
running the modern Python frontend and the legacy PHP frontend.
Make sure you have installed the right packages as per
[Installation](Installation.md#software).
#### Testing the PHP frontend
You can run a small test server with the PHP frontend like this:
```sh
nominatim serve
```
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the PHP frontend](Deployment-PHP.md).
#### Testing the Python frontend
To run the test server against the Python frontend, you must choose a
@@ -296,10 +279,10 @@ web framework to use, either starlette or falcon. Make sure the appropriate
packages are installed. Then run
``` sh
nominatim serve --engine falcon
nominatim serve
```
or
or, if you prefer to use Starlette instead of Falcon as webserver,
``` sh
nominatim serve --engine starlette
@@ -314,6 +297,24 @@ Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the Python frontend](Deployment-Python.md).
#### Testing the PHP frontend
You can run a small test server with the PHP frontend like this:
```sh
nominatim serve --engine php
```
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the PHP frontend](Deployment-PHP.md).
## Enabling search by category phrases


@@ -55,23 +55,24 @@ For running Nominatim:
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)
When running the PHP frontend:
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
For running continuous updates:
* [pyosmium](https://osmcode.org/pyosmium/)
For running the experimental Python frontend:
For running the Python frontend:
* one of the following web frameworks:
* [falcon](https://falconframework.org/) (3.0+)
* [starlette](https://www.starlette.io/)
* [uvicorn](https://www.uvicorn.org/)
For running the legacy PHP frontend:
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
For dependencies for running tests and building documentation, see
the [Development section](../develop/Development-Environment.md).


@@ -60,16 +60,13 @@ to finish the recomputation.
## Removing large deleted objects
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
To run this command you will need to pass a PostgreSQL time interval. For
example, to delete any objects that have been deleted more than a month ago,
you would run:

```
nominatim admin --clean-deleted '1 month'
```
```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```


@@ -149,6 +149,8 @@ In terms of address details the zoom levels are as follows:
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
@@ -163,7 +165,7 @@ The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects feautures like rivers, lakes and mountains while
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.


@@ -165,7 +165,7 @@ results and thus give a preference to some results over others.
|-----------| ----- | ------- |
| countrycodes | comma-separated list of country codes | _unset_ |
Filer that limits the search results to one or more countries.
Filter that limits the search results to one or more countries.
The country code must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
@@ -179,7 +179,7 @@ also excluded when the filter is set.
This parameter should not be confused with the 'country' parameter of
the structured query. The 'country' parameter contains a search term
and will be handled with some fuzziness. The `countrycodes` parameter
is a hard filter and as such should be prefered. Having both parameters
is a hard filter and as such should be preferred. Having both parameters
in the same query will work. If the parameters contradict each other,
the search will come up empty.
@@ -187,6 +187,8 @@ also excluded when the filter is set.
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
@@ -201,7 +203,7 @@ The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects feautures like rivers, lakes and mountains while
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
@@ -215,7 +217,7 @@ the 'state', 'country' or 'city' part of an address. A featureType of
settlement selects any human inhabited feature from 'state' down to
'neighbourhood'.
When featureType ist set, then results are automatically restricted
When featureType is set, then results are automatically restricted
to the address layer (see above).
!!! tip
@@ -225,7 +227,7 @@ to the address layer (see above).
| Parameter | Value | Default |
|-----------| ----- | ------- |
| exclude_place_ids | comma-separeted list of place ids |
| exclude_place_ids | comma-separated list of place ids |
If you do not want certain OSM objects to appear in the search
result, give a comma separated list of the `place_id`s you want to skip.
@@ -246,7 +248,7 @@ box. `x` is longitude, `y` is latitude.
| bounded | 0 or 1 | 0 |
When set to 1, it turns the 'viewbox' parameter (see above) into
a filter paramter, excluding any results outside the viewbox.
a filter parameter, excluding any results outside the viewbox.
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
search is allowed. Give the special keyword for the amenity in square


@@ -280,7 +280,7 @@ kinds of geometries can be used:
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
the relation. If the ways do not form a valid area, then the object is
silently discarded.
* __relation_as_multiline__ creates a (Mutli)LineString from the ways in
* __relation_as_multiline__ creates a (Multi)LineString from the ways in
the relation. Ways are combined as much as possible without any regard
to their order in the relation.

docs/customize/SQLite.md Normal file

@@ -0,0 +1,55 @@
A Nominatim database can be converted into an SQLite database and used as
a read-only source for geocoding queries. This section describes how to
create and use an SQLite database.
!!! danger
This feature is in an experimental state at the moment. Use at your own
risk.
## Installing prerequisites
To use an SQLite database, you need to install:
* SQLite (>= 3.30)
* Spatialite (> 5.0.0)
On Ubuntu/Debian, you can run:

```
sudo apt install sqlite3 libsqlite3-mod-spatialite libspatialite7
```
## Creating a new SQLite database
Nominatim cannot import directly into an SQLite database. Instead you have to
first create a geocoding database in PostgreSQL by running a
[regular Nominatim import](../admin/Import.md).
Once this is done, the database can be converted to SQLite with

```
nominatim convert -o mydb.sqlite
```
This will create a database where all geocoding functions are available.
Depending on what functions you need, the database can be made smaller:
* `--without-reverse` omits indexes only needed for reverse geocoding
* `--without-search` omits tables and indexes used for forward search
* `--without-details` leaves out extra information only available in the
details API
## Using an SQLite database
Once you have created the database, you can use it by simply pointing the
database DSN to the SQLite file:

```
NOMINATIM_DATABASE_DSN=sqlite:dbname=mydb.sqlite
```
Please note that SQLite support is only available for the Python frontend. To
use the test server with an SQLite database, you therefore need to switch
the frontend engine:

```
nominatim serve --engine falcon
```
You need to install falcon or starlette for this, depending on which engine
you choose.
The CLI query commands and the library interface already use the new Python
frontend and therefore work right out of the box.
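As an illustration, geocoding against the converted database through the
library might look like this. This is a minimal sketch assuming Nominatim
4.4's `nominatim.api` interface and the `mydb.sqlite` file from above; the
`environ` override and the (lon, lat) argument order are assumptions, not
documented behaviour.

```python
from pathlib import Path

import nominatim.api as napi

# Point the API at the SQLite file by overriding the database DSN.
api = napi.NominatimAPI(Path('.'),
                        environ={'NOMINATIM_DATABASE_DSN':
                                 'sqlite:dbname=mydb.sqlite'})

# Same lookup as `nominatim reverse --lat 51 --lon 45` on the CLI.
result = api.reverse((45.0, 51.0))
if result is not None:
    print(result.place_id, result.names)
```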


@@ -394,7 +394,7 @@ The analyzer cannot be customized.
##### Postcode token analyzer
The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
a 'lookup' varaint of the token, which produces variants with optional
a 'lookup' variant of the token, which produces variants with optional
spaces. Use together with the clean-postcodes sanitizer.
The analyzer cannot be customized.


@@ -129,7 +129,7 @@ sanitizers:
!!! warning
This example is just a simplified showcase of how to create a sanitizer.
It is not really ready for real-world use: while the sanitizer would
correcly transform `West 5th Street` into `5th Street`. it would also
correctly transform `West 5th Street` into `5th Street`, it would also
shorten a simple `North Street` to `Street`.
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.


@@ -10,7 +10,7 @@ There are two kinds of tests in this test suite. There are functional tests
which test the API interface using a BDD test framework and there are unit
tests for specific PHP functions.
This test directory is sturctured as follows:
This test directory is structured as follows:
```
-+- bdd Functional API tests


@@ -18,7 +18,7 @@ elseif (has 'addr:place'?) then (yes)
**with same name**;
kill
else (no)
:add addr:place to adress;
:add addr:place to address;
:**Use closest place**\n**rank 16 to 25**;
kill
endif

File diff suppressed because one or more lines are too long



@@ -14,7 +14,7 @@ in the database.
The library also misses a proper installation routine, so some manipulation
of the PYTHONPATH is required. At the moment, use is only recommended for
developers wit some experience in Python.
developers with some experience in Python.
## Installation


@@ -22,8 +22,8 @@ nav:
- 'Basic Installation': 'admin/Installation.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Maintenance' : 'admin/Maintenance.md'
@@ -40,6 +40,7 @@ nav:
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
- 'External data: Postcodes': 'customize/Postcodes.md'
- 'Conversion to SQLite': 'customize/SQLite.md'
- 'Library Guide':
- 'Getting Started': 'library/Getting-Started.md'
- 'Nominatim API class': 'library/NominatimAPI.md'


@@ -187,6 +187,7 @@ BEGIN
-- --- Return the record for the base entry.
current_rank_address := 1000;
FOR location IN
SELECT placex.place_id, osm_type, osm_id, name,
coalesce(extratags->'linked_place', extratags->'place') as place_type,
@@ -261,7 +262,7 @@ BEGIN
-- If the place had a postcode assigned, take this one only
-- into consideration when it is an area and the place does not have
-- a postcode itself.
IF location.fromarea AND location.isaddress
IF location.fromarea AND location_isaddress
AND (place.address is null or not place.address ? 'postcode')
THEN
place.postcode := null; -- remove the less exact postcode


@@ -62,10 +62,6 @@ BEGIN
WHILE langs[i] IS NOT NULL LOOP
wiki_article := extratags->(case when langs[i] in ('english','country') THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
IF wiki_article is not null THEN
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/wiki/',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/w/index.php\\?title=',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)/([a-z]{2,3})/wiki/',E'\\2:');
--wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3})[=:]',E'\\2:');
wiki_article := replace(wiki_article,' ','_');
IF strpos(wiki_article, ':') IN (3,4) THEN
wiki_article_language := lower(trim(split_part(wiki_article, ':', 1)));
@@ -134,7 +130,7 @@ BEGIN
-- Still nothing? Fall back to a default.
IF result.importance is null THEN
result.importance := 0.75001 - (rank_search::float / 40);
result.importance := 0.40001 - (rank_search::float / 75);
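-- Note: both the old and the new formula evaluate to 0.00001 at
-- rank_search = 30; the rewrite only lowers the maximum fallback value
-- (at rank_search = 0) from 0.75001 to 0.40001.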
END IF;
{% if 'secondary_importance' in db.tables %}


@@ -219,10 +219,11 @@ BEGIN
-- formatted postcode and therefore 'postcode' contains a derived
-- variant.
CASE WHEN address ? 'postcode' THEN placex.postcode ELSE NULL::text END as postcode,
substring(address->'housenumber','[0-9]+')::integer as hnr
(address->'housenumber')::integer as hnr
FROM placex, generate_series(1, array_upper(waynodes, 1)) nodeidpos
WHERE osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT
and address is not NULL and address ? 'housenumber'
and address->'housenumber' ~ '^[0-9]{1,6}$'
and ST_Distance(NEW.linegeo, geometry) < 0.0005
ORDER BY nodeidpos
LOOP
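The effect of the new predicate is easy to check in isolation: only plain
numeric house numbers of at most six digits pass, so the integer cast can no
longer be fed values like `12a`, which the old `substring()` call reduced to
`12`. A standalone sketch of the same filter, not Nominatim code:

```python
import re

# Mirrors the SQL predicate: address->'housenumber' ~ '^[0-9]{1,6}$'
HOUSENUMBER_RE = re.compile(r'^[0-9]{1,6}$')

def interpolation_hnr(housenumber: str):
    """Return the house number as int if it passes the filter, else None."""
    if HOUSENUMBER_RE.match(housenumber):
        return int(housenumber)  # guaranteed to fit: 0..999999
    return None

assert interpolation_hnr('42') == 42
assert interpolation_hnr('12a') is None      # old code would extract 12
assert interpolation_hnr('1234567') is None  # more than six digits
```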


@@ -296,7 +296,9 @@ BEGIN
extratags = NEW.extratags,
admin_level = NEW.admin_level,
indexed_status = 2,
geometry = NEW.geometry
geometry = CASE WHEN existingplacex.rank_address = 0
THEN simplify_large_polygons(NEW.geometry)
ELSE NEW.geometry END
WHERE place_id = existingplacex.place_id;
-- Invalidate linked places: they potentially get a new name and addresses.
@@ -363,57 +365,3 @@ BEGIN
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;


@@ -2,7 +2,7 @@
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- Copyright (C) 2024 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Trigger functions for the placex table.
@@ -119,12 +119,14 @@ CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
AS $$
DECLARE
location RECORD;
member JSONB;
parent RECORD;
result BIGINT;
distance FLOAT;
new_distance FLOAT;
waygeom GEOMETRY;
BEGIN
{% if db.middle_db_format == '1' %}
FOR location IN
SELECT members FROM planet_osm_rels
WHERE parts @> ARRAY[poi_osm_id]
@@ -161,6 +163,40 @@ BEGIN
END LOOP;
END LOOP;
{% else %}
FOR member IN
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(members)
WHERE planet_osm_member_ids(members, poi_osm_type::char(1)) && ARRAY[poi_osm_id]
and tags->>'type' = 'associatedStreet'
and value->>'role' = 'street'
LOOP
FOR parent IN
SELECT place_id, geometry
FROM placex
WHERE osm_type = (member->>'type')::char(1)
and osm_id = (member->>'ref')::bigint
and name is not null
and rank_search between 26 and 27
LOOP
-- Find the closest 'street' member.
-- Avoid distance computation for the frequent case where there is
-- only one street member.
IF waygeom is null THEN
result := parent.place_id;
waygeom := parent.geometry;
ELSE
distance := coalesce(distance, ST_Distance(waygeom, bbox));
new_distance := ST_Distance(parent.geometry, bbox);
IF new_distance < distance THEN
distance := new_distance;
result := parent.place_id;
waygeom := parent.geometry;
END IF;
END IF;
END LOOP;
END LOOP;
{% endif %}
RETURN result;
END;
$$
@@ -257,7 +293,11 @@ CREATE OR REPLACE FUNCTION find_linked_place(bnd placex)
RETURNS placex
AS $$
DECLARE
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_members JSONB;
{% endif %}
rel_member RECORD;
linked_placex placex%ROWTYPE;
bnd_name TEXT;
@@ -678,6 +718,12 @@ BEGIN
NEW.country_code := NULL;
END IF;
-- Simplify polygons with a very large memory footprint when they
-- do not take part in address computation.
IF NEW.rank_address = 0 THEN
NEW.geometry := simplify_large_polygons(NEW.geometry);
END IF;
END IF;
{% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}
@@ -685,10 +731,12 @@ BEGIN
{% if not disable_diff_updates %}
-- The following is not needed until doing diff updates, and slows the main index process down
IF NEW.rank_address > 0 THEN
IF NEW.rank_address between 2 and 27 THEN
IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN
-- Performance: We just can't handle re-indexing for country level changes
IF st_area(NEW.geometry) < 1 THEN
IF (NEW.rank_address < 26 and st_area(NEW.geometry) < 1)
OR (NEW.rank_address >= 26 and st_area(NEW.geometry) < 0.01)
THEN
-- mark items within the geometry for re-indexing
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
@@ -703,9 +751,11 @@ BEGIN
or name is not null
or (NEW.rank_address >= 16 and address ? 'place'));
END IF;
ELSE
ELSEIF ST_GeometryType(NEW.geometry) not in ('ST_LineString', 'ST_MultiLineString')
OR ST_Length(NEW.geometry) < 0.5
THEN
-- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :(
diameter := update_place_diameter(NEW.rank_search);
diameter := update_place_diameter(NEW.rank_address);
IF diameter > 0 THEN
-- RAISE WARNING 'placex point insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter;
IF NEW.rank_search >= 26 THEN
@@ -749,7 +799,11 @@ CREATE OR REPLACE FUNCTION placex_update()
DECLARE
i INTEGER;
location RECORD;
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_member JSONB;
{% endif %}
geom GEOMETRY;
parent_address_level SMALLINT;
@@ -794,6 +848,9 @@ BEGIN
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search);
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
IF NEW.extratags = ''::hstore THEN
NEW.extratags := NULL;
END IF;
-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
-- the previous link status.
@@ -968,6 +1025,7 @@ BEGIN
-- waterway ways are linked when they are part of a relation and have the same class/type
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
{% if db.middle_db_format == '1' %}
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
LOOP
FOR i IN 1..array_upper(relation_members, 1) BY 2 LOOP
@@ -986,6 +1044,29 @@ BEGIN
END IF;
END LOOP;
END LOOP;
{% else %}
FOR relation_member IN
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(r.members)
WHERE r.id = NEW.osm_id
LOOP
IF relation_member->>'role' IN ('', 'main_stream', 'side_stream')
and relation_member->>'type' = 'W'
THEN
{% if debug %}RAISE WARNING 'waterway parent %, child %', NEW.osm_id, relation_member;{% endif %}
FOR linked_node_id IN
SELECT place_id FROM placex
WHERE osm_type = 'W' and osm_id = (relation_member->>'ref')::bigint
and class = NEW.class and type in ('river', 'stream', 'canal', 'drain', 'ditch')
and (relation_member->>'role' != 'side_stream' or NEW.name->'name' = name->'name')
LOOP
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
{% if 'search_name' in db.tables %}
DELETE FROM search_name WHERE place_id = linked_node_id;
{% endif %}
END LOOP;
END IF;
END LOOP;
{% endif %}
{% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
END IF;
@@ -1188,6 +1269,8 @@ BEGIN
END IF;
ELSEIF NEW.rank_address > 25 THEN
max_rank := 25;
ELSEIF NEW.class in ('place','boundary') and NEW.type in ('postcode','postal_code') THEN
max_rank := NEW.rank_search;
ELSE
max_rank := NEW.rank_address;
END IF;
@@ -1241,7 +1324,9 @@ BEGIN
OR ST_GeometryType(NEW.geometry) not in ('ST_LineString','ST_MultiLineString')
OR ST_Length(NEW.geometry) < 0.02)
THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
NEW.postcode := get_nearest_postcode(NEW.country_code,
CASE WHEN NEW.rank_address > 25
THEN NEW.centroid ELSE NEW.geometry END);
END IF;
{% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %}


@@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
term_vectors TEXT[],
weight_vectors FLOAT[],
rankings TEXT,
def_weight FLOAT)
RETURNS FLOAT
AS $$
DECLARE
pos INT := 1;
terms TEXT;
rank JSON;
BEGIN
FOREACH terms IN ARRAY term_vectors
FOR rank IN
SELECT * FROM json_array_elements(rankings::JSON)
LOOP
IF search_vector @> terms::INTEGER[] THEN
RETURN weight_vectors[pos];
IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
RETURN (rank->>0)::float;
END IF;
pos := pos + 1;
END LOOP;
RETURN def_weight;
END;
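The `rankings` argument of the rewritten function is a JSON array of
`[weight, [token, ...]]` pairs; the first entry whose tokens all occur in the
search vector determines the weight. A standalone Python mirror of that logic,
for illustration only:

```python
import json
from typing import Sequence

def weigh_search(search_vector: Sequence[int], rankings: str,
                 def_weight: float) -> float:
    """Return the weight of the first ranking whose token list is
    fully contained in search_vector."""
    for weight, tokens in json.loads(rankings):
        if all(t in search_vector for t in tokens):
            return weight
    return def_weight

# The first ranking matches: tokens 5 and 9 both occur in the vector.
assert weigh_search([1, 5, 9], '[[0.5, [5, 9]], [0.8, [2]]]', 1.0) == 0.5
```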


@@ -73,6 +73,26 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION get_rel_node_members(members JSONB, memberLabels TEXT[])
RETURNS SETOF BIGINT
AS $$
DECLARE
member JSONB;
BEGIN
FOR member IN SELECT * FROM jsonb_array_elements(members)
LOOP
IF member->>'type' = 'N' and member->>'role' = ANY(memberLabels) THEN
RETURN NEXT (member->>'ref')::bigint;
END IF;
END LOOP;
RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
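In Python terms, the new helper does no more than the following (the member
list below is made up for illustration):

```python
from typing import Iterable, Iterator, Mapping

def get_rel_node_members(members: Iterable[Mapping[str, str]],
                         member_labels: list) -> Iterator[int]:
    """Yield the ids of node members whose role is one of member_labels."""
    for member in members:
        if member['type'] == 'N' and member['role'] in member_labels:
            yield int(member['ref'])

members = [{'type': 'N', 'ref': '11', 'role': 'label'},
           {'type': 'W', 'ref': '12', 'role': 'outer'}]
assert list(get_rel_node_members(members, ['label'])) == [11]
```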
-- Copy 'name' to or from the default language.
--
-- \param country_code Country code of the object being named.
@@ -416,6 +436,20 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION simplify_large_polygons(geometry GEOMETRY)
RETURNS GEOMETRY
AS $$
BEGIN
IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
and ST_MemSize(geometry) > 3000000
THEN
geometry := ST_SimplifyPreserveTopology(geometry, 0.0001);
END IF;
RETURN geometry;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
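For scale: the 3000000-byte cutoff of `ST_MemSize` corresponds to polygons of
roughly 3 MB in-memory size, and the 0.0001° simplification tolerance is about
11 m at the equator.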
CREATE OR REPLACE FUNCTION place_force_delete(placeid BIGINT)
RETURNS BOOLEAN
@@ -487,3 +521,56 @@ BEGIN
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;


@@ -23,6 +23,10 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
USING GIST (geometry) {{db.tablespace.search_index}};
-- Index is needed during import but can be dropped as soon as a full
-- geometry index is in place. The partial index is almost as big as the full
-- index.
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {{db.tablespace.search_index}}


@@ -298,7 +298,15 @@ CREATE TABLE IF NOT EXISTS wikipedia_redirect (
-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet'];
{% if db.middle_db_format == '1' %}
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts)
{{db.tablespace.address_index}}
WHERE tags @> ARRAY['associatedStreet'];
{% else %}
CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(planet_osm_member_ids(members, 'R'::character(1)))
WITH (fastupdate=off)
{{db.tablespace.address_index}};
{% endif %}
-- Needed for lookups if a node is part of an interpolation.
CREATE INDEX IF NOT EXISTS idx_place_interpolations


@@ -1,40 +0,0 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS word;
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,
word text,
info jsonb
) {{db.tablespace.search_data}};
CREATE INDEX idx_word_word_token ON word
USING BTREE (word_token) {{db.tablespace.search_index}};
-- Used when updating country names from the boundary relation.
CREATE INDEX idx_word_country_names ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'C';
-- Used when inserting new postcodes on updates.
CREATE INDEX idx_word_postcodes ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'P';
-- Used when inserting full words.
CREATE INDEX idx_word_full_word ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'W';
-- Used when inserting analyzed housenumbers (exclude old-style entries).
CREATE INDEX idx_word_housenumbers ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'H' and word is not null;
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";


@@ -347,7 +347,7 @@ BEGIN
END LOOP;
END IF;
-- consider parts before an opening braket a full word as well
-- consider parts before an opening bracket a full word as well
words := regexp_split_to_array(value, E'[(]');
IF array_upper(words, 1) > 1 THEN
s := make_standard_name(words[1]);


@@ -1,6 +1,6 @@
# just use the pgxs makefile
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
list(APPEND PG_CONFIG_HINTS
"/usr/pgsql-${suffix}/bin")
endforeach()


@@ -11,10 +11,12 @@
#include "mb/pg_wchar.h"
#include <utfasciitable.h>
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#if PG_MAJORVERSION_NUM > 15
#include "varatt.h"
#endif
PG_MODULE_MAGIC;
Datum transliteration( PG_FUNCTION_ARGS );
Datum gettokenstring( PG_FUNCTION_ARGS );
void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int);


@@ -19,6 +19,7 @@ import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim.errors import UsageError
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
import nominatim.db.sqlite_functions
from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
@@ -81,41 +82,78 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
if self._engine:
return
dsn = self.config.get_database_params()
pool_size = self.config.get_int('API_POOL_SIZE')
extra_args: Dict[str, Any] = {'future': True,
'echo': self.config.get_bool('DEBUG_SQL')}
query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}
if self.config.get_int('API_POOL_SIZE') == 0:
extra_args['poolclass'] = sa.pool.NullPool
else:
extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'), password=dsn.get('password'),
host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, future=True,
max_overflow=0, pool_size=pool_size,
echo=self.config.get_bool('DEBUG_SQL'))
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
except (PGCORE_ERROR, sa.exc.OperationalError):
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
if is_sqlite:
params = dict((p.split('=', 1)
for p in self.config.DATABASE_DSN[7:].split(';')))
dburl = sa.engine.URL.create('sqlite+aiosqlite',
database=params.get('dbname'))
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
and self.config.get_bool('DATABASE_RW')) \
and not Path(params.get('dbname', '')).is_file():
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}
dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'),
password=dsn.get('password'),
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
if is_sqlite:
server_version = 0
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
# Make sure that all connections get the new settings
await self.close()
cursor.execute("SELECT load_extension('mod_spatialite')")
cursor.execute('SELECT SetDecimalPrecision(7)')
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
else:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
self._engine = engine
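The upshot of the branch above is that the DSN now selects the backend: a
PostgreSQL-style DSN builds a `postgresql+<driver>` URL as before, while a
`sqlite:` DSN of the form `sqlite:dbname=<file>` is parsed with a simple
split, as this standalone illustration of the same expression shows:

```python
dsn = 'sqlite:dbname=mydb.sqlite'
# len('sqlite:') == 7; parameters are ';'-separated key=value pairs
params = dict(p.split('=', 1) for p in dsn[7:].split(';'))
assert params == {'dbname': 'mydb.sqlite'}
```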
@@ -336,7 +374,7 @@ class NominatimAPI:
""" Close all active connections to the database.
This function also closes the asynchronous worker loop making
the NominatimAPI object unusuable.
the NominatimAPI object unusable.
"""
self._loop.run_until_complete(self._async_api.close())
self._loop.close()
@@ -409,7 +447,7 @@ class NominatimAPI:
place. Only meaningful for POI-like objects (places with a
rank_address of 30).
linked_place_id (Optional[int]): Internal ID of the place this object
linkes to. When this ID is set then there is no guarantee that
links to. When this ID is set then there is no guarantee that
the rest of the result information is complete.
admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
for administrative boundary objects.


@@ -13,6 +13,7 @@ import datetime as dt
import textwrap
import io
import re
import html
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -83,33 +84,49 @@ class BaseLogger:
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
extra_params: Union[Mapping[str, Any],
Sequence[Mapping[str, Any]], None]) -> str:
""" Return the comiled version of the statement.
""" Return the compiled version of the statement.
"""
compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)
params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
params[k] = str(v)
if hasattr(v, 'to_wkt'):
params[k] = v.to_wkt()
elif isinstance(v, (int, float)):
params[k] = v
else:
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'
sqlstr = str(compiled)
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
if conn.dialect.name == 'postgresql':
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
assert conn.dialect.name == 'sqlite'
# params in positional order
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)
return sqlstr
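The SQLite branch works by letting `re.sub` consume one positional parameter
per `?` placeholder via a generator; stripped of the SQLAlchemy machinery, the
trick looks like this:

```python
import re

params = {'p1': 'Berlin', 'p2': 30}
positiontup = ['p1', 'p2']  # positional order as reported by the compiler

# Each '?' placeholder pulls the next parameter from the generator.
pparams = (repr(params.get(name)) for name in positiontup)
sqlstr = re.sub(r'\?', lambda m: next(pparams),
                'SELECT * FROM placex WHERE name = ? AND rank_search < ?')
assert sqlstr == "SELECT * FROM placex WHERE name = 'Berlin' AND rank_search < 30"
```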
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.
@@ -211,7 +228,7 @@ class HTMLLogger(BaseLogger):
HtmlFormatter(nowrap=True, lineseparator='<br />'))
self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
else:
self._write(f'<code class="lang-sql">{sqlstr}</code>')
self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')
def _python_var(self, var: Any) -> str:
@@ -219,7 +236,7 @@ class HTMLLogger(BaseLogger):
fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
return f'<code class="lang-python">{str(var)}</code>'
return f'<code class="lang-python">{html.escape(str(var))}</code>'
def _write(self, text: str) -> None:
@@ -235,6 +252,10 @@ class TextLogger(BaseLogger):
self.buffer = io.StringIO()
def _timestamp(self) -> None:
self._write(f'[{dt.datetime.now()}]\n')
def get_buffer(self) -> str:
return self.buffer.getvalue()
@@ -247,6 +268,7 @@ class TextLogger(BaseLogger):
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"\n# {heading}\n\n")
@@ -283,6 +305,7 @@ class TextLogger(BaseLogger):
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
@@ -298,6 +321,7 @@ class TextLogger(BaseLogger):
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")


@@ -77,8 +77,8 @@ async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
if place.osm_class and place.osm_class.isdigit():
sql = sql.order_by(sa.func.greatest(0,
sa.func.least(int(place.osm_class) - t.c.endnumber),
t.c.startnumber - int(place.osm_class)))
int(place.osm_class) - t.c.endnumber,
t.c.startnumber - int(place.osm_class)))
else:
return None
@@ -163,11 +163,10 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.literal_column(f"""
ST_AsGeoJSON(CASE WHEN ST_NPoints({column.name}) > 5000
THEN ST_SimplifyPreserveTopology({column.name}, 0.0001)
ELSE {column.name} END)
""").label('geometry_geojson'))
return sql.add_columns(sa.func.ST_AsGeoJSON(
sa.case((sa.func.ST_NPoints(column) > 5000,
sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
else_=column), 7).label('geometry_geojson'))
else:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
@@ -183,9 +182,9 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
# add missing details
assert result is not None
result.parent_place_id = row.parent_place_id
result.linked_place_id = getattr(row, 'linked_place_id', None)
result.admin_level = getattr(row, 'admin_level', 15)
if 'type' in result.geometry:
result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
result.geometry['type'])
indexed_date = getattr(row, 'indexed_date', None)
if indexed_date is not None:
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
@@ -211,13 +210,13 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & ntyp.GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & ntyp.GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & ntyp.GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
@@ -239,3 +238,14 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
await nres.add_result_details(conn, [result], details)
return result
GEOMETRY_TYPE_MAP = {
'POINT': 'ST_Point',
'MULTIPOINT': 'ST_MultiPoint',
'LINESTRING': 'ST_LineString',
'MULTILINESTRING': 'ST_MultiLineString',
'POLYGON': 'ST_Polygon',
'MULTIPOLYGON': 'ST_MultiPolygon',
'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
}


@@ -5,7 +5,7 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper classes and functions for formating results into API responses.
Helper classes and functions for formatting results into API responses.
"""
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
from collections import defaultdict


@@ -11,14 +11,15 @@ Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
from nominatim.typing import SaSelect, SaRow, SaColumn
from nominatim.typing import SaSelect, SaRow
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
@@ -81,12 +82,6 @@ class AddressLine:
and its function as an address object. Most fields are optional.
Their presence depends on the kind and function of the address part.
"""
place_id: Optional[int]
""" Internal ID of the place.
"""
osm_object: Optional[Tuple[str, int]]
""" OSM type and ID of the place, if such an object exists.
"""
category: Tuple[str, str]
""" Main category of the place, described by a key-value pair.
"""
@@ -94,16 +89,6 @@ class AddressLine:
""" All available names for the place including references, alternative
names and translations.
"""
extratags: Optional[Dict[str, str]]
""" Any extra information available about the place. This is a dictionary
that usually contains OSM tag key-value pairs.
"""
admin_level: Optional[int]
""" The administrative level of a boundary as tagged in the input data.
This field is only meaningful for places of the category
(boundary, administrative).
"""
fromarea: bool
""" If true, then the exact area of the place is known. Without area
information, Nominatim has to make an educated guess if an address
@@ -123,6 +108,22 @@ class AddressLine:
distance: float
""" Distance in degrees between the result place and this address part.
"""
place_id: Optional[int] = None
""" Internal ID of the place.
"""
osm_object: Optional[Tuple[str, int]] = None
""" OSM type and ID of the place, if such an object exists.
"""
extratags: Optional[Dict[str, str]] = None
""" Any extra information available about the place. This is a dictionary
that usually contains OSM tag key-value pairs.
"""
admin_level: Optional[int] = None
""" The administrative level of a boundary as tagged in the input data.
This field is only meaningful for places of the category
(boundary, administrative).
"""
local_name: Optional[str] = None
""" Place holder for localization of this address part. See
@@ -184,6 +185,9 @@ class BaseResult:
place_id : Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
parent_place_id: Optional[int] = None
linked_place_id: Optional[int] = None
admin_level: int = 15
locale_name: Optional[str] = None
display_name: Optional[str] = None
@@ -229,7 +233,7 @@ class BaseResult:
of the value or an artificial value computed from the place's
search rank.
"""
return self.importance or (0.7500001 - (self.rank_search/40.0))
return self.importance or (0.40001 - (self.rank_search/75.0))
def localize(self, locales: Locales) -> None:
@@ -251,9 +255,6 @@ class DetailedResult(BaseResult):
""" A search result with more internal information from the database
added.
"""
parent_place_id: Optional[int] = None
linked_place_id: Optional[int] = None
admin_level: int = 15
indexed_date: Optional[dt.datetime] = None
@@ -292,12 +293,6 @@ class SearchResults(List[SearchResult]):
May be empty when no result was found.
"""
def localize(self, locales: Locales) -> None:
""" Apply the given locales to all results.
"""
for result in self:
result.localize(locales)
def _filter_geometries(row: SaRow) -> Dict[str, str]:
return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
@@ -317,6 +312,9 @@ def create_from_placex_row(row: Optional[SaRow],
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
parent_place_id = row.parent_place_id,
linked_place_id = getattr(row, 'linked_place_id', None),
admin_level = getattr(row, 'admin_level', 15),
names=_mingle_name_tags(row.name),
address=row.address,
extratags=row.extratags,
@@ -347,6 +345,7 @@ def create_from_osmline_row(row: Optional[SaRow],
res = class_type(source_table=SourceTable.OSMLINE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=('W', row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
address=row.address,
@@ -383,6 +382,7 @@ def create_from_tiger_row(row: Optional[SaRow],
res = class_type(source_table=SourceTable.TIGER,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
postcode=row.postcode,
@@ -411,6 +411,7 @@ def create_from_postcode_row(row: Optional[SaRow],
return class_type(source_table=SourceTable.POSTCODE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
category=('place', 'postcode'),
names={'ref': row.postcode},
rank_search=row.rank_search,
@@ -434,7 +435,8 @@ def create_from_country_row(row: Optional[SaRow],
centroid=Point.from_wkb(row.centroid),
names=row.name,
rank_address=4, rank_search=4,
country_code=row.country_code)
country_code=row.country_code,
geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
@@ -459,19 +461,24 @@ async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
log().comment('Query keywords')
for result in results:
await complete_keywords(conn, result)
for result in results:
result.localize(details.locales)
def _result_row_to_address_row(row: SaRow) -> AddressLine:
""" Create a new AddressLine from the results of a datbase query.
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
""" Create a new AddressLine from the results of a database query.
"""
extratags: Dict[str, str] = getattr(row, 'extratags', {})
if hasattr(row, 'place_type') and row.place_type:
extratags['place'] = row.place_type
extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
if 'linked_place' in extratags:
extratags['place'] = extratags['linked_place']
names = _mingle_name_tags(row.name) or {}
if getattr(row, 'housenumber', None) is not None:
names['housenumber'] = row.housenumber
if isaddress is None:
isaddress = getattr(row, 'isaddress', True)
return AddressLine(place_id=row.place_id,
osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
category=(getattr(row, 'class'), row.type),
@@ -479,109 +486,216 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
extratags=extratags,
admin_level=row.admin_level,
fromarea=row.fromarea,
isaddress=getattr(row, 'isaddress', True),
isaddress=isaddress,
rank_address=row.rank_address,
distance=row.distance)
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
places = []
hnrs = []
for result in results:
if result.place_id:
housenumber = -1
if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
if result.housenumber is not None:
housenumber = int(result.housenumber)
elif result.extratags is not None and 'startnumber' in result.extratags:
# details requests do not come with a specific house number
housenumber = int(result.extratags['startnumber'])
places.append(result.place_id)
hnrs.append(housenumber)
def _get_address_lookup_id(result: BaseResultT) -> int:
assert result.place_id
if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
return result.parent_place_id or result.place_id
return places, hnrs
return result.linked_place_id or result.place_id
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
assert result.address_rows is not None
if result.category[0] not in ('boundary', 'place')\
or result.category[1] not in ('postal_code', 'postcode'):
postcode = result.postcode
if not postcode and result.address:
postcode = result.address.get('postcode')
if postcode and ',' not in postcode and ';' not in postcode:
result.address_rows.append(AddressLine(
category=('place', 'postcode'),
names={'ref': postcode},
fromarea=False, isaddress=True, rank_address=5,
distance=0.0))
if result.country_code:
async def _get_country_names() -> Optional[Dict[str, str]]:
t = conn.t.country_name
sql = sa.select(t.c.name, t.c.derived_name)\
.where(t.c.country_code == result.country_code)
for cres in await conn.execute(sql):
names = cast(Dict[str, str], cres[0])
if cres[1]:
names.update(cast(Dict[str, str], cres[1]))
return names
return None
country_names = await conn.get_cached_value('COUNTRY_NAME',
result.country_code,
_get_country_names)
if country_names:
result.address_rows.append(AddressLine(
category=('place', 'country'),
names=country_names,
fromarea=False, isaddress=True, rank_address=4,
distance=0.0))
result.address_rows.append(AddressLine(
category=('place', 'country_code'),
names={'ref': result.country_code}, extratags = {},
fromarea=True, isaddress=False, rank_address=4,
distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
""" Retrieve information about places that make up the address of the result.
"""
result.address_rows = AddressLines()
if result.names:
result.address_rows.append(AddressLine(
place_id=result.place_id,
osm_object=result.osm_object,
category=result.category,
names=result.names,
extratags=result.extratags or {},
admin_level=result.admin_level,
fromarea=True, isaddress=True,
rank_address=result.rank_address, distance=0.0))
if result.source_table == SourceTable.PLACEX and result.address:
housenumber = result.address.get('housenumber')\
or result.address.get('streetnumber')\
or result.address.get('conscriptionnumber')
elif result.housenumber:
housenumber = result.housenumber
else:
housenumber = None
if housenumber:
result.address_rows.append(AddressLine(
category=('place', 'house_number'),
names={'ref': housenumber},
fromarea=True, isaddress=True, rank_address=28, distance=0))
if result.address and '_unlisted_place' in result.address:
result.address_rows.append(AddressLine(
category=('place', 'locality'),
names={'name': result.address['_unlisted_place']},
fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
""" Retrieve information about places that make up the address of the result.
"""
places, hnrs = _get_housenumber_details(results)
for result in results:
_setup_address_details(result)
if not places:
### Lookup entries from place_address line
lookup_ids = [{'pid': r.place_id,
'lid': _get_address_lookup_id(r),
'names': list(r.address.values()) if r.address else [],
'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
for r in results if r.place_id]
if not lookup_ids:
return
def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
return sa.func.get_addressdata(place_id, hnr)\
.table_valued( # type: ignore[no-untyped-call]
sa.column('place_id', type_=sa.Integer),
'osm_type',
sa.column('osm_id', type_=sa.BigInteger),
sa.column('name', type_=conn.t.types.Composite),
'class', 'type', 'place_type',
sa.column('admin_level', type_=sa.Integer),
sa.column('fromarea', type_=sa.Boolean),
sa.column('isaddress', type_=sa.Boolean),
sa.column('rank_address', type_=sa.SmallInteger),
sa.column('distance', type_=sa.Float),
joins_implicitly=True)
ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
t = conn.t.placex
taddr = conn.t.addressline
if len(places) == 1:
# Optimized case for exactly one result (reverse)
sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
.order_by(sa.column('rank_address').desc(),
sa.column('isaddress').desc())
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level, taddr.c.fromarea,
sa.case((t.c.rank_address == 11, 5),
else_=t.c.rank_address).label('rank_address'),
taddr.c.distance, t.c.country_code, t.c.postcode)\
.join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
.join(t, taddr.c.address_place_id == t.c.place_id)\
.order_by('src_place_id')\
.order_by(sa.column('rank_address').desc())\
.order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
.order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
(taddr.c.isaddress, 0),
(sa.and_(taddr.c.fromarea,
t.c.geometry.ST_Contains(
sa.func.ST_GeomFromEWKT(
ltab.c.value['c'].as_string()))), 1),
else_=-1).desc())\
.order_by(taddr.c.fromarea.desc())\
.order_by(taddr.c.distance.desc())\
.order_by(t.c.rank_search.desc())
alines = AddressLines()
for row in await conn.execute(sql):
alines.append(_result_row_to_address_row(row))
for result in results:
if result.place_id == places[0]:
result.address_rows = alines
return
darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
.table_valued( # type: ignore[no-untyped-call]
sa.column('place_id', type_= sa.Integer),
sa.column('housenumber', type_= sa.Integer)
).render_derived()
sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
.order_by(darray.c.place_id,
sa.column('rank_address').desc(),
sa.column('isaddress').desc())
current_result = None
current_rank_address = -1
for row in await conn.execute(sql):
if current_result is None or row.result_place_id != current_result.place_id:
for result in results:
if result.place_id == row.result_place_id:
current_result = result
break
if current_result is None or row.src_place_id != current_result.place_id:
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
current_rank_address = -1
location_isaddress = row.rank_address != current_rank_address
if current_result.country_code is None and row.country_code:
current_result.country_code = row.country_code
if row.type in ('postcode', 'postal_code') and location_isaddress:
if not row.fromarea or \
(current_result.address and 'postcode' in current_result.address):
location_isaddress = False
else:
assert False
current_result.address_rows = AddressLines()
current_result.address_rows.append(_result_row_to_address_row(row))
current_result.postcode = None
assert current_result.address_rows is not None
current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
current_rank_address = row.rank_address
for result in results:
await _finalize_entry(conn, result)
### Finally add the record for the parent entry where necessary.
parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
if parent_lookup_ids:
ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level,
t.c.rank_address)\
.where(t.c.place_id == ltab.c.value['lid'].as_integer())
for row in await conn.execute(sql):
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
assert current_result.address_rows is not None
current_result.address_rows.append(AddressLine(
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
names=row.name, extratags=row.extratags or {},
admin_level=row.admin_level,
fromarea=True, isaddress=True,
rank_address=row.rank_address, distance=0.0))
### Now sort everything
def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
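# Sort key (ascending): the line for the place itself first, then by
# descending rank_address; at equal rank, isaddress=False lines come first.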
return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
for result in results:
assert result.address_rows is not None
result.address_rows.sort(key=mk_sort_key(result.place_id))
# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
centroid: Point) -> SaSelect:
t = conn.t.placex
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_.label('class'), t.c.type,
t.c.admin_level, t.c.housenumber,
sa.literal_column("""ST_GeometryType(geometry) in
('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
t.c.geometry.is_area().label('fromarea'),
t.c.rank_address,
sa.literal_column(
"""ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
""" % centroid).label('distance'))
t.c.geometry.distance_spheroid(
sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
@@ -615,10 +729,10 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
for name_tokens, address_tokens in await conn.execute(sql):
for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
result.name_keywords.append(WordInfo(*row))
for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))):
for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
result.address_keywords.append(WordInfo(*row))


@@ -19,7 +19,6 @@ import nominatim.api.results as nres
from nominatim.api.logging import log
from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
from nominatim.db.sqlalchemy_types import Geometry
import nominatim.db.sqlalchemy_functions as snfn
# In SQLAlchemy, expressions that compare with NULL need to be written
# with the equality operator ('== None' instead of 'is None').
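A minimal sketch of this convention, assuming a toy table with a nullable column; SQLAlchemy compiles a comparison against the Python literal None into an SQL IS NULL test:

import sqlalchemy as sa

t = sa.table('placex', sa.column('linked_place_id', sa.Integer))

# '== None' compiles to 'IS NULL'; 'is None' would be evaluated in Python.
stmt = sa.select(sa.literal(1)).where(t.c.linked_place_id == None)  # noqa: E711
print(stmt)  # ... WHERE placex.linked_place_id IS NULL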
@@ -57,6 +56,7 @@ def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
t.c.importance, t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
centroid,
t.c.linked_place_id, t.c.admin_level,
distance.label('distance'),
t.c.geometry.ST_Expand(0).label('bbox'))
@@ -84,12 +84,6 @@ def _locate_interpolation(table: SaFromClause) -> SaLabel:
else_=0).label('position')
def _is_address_point(table: SaFromClause) -> SaColumn:
return sa.and_(table.c.rank_address == 30,
sa.or_(table.c.housenumber != None,
table.c.name.has_key('addr:housename')))
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
@@ -146,13 +140,13 @@ class ReverseGeocoder:
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
if self.params.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if self.params.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if self.params.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if self.params.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
@@ -181,12 +175,12 @@ class ReverseGeocoder:
t = self.conn.t.placex
# PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to proberly estimate index use
# there is a danger it won't be able to properly estimate index use
# when used with prepared statements
diststr = sa.text(f"{distance}")
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
@@ -203,7 +197,7 @@ class ReverseGeocoder:
max_rank = min(29, self.max_rank)
restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
if self.max_rank == 30:
restrict.append(lambda: _is_address_point(t))
restrict.append(lambda: sa.func.IsAddressPoint(t))
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
t.c.class_.not_in(('place', 'building')),
@@ -224,17 +218,21 @@ class ReverseGeocoder:
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
.where(t.c.parent_place_id == parent_place_id)
.where(_is_address_point(t))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.order_by('distance')
.limit(1))
def _base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.where(sa.func.IsAddressPoint(t))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
sql = self._add_geometry_columns(_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
@@ -243,30 +241,26 @@ class ReverseGeocoder:
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
sql: Any = sa.lambda_stmt(lambda:
sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
.where(t.c.startnumber != None)
.order_by('distance')
.limit(1))
sql = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
.where(t.c.startnumber != None)\
.order_by('distance')\
.limit(1)
if parent_place_id is not None:
sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
sql = sql.where(t.c.parent_place_id == parent_place_id)
def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
inner = base_sql.subquery('ipol')
inner = sql.subquery('ipol')
return sa.select(inner.c.place_id, inner.c.osm_id,
sql = sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode, inner.c.country_code,
inner.c.distance)
sql += _wrap_query
if self.has_geometries():
sub = sql.subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
@@ -281,7 +275,7 @@ class ReverseGeocoder:
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
@@ -294,11 +288,12 @@ class ReverseGeocoder:
inner.c.postcode,
inner.c.distance)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
sql: SaLambdaSelect
if self.has_geometries():
sub = sql.subquery('geom')
sub = _base_query().subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
@@ -370,7 +365,7 @@ class ReverseGeocoder:
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
.where(sa.func.PlacexGeometryReverseLookuppolygon())\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
@@ -400,10 +395,7 @@ class ReverseGeocoder:
.where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(WKT_PARAM))\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('places')
@@ -412,13 +404,15 @@ class ReverseGeocoder:
return _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_id)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql = sa.lambda_stmt(_place_inside_area_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
sql = self._add_geometry_columns(_place_inside_area_query(),
sa.literal_column('places.geometry'))
else:
sql = sa.lambda_stmt(_place_inside_area_query)
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
@@ -439,10 +433,9 @@ class ReverseGeocoder:
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(WKT_PARAM))\
.where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
.order_by(sa.desc(t.c.rank_search))\
.order_by('distance')\
.limit(50)\
.subquery()
@@ -513,22 +506,22 @@ class ReverseGeocoder:
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(t.c.country_code.in_(ccodes))\
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
.where(t.c.geometry
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
.intersects(WKT_PARAM))\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
return _select_from_placex(inner, False)\
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
sql = self._add_geometry_columns(_base_query(),
sa.literal_column('area.geometry'))
else:
sql = sa.lambda_stmt(_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
@@ -537,16 +530,19 @@ class ReverseGeocoder:
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1))
def _country_base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_country_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()


@@ -5,9 +5,9 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Convertion from token assignment to an abstract DB search.
Conversion from token assignment to an abstract DB search.
"""
from typing import Optional, List, Tuple, Iterator
from typing import Optional, List, Tuple, Iterator, Dict
import heapq
from nominatim.api.types import SearchDetails, DataLayer
@@ -15,6 +15,7 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
from nominatim.api.search.token_assignment import TokenAssignment
import nominatim.api.search.db_search_fields as dbf
import nominatim.api.search.db_searches as dbs
import nominatim.api.search.db_search_lookups as lookups
def wrap_near_search(categories: List[Tuple[str, str]],
@@ -89,12 +90,14 @@ class SearchBuilder:
if sdata is None:
return
categories = self.get_search_categories(assignment)
near_items = self.get_near_items(assignment)
if near_items is not None and not near_items:
return # impossible combination of near items and category parameter
if assignment.name is None:
if categories and not sdata.postcodes:
sdata.qualifiers = categories
categories = None
if near_items and not sdata.postcodes:
sdata.qualifiers = near_items
near_items = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
@@ -102,16 +105,19 @@ class SearchBuilder:
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
bool(categories))
bool(near_items))
else:
builder = self.build_name_search(sdata, assignment.name, assignment.address,
bool(categories))
bool(near_items))
if categories:
penalty = min(categories.penalties)
categories.penalties = [p - penalty for p in categories.penalties]
if near_items:
penalty = min(near_items.penalties)
near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder:
yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
search_penalty = search.penalty
search.penalty = 0.0
yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
near_items, search)
else:
for search in builder:
search.penalty += assignment.penalty
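The rebasing of near-item penalties above can be shown in isolation (penalty values invented): the minimum is moved out of the per-item list and added to the wrapping NearSearch instead.

penalties = [0.5, 0.25, 0.75]
base = min(penalties)
penalties = [p - base for p in penalties]
print(base, penalties)  # 0.25 [0.25, 0.0, 0.5] -- 'base' goes into the NearSearch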
@@ -147,7 +153,7 @@ class SearchBuilder:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
'restrict')]
lookups.Restrict)]
penalty += 0.2
yield dbs.PostcodeSearch(penalty, sdata)
@@ -157,23 +163,28 @@ class SearchBuilder:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
expected_count = sum(t.count for t in hnrs)
partials = [t for trange in address
for t in self.query.get_partials_list(trange)]
partials = {t.token: t.count for trange in address
for t in self.query.get_partials_list(trange)}
if len(partials) != 1 or partials[0].count < 10000:
if expected_count < 8000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
[t.token for t in partials], 'lookup_all'))
list(partials), lookups.Restrict))
elif len(partials) != 1 or list(partials.values())[0] < 10000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
list(partials), lookups.LookupAll))
else:
addr_fulls = [t.token for t
in self.query.get_tokens(address[0], TokenType.WORD)]
if len(addr_fulls) > 5:
return
sdata.lookups.append(
dbf.FieldLookup('nameaddress_vector',
[t.token for t
in self.query.get_tokens(address[0], TokenType.WORD)],
'lookup_any'))
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))
sdata.housenumbers = dbf.WeightedStrings([], [])
yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
yield dbs.PlaceSearch(0.05, sdata, expected_count)
def build_name_search(self, sdata: dbf.SearchData,
@@ -198,45 +209,48 @@ class SearchBuilder:
are and tries to find a lookup that optimizes index use.
"""
penalty = 0.0 # extra penalty
name_partials = self.query.get_partials_list(name)
name_tokens = [t.token for t in name_partials]
name_partials = {t.token: t for t in self.query.get_partials_list(name)}
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_tokens = [t.token for t in addr_partials]
addr_tokens = list({t.token for t in addr_partials})
partials_indexed = all(t.is_indexed for t in name_partials) \
partials_indexed = all(t.is_indexed for t in name_partials.values()) \
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials)
exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
return
# Partial term too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
fulls_count = sum(t.count for t in name_fulls)
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if not partials_indexed:
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
yield penalty, fulls_count / (2**len(addr_partials)),\
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
'restrict' if fulls_count < 10000 else 'lookup_all')
if name_fulls:
fulls_count = sum(t.count for t in name_fulls)
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if not partials_indexed:
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
yield penalty, fulls_count / (2**len(addr_tokens)),\
dbf.lookup_by_any_name([t.token for t in name_fulls],
addr_tokens,
fulls_count > 30000 / max(1, len(addr_tokens)))
# To catch remaining results, lookup by name and address
# We only do this if there is a reasonable number of results expected.
exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
if exp_count < 10000 and all(t.is_indexed for t in name_partials):
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
if exp_count < 10000 and all(t.is_indexed for t in name_partials.values()):
lookup = [dbf.FieldLookup('name_vector', list(name_partials.keys()), lookups.LookupAll)]
if addr_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
penalty += 0.35 * max(1 if name_fulls else 0.1,
5 - len(name_partials) - len(addr_tokens))
yield penalty, exp_count, lookup
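A worked example of the frequency estimate above, with invented token counts; each additional name partial is assumed to roughly halve the number of matching rows:

counts = [12000, 5000, 3000]   # hypothetical occurrence counts per partial
exp_count = min(counts) / (2 ** (len(counts) - 1))
print(exp_count)  # 750.0 -- below the 8000 threshold, so lookup by name pays off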
def get_name_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
def get_name_ranking(self, trange: TokenRange,
db_field: str = 'name_vector') -> dbf.FieldRanking:
""" Create a ranking expression for a name term in the given range.
"""
name_fulls = self.query.get_tokens(trange, TokenType.WORD)
@@ -245,7 +259,7 @@ class SearchBuilder:
# Fallback, sum of penalty for partials
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
return dbf.FieldRanking('name_vector', default, ranks)
return dbf.FieldRanking(db_field, default, ranks)
def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
@@ -303,11 +317,9 @@ class SearchBuilder:
sdata = dbf.SearchData()
sdata.penalty = assignment.penalty
if assignment.country:
tokens = self.query.get_tokens(assignment.country, TokenType.COUNTRY)
if self.details.countries:
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
if not tokens:
return None
tokens = self.get_country_tokens(assignment.country)
if not tokens:
return None
sdata.set_strings('countries', tokens)
elif self.details.countries:
sdata.countries = dbf.WeightedStrings(self.details.countries,
@@ -321,34 +333,70 @@ class SearchBuilder:
self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE))
if assignment.qualifier:
sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
TokenType.QUALIFIER))
tokens = self.get_qualifier_tokens(assignment.qualifier)
if not tokens:
return None
sdata.set_qualifiers(tokens)
elif self.details.categories:
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
if assignment.address:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
if not assignment.name and assignment.housenumber:
# housenumber search: the first item needs to be handled like
# a name in ranking or penalties are not comparable with
# normal searches.
sdata.set_ranking([self.get_name_ranking(assignment.address[0],
db_field='nameaddress_vector')]
+ [self.get_addr_ranking(r) for r in assignment.address[1:]])
else:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
else:
sdata.rankings = []
return sdata
def get_search_categories(self,
assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for category search or use the categories
def get_country_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of country tokens for the given range,
optionally filtered by the country list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
if self.details.countries:
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
return tokens
def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of qualifier tokens for the given range,
optionally filtered by the qualifier list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories]
return tokens
def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for near items search or use the categories
requested per parameter.
Returns None if no category search is requested.
"""
if assignment.category:
tokens = [t for t in self.query.get_tokens(assignment.category,
TokenType.CATEGORY)
if not self.details.categories
or t.get_category() in self.details.categories]
return dbf.WeightedCategories([t.get_category() for t in tokens],
[t.penalty for t in tokens])
if self.details.categories:
return dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
cat = t.get_category()
# The category of a near search will be that of near_item.
# Thus, if search is restricted to a category parameter,
# the two sets must intersect.
if (not self.details.categories or cat in self.details.categories)\
and t.penalty < tokens.get(cat, 1000.0):
tokens[cat] = t.penalty
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
return None
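The dictionary above keeps only the cheapest penalty per category; the same pattern stands alone like this (categories invented for illustration):

from typing import Dict, Tuple

tokens: Dict[Tuple[str, str], float] = {}
for cat, penalty in [(('amenity', 'cafe'), 0.3),
                     (('amenity', 'cafe'), 0.1),
                     (('shop', 'bakery'), 0.2)]:
    if penalty < tokens.get(cat, 1000.0):
        tokens[cat] = penalty
print(tokens)  # {('amenity', 'cafe'): 0.1, ('shop', 'bakery'): 0.2}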


@@ -7,14 +7,16 @@
"""
Data structures for more complex fields in abstract search descriptions.
"""
from typing import List, Tuple, Iterator, cast
from typing import List, Tuple, Iterator, Dict, Type
import dataclasses
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY
from nominatim.typing import SaFromClause, SaColumn, SaExpression
from nominatim.api.search.query import Token
import nominatim.api.search.db_search_lookups as lookups
from nominatim.utils.json_writer import JsonWriter
@dataclasses.dataclass
class WeightedStrings:
@@ -92,7 +94,7 @@ class RankedTokens:
def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
""" Create a new RankedTokens list with the given token appended.
The tokens penalty as well as the given transision penalty
The tokens penalty as well as the given transition penalty
are added to the overall penalty.
"""
return RankedTokens(self.penalty + t.penalty + transition_penalty,
@@ -129,11 +131,17 @@ class FieldRanking:
"""
assert self.rankings
return sa.func.weigh_search(table.c[self.column],
[f"{{{','.join((str(s) for s in r.tokens))}}}"
for r in self.rankings],
[r.penalty for r in self.rankings],
self.default)
rout = JsonWriter().start_array()
for rank in self.rankings:
rout.start_array().value(rank.penalty).next()
rout.start_array()
for token in rank.tokens:
rout.value(token).next()
rout.end_array()
rout.end_array().next()
rout.end_array()
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
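Judging from the writer calls above, weigh_search() receives a JSON array of [penalty, [token, ...]] pairs; the standard library produces the same shape (token ids invented):

import json

rankings = [(0.1, [101, 102]), (0.3, [205])]
payload = json.dumps([[penalty, tokens] for penalty, tokens in rankings],
                     separators=(',', ':'))
print(payload)  # [[0.1,[101,102]],[0.3,[205]]]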
@dataclasses.dataclass
@@ -146,19 +154,12 @@ class FieldLookup:
"""
column: str
tokens: List[int]
lookup_type: str
lookup_type: Type[lookups.LookupType]
def sql_condition(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the given match condition.
"""
col = table.c[self.column]
if self.lookup_type == 'lookup_all':
return col.contains(self.tokens)
if self.lookup_type == 'lookup_any':
return cast(SaColumn, col.overlap(self.tokens))
return sa.func.array_cat(col, sa.text('ARRAY[]::integer[]'),
type_=ARRAY(sa.Integer())).contains(self.tokens)
return self.lookup_type(table, self.column, self.tokens)
class SearchData:
@@ -195,10 +196,16 @@ class SearchData:
""" Set the qulaifier field from the given tokens.
"""
if tokens:
min_penalty = min(t.penalty for t in tokens)
categories: Dict[Tuple[str, str], float] = {}
min_penalty = 1000.0
for t in tokens:
min_penalty = min(min_penalty, t.penalty)
cat = t.get_category()
if t.penalty < categories.get(cat, 1000.0):
categories[cat] = t.penalty
self.penalty += min_penalty
self.qualifiers = WeightedCategories([t.get_category() for t in tokens],
[t.penalty - min_penalty for t in tokens])
self.qualifiers = WeightedCategories(list(categories.keys()),
list(categories.values()))
def set_ranking(self, rankings: List[FieldRanking]) -> None:
@@ -217,22 +224,23 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel
""" Create a lookup list where name tokens are looked up via index
and potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
return lookup
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
lookup_type: str) -> List[FieldLookup]:
use_index_for_addr: bool) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and only one of the name tokens must be present.
Potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
return lookup
@@ -241,5 +249,5 @@ def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[Field
""" Create a lookup list where address tokens are looked up via index
and the name tokens are only used to restrict the search further.
"""
return [FieldLookup('name_vector', name_tokens, 'restrict'),
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]


@@ -0,0 +1,114 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of lookup functions for the search_name table.
"""
from typing import List, Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_types import IntArray
# pylint: disable=consider-using-f-string
LookupType = sa.sql.expression.FunctionElement[Any]
class LookupAll(LookupType):
""" Find all entries in search_name table that contain all of
a given list of tokens using an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s @> %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, col, colname, tokens = list(element.clauses)
return "(%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_intersect_fuzzy(places) as p FROM"\
" (SELECT places FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s"\
" ORDER BY length(places)) as x) as u,"\
" json_each('[' || u.p || ']'))"\
" AND array_contains(%s, %s))"\
% (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw),
compiler.process(col, **kw),
compiler.process(tokens, **kw)
)
class LookupAny(LookupType):
""" Find all entries that contain at least one of the given tokens.
Use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s && %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, _, colname, tokens = list(element.clauses)
return "%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_union(places) as p FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s) as u,"\
" json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw))
class Restrict(LookupType):
""" Find all entries that contain all of the given tokens.
Do not use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(getattr(table.c, column),
sa.type_coerce(tokens, IntArray))
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)


@@ -5,13 +5,12 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the acutal database accesses for forward search.
Implementation of the actual database accesses for forward search.
"""
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
import abc
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
@@ -19,11 +18,18 @@ from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
import nominatim.api.results as nres
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.db.sqlalchemy_types import Geometry, IntArray
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
def no_index(expr: SaColumn) -> SaColumn:
""" Wrap the given expression, so that the query planner will
refrain from using the expression for index lookup.
"""
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
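Compiling the wrapper against a toy column shows the effect (a sketch, not Nominatim code): the coalesce(NULL, ...) shell returns the value unchanged but keeps the planner from matching the expression to an index on the column.

import sqlalchemy as sa

t = sa.table('placex', sa.column('rank_address', sa.SmallInteger))
expr = sa.func.coalesce(sa.null(), t.c.rank_address).between(1, 30)
print(expr)  # renders roughly as: coalesce(NULL, placex.rank_address) BETWEEN ... AND ...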
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
@@ -48,19 +54,37 @@ NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
def filter_by_area(sql: SaSelect, t: SaFromClause,
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
""" Apply SQL statements for filtering by viewbox and near point,
if applicable.
"""
if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1 and not avoid_index:
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
else:
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
use_index=not avoid_index and
details.viewbox.area < 0.2))
return sql
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia,
t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id, t.c.admin_level,
t.c.centroid,
t.c.geometry.ST_Expand(0).label('bbox'))
@@ -72,20 +96,20 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
all_ids = sa.func.ArrayAgg(table.c.place_id)
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
if len(numerals) == 1:
@@ -106,14 +130,12 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
orexpr.append(table.c.rank_address.between(1, 30))
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(table.c.rank_address.between(1, 29))
orexpr.append(sa.and_(table.c.rank_address == 30,
sa.or_(table.c.housenumber != None,
table.c.address.has_key('addr:housename'))))
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.func.IsAddressPoint(table))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(table.c.rank_address == 30,
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
if layers & DataLayer.MANMADE:
@@ -123,7 +145,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
@@ -131,7 +153,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))
if len(orexpr) == 1:
return orexpr[0]
@@ -150,7 +172,8 @@ async def _get_placex_housenumbers(conn: SearchConnection,
place_ids: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.placex
sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
sql = _select_placex(t).add_columns(t.c.importance)\
.where(t.c.place_id.in_(place_ids))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
@@ -162,12 +185,21 @@ async def _get_placex_housenumbers(conn: SearchConnection,
yield result
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
""" Create a subselect that returns the given list of integers
as rows in the column 'nr'.
"""
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
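A PostgreSQL-only equivalent of this helper, using the stock json_array_elements_text() function in place of Nominatim's dialect-aware JsonArrayEach (a sketch under that assumption):

import sqlalchemy as sa

def int_list_to_subquery(inp):
    vtab = sa.func.json_array_elements_text(sa.type_coerce(inp, sa.JSON))\
             .table_valued(sa.column('value', sa.Text))
    return sa.select(sa.cast(vtab.c.value, sa.Integer).label('nr')).subquery()

nrs = int_list_to_subquery([1, 34, 56])   # one row per housenumber, column 'nr'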
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
numerals: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.osmline
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
.data([(n,) for n in numerals])
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.osm_id,
t.c.parent_place_id, t.c.address,
values.c.nr.label('housenumber'),
@@ -190,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
numerals: List[int], osm_id: int,
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.tiger
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
.data([(n,) for n in numerals])
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.parent_place_id,
sa.literal('W').label('osm_type'),
sa.literal(osm_id).label('osm_id'),
@@ -214,6 +245,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
class AbstractSearch(abc.ABC):
""" Encapuslation of a single lookup in the database.
"""
SEARCH_PRIO: int = 2
def __init__(self, penalty: float) -> None:
self.penalty = penalty
@@ -247,9 +279,20 @@ class NearSearch(AbstractSearch):
base.sort(key=lambda r: (r.accuracy, r.rank_search))
max_accuracy = base[0].accuracy + 0.5
if base[0].rank_address == 0:
min_rank = 0
max_rank = 0
elif base[0].rank_address < 26:
min_rank = 1
max_rank = min(25, base[0].rank_address + 4)
else:
min_rank = 26
max_rank = 30
base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
and r.accuracy <= max_accuracy
and r.bbox and r.bbox.area < 20)
and r.bbox and r.bbox.area < 20
and r.rank_address >= min_rank
and r.rank_address <= max_rank)
if base:
baseids = [b.place_id for b in base[:5] if b.place_id]
@@ -271,30 +314,39 @@ class NearSearch(AbstractSearch):
"""
table = await conn.get_class_table(*category)
t = conn.t.placex
tgeom = conn.t.placex.alias('pgeom')
sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
if table is None:
# No classtype table available, do a simplified lookup in placex.
sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
.order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
table = conn.t.placex
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
.where(table.c.class_ == category[0])\
.where(table.c.type == category[1])
else:
# Use classtype table. We can afford to use a larger
# radius for the lookup.
sql = sql.join(table, t.c.place_id == table.c.place_id)\
.join(tgeom,
table.c.centroid.ST_CoveredBy(
sa.case((sa.and_(tgeom.c.rank_address < 9,
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom,
table.c.centroid.ST_CoveredBy(
sa.case((sa.and_(tgeom.c.rank_address > 9,
tgeom.c.geometry.is_area()),
tgeom.c.geometry),
else_ = tgeom.c.centroid.ST_Expand(0.05))))\
.order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
tgeom.c.geometry),
else_ = tgeom.c.centroid.ST_Expand(0.05))))
sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
inner = sql.where(tgeom.c.place_id.in_(ids))\
.group_by(table.c.place_id).subquery()
t = conn.t.placex
sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
.join(inner, inner.c.place_id == t.c.place_id)\
.order_by(inner.c.dist)
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
@@ -334,8 +386,10 @@ class PoiSearch(AbstractSearch):
# simply search in placex table
def _base_query() -> SaSelect:
return _select_placex(t) \
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))\
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
@@ -362,6 +416,7 @@ class PoiSearch(AbstractSearch):
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.join(table, t.c.place_id == table.c.place_id)\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
@@ -371,8 +426,8 @@ class PoiSearch(AbstractSearch):
if details.near and details.near_radius is not None:
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
.where(table.c.centroid.ST_DWithin(NEAR_PARAM,
NEAR_RADIUS_PARAM))
.where(table.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -394,6 +449,8 @@ class PoiSearch(AbstractSearch):
class CountrySearch(AbstractSearch):
""" Search for a country name or country code.
"""
SEARCH_PRIO = 0
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
self.countries = sdata.countries
@@ -407,6 +464,7 @@ class CountrySearch(AbstractSearch):
ccodes = self.countries.values
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)
@@ -416,11 +474,7 @@ class CountrySearch(AbstractSearch):
if details.excluded:
sql = sql.where(_exclude_places(t))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(_within_near(t))
sql = filter_by_area(sql, t, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
@@ -430,7 +484,14 @@ class CountrySearch(AbstractSearch):
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results or await self.lookup_in_country_table(conn, details)
if not results:
results = await self.lookup_in_country_table(conn, details)
if results:
details.min_rank = min(5, details.max_rank)
details.max_rank = min(25, details.max_rank)
return results
async def lookup_in_country_table(self, conn: SearchConnection,
@@ -448,29 +509,28 @@ class CountrySearch(AbstractSearch):
sql = sa.select(tgrid.c.country_code,
tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
.label('centroid'))\
.label('centroid'),
tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
.where(tgrid.c.country_code.in_(self.countries.values))\
.group_by(tgrid.c.country_code)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
sql = sql.where(_within_near(tgrid))
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
sub = sql.subquery('grid')
sql = sa.select(t.c.country_code,
(t.c.name
+ sa.func.coalesce(t.c.derived_name,
sa.cast('', type_=conn.t.types.Composite))
).label('name'),
sub.c.centroid)\
t.c.name.merge(t.c.derived_name).label('name'),
sub.c.centroid, sub.c.bbox)\
.join(sub, t.c.country_code == sub.c.country_code)
if details.geometry_output:
sql = _add_geometry_columns(sql, sub.c.centroid, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_country_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
results.append(result)
@@ -507,19 +567,16 @@ class PostcodeSearch(AbstractSearch):
penalty: SaExpression = sa.literal(self.penalty)
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
else_=2.0)
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
else_=1.0)
if details.near is not None:
if details.near_radius is not None:
sql = sql.where(_within_near(t))
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
sql = filter_by_area(sql, t, details)
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -528,13 +585,11 @@ class PostcodeSearch(AbstractSearch):
if self.lookups:
assert len(self.lookups) == 1
assert self.lookups[0].lookup_type == 'restrict'
tsearch = conn.t.search_name
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
.where(sa.func.array_cat(tsearch.c.name_vector,
tsearch.c.nameaddress_vector,
type_=ARRAY(sa.Integer))
.contains(self.lookups[0].tokens))
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
.contains(sa.type_coerce(self.lookups[0].tokens,
IntArray)))
for ranking in self.rankings:
penalty += ranking.sql_penalty(conn.t.search_name)
@@ -559,6 +614,8 @@ class PostcodeSearch(AbstractSearch):
class PlaceSearch(AbstractSearch):
""" Generic search for an address or named place.
"""
SEARCH_PRIO = 1
def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
super().__init__(sdata.penalty + extra_penalty)
self.countries = sdata.countries
@@ -578,15 +635,7 @@ class PlaceSearch(AbstractSearch):
tsearch = conn.t.search_name
sql: SaLambdaSelect = sa.lambda_stmt(lambda:
sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.centroid,
t.c.geometry.ST_Expand(0).label('bbox'))
.where(t.c.place_id == tsearch.c.place_id))
_select_placex(t).where(t.c.place_id == tsearch.c.place_id))
if details.geometry_output:
@@ -607,11 +656,11 @@ class PlaceSearch(AbstractSearch):
sql = sql.where(tsearch.c.address_rank > 9)
tpc = conn.t.postcode
pcs = self.postcodes.values
if self.expected_count > 1000:
if self.expected_count > 5000:
# Many results expected. Restrict by postcode.
sql = sql.where(sa.select(tpc.c.postcode)
.where(tpc.c.postcode.in_(pcs))
.where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
.where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
.exists())
# Less results, only have a preference for close postcodes
@@ -619,31 +668,30 @@ class PlaceSearch(AbstractSearch):
.where(tpc.c.postcode.in_(pcs))\
.scalar_subquery()
penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
else_=sa.func.coalesce(pc_near, 2.0))
else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
if details.viewbox is not None:
if details.bounded_viewbox:
if details.viewbox.area < 0.2:
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
elif self.expected_count >= 10000:
if details.viewbox.area < 0.5:
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX_PARAM,
use_index=details.viewbox.area < 0.2))
elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX2_PARAM,
use_index=details.viewbox.area < 0.5))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
else_=2.0)
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
else_=1.0)
if details.near is not None:
if details.near_radius is not None:
if details.near_radius < 0.1:
sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
else:
sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
NEAR_RADIUS_PARAM))
sql = sql.where(tsearch.c.centroid
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
@@ -652,7 +700,7 @@ class PlaceSearch(AbstractSearch):
or (details.viewbox is not None and details.viewbox.area < 0.5):
sql = sql.order_by(
penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
sql = sql.add_columns(t.c.importance)
@@ -662,10 +710,10 @@ class PlaceSearch(AbstractSearch):
sql = sql.order_by(sa.text('accuracy'))
if self.housenumbers:
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
hnr_list = '|'.join(self.housenumbers.values)
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
.where(sa.or_(tsearch.c.address_rank < 30,
t.c.housenumber.op('~*')(hnr_regexp)))
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
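For reference, \m and \M in the replaced regex variant are PostgreSQL's word-boundary markers, so the pattern matched a requested housenumber only as a whole word:

housenumbers = ['12', '12a']
hnr_regexp = f"\\m({'|'.join(housenumbers)})\\M"
print(hnr_regexp)  # \m(12|12a)\M -- matches '12' in '10-12' but not in '123'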
# Cross-check for housenumbers. This needs to be done on a rather large
# set: worst case there are 40,000 main streets in OSM.
@@ -673,10 +721,10 @@ class PlaceSearch(AbstractSearch):
# Housenumbers from placex
thnr = conn.t.placex.alias('hnr')
pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
place_sql = sa.select(pid_list)\
.where(thnr.c.parent_place_id == inner.c.place_id)\
.where(thnr.c.housenumber.op('~*')(hnr_regexp))\
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
.where(thnr.c.linked_place_id == None)\
.where(thnr.c.indexed_status == 0)
@@ -736,9 +784,6 @@ class PlaceSearch(AbstractSearch):
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if not details.excluded or not result.place_id in details.excluded:
results.append(result)
if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
@@ -758,6 +803,14 @@ class PlaceSearch(AbstractSearch):
sub.accuracy += 0.6
results.append(sub)
result.accuracy += 1.0 # penalty for missing housenumber
# Only add the street as a result, if it meets all other
# filter conditions.
if (not details.excluded or result.place_id not in details.excluded)\
and (not self.qualifiers or result.category in self.qualifiers.values)\
and result.rank_address >= details.min_rank:
result.accuracy += 1.0 # penalty for missing housenumber
results.append(result)
else:
results.append(result)
return results


@@ -7,13 +7,15 @@
"""
Public interface to the search code.
"""
from typing import List, Any, Optional, Iterator, Tuple
from typing import List, Any, Optional, Iterator, Tuple, Dict
import itertools
import re
import datetime as dt
import difflib
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResults, add_result_details
from nominatim.api.results import SearchResult, SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
@@ -62,7 +64,7 @@ class ForwardGeocoder:
log().table_dump('Searches for assignment',
_dump_searches(searches, query, num_searches))
num_searches = len(searches)
searches.sort(key=lambda s: s.penalty)
searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))
return query, searches
@@ -73,42 +75,99 @@ class ForwardGeocoder:
is found.
"""
log().section('Execute database searches')
results = SearchResults()
results: Dict[Any, SearchResult] = {}
end_time = dt.datetime.now() + self.timeout
num_results = 0
min_ranking = 1000.0
min_ranking = searches[0].penalty + 2.0
prev_penalty = 0.0
for i, search in enumerate(searches):
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
break
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
for result in await search.lookup(self.conn, self.params):
results.append(result)
min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
num_results = len(results)
log().var_dump('Params', self.params)
lookup_results = await search.lookup(self.conn, self.params)
for result in lookup_results:
rhash = (result.source_table, result.place_id,
result.housenumber, result.country_code)
prevresult = results.get(rhash)
if prevresult:
prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
else:
results[rhash] = result
min_ranking = min(min_ranking, result.accuracy * 1.2)
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
prev_penalty = search.penalty
if dt.datetime.now() >= end_time:
break
if results:
min_ranking = min(r.ranking for r in results)
results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
return SearchResults(results.values())
if results:
min_rank = min(r.rank_search for r in results)
def pre_filter_results(self, results: SearchResults) -> SearchResults:
""" Remove results that are significantly worse than the
best match.
"""
if results:
max_ranking = min(r.ranking for r in results) + 0.5
results = SearchResults(r for r in results if r.ranking < max_ranking)
return results
def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
""" Remove badly matching results, sort by ranking and
limit to the configured number of results.
"""
if results:
results.sort(key=lambda r: r.ranking)
min_rank = results[0].rank_search
min_ranking = results[0].ranking
results = SearchResults(r for r in results
if r.ranking + 0.05 * (r.rank_search - min_rank)
if r.ranking + 0.03 * (r.rank_search - min_rank)
< min_ranking + 0.5)
results.sort(key=lambda r: r.accuracy - r.calculated_importance())
results = SearchResults(results[:self.limit])
return results
def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
""" Adjust the accuracy of the localized result according to how well
they match the original query.
"""
assert self.query_analyzer is not None
qwords = [word for phrase in query.source
for word in re.split('[, ]+', phrase.text) if word]
if not qwords:
return
for result in results:
# Negative importance indicates ordering by distance, which is
# more important than word matching.
if not result.display_name\
or (result.importance is not None and result.importance < 0):
continue
distance = 0.0
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
result.country_code or '')))
words = set((w for w in norm.split(' ') if w))
if not words:
continue
for qword in qwords:
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
if wdist < 0.5:
distance += len(qword)
else:
distance += (1.0 - wdist) * len(qword)
# Compensate for the fact that country names do not get a
# match penalty yet by the tokenizer.
# Temporary hack that needs to be removed!
if result.rank_address == 4:
distance *= 2
result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
async def lookup_pois(self, categories: List[Tuple[str, str]],
phrases: List[Phrase]) -> SearchResults:
""" Look up places by category. If phrase is given, a place search
@@ -123,13 +182,17 @@ class ForwardGeocoder:
if query:
searches = [wrap_near_search(categories, s) for s in searches[:50]]
results = await self.execute_searches(query, searches)
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
else:
results = SearchResults()
else:
search = build_poi_search(categories, self.params.countries)
results = await search.lookup(self.conn, self.params)
await add_result_details(self.conn, results, self.params)
await add_result_details(self.conn, results, self.params)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
@@ -149,7 +212,12 @@ class ForwardGeocoder:
if searches:
# Execute SQL until an appropriate result is found.
results = await self.execute_searches(query, searches[:50])
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
self.rerank_by_query(query, results)
log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
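The reworked execute_searches() no longer collects results in a flat list: each result is keyed by (source_table, place_id, housenumber, country_code), and a duplicate only lowers the stored accuracy. A self-contained sketch of that de-duplication, with plain namespaces instead of SearchResult:

```python
# De-duplication as in execute_searches(): best (lowest) accuracy wins.
from types import SimpleNamespace

def dedupe(results):
    best = {}
    for r in results:
        key = (r.source_table, r.place_id, r.housenumber, r.country_code)
        prev = best.get(key)
        if prev is None:
            best[key] = r
        else:
            prev.accuracy = min(prev.accuracy, r.accuracy)
    return list(best.values())

a = SimpleNamespace(source_table='placex', place_id=1, housenumber=None,
                    country_code='de', accuracy=0.4)
b = SimpleNamespace(source_table='placex', place_id=1, housenumber=None,
                    country_code='de', accuracy=0.2)
assert [r.accuracy for r in dedupe([a, b])] == [0.2]
```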


@@ -8,7 +8,6 @@
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import difflib
@@ -22,6 +21,7 @@ from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.search import query as qmod
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
from nominatim.db.sqlalchemy_types import Json
DB_TO_TOKEN_TYPE = {
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
penalty = 0.0
if row.type == 'w':
penalty = 0.3
elif row.type == 'W':
if len(row.word_token) == 1 and row.word_token == row.word:
penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
elif row.type == 'C':
if len(row.word_token) == 1:
penalty = 0.3
if row.info is None:
lookup_word = row.word
@@ -153,7 +159,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', self.conn.t.types.Json))
sa.Column('info', Json))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
@@ -178,13 +184,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
if row.type == 'S':
if row.info['op'] in ('in', 'near'):
if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token)
if trange.start == 0 and trange.end == query.num_token_slots():
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
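The new penalties for housenumber ('H') tokens can be read off directly: every non-digit, non-space character costs 0.1, and a token containing no digit at all pays an extra 0.2 per character beyond the first. A worked example of just that rule:

```python
# Worked example of the 'H' token penalty introduced above.
def housenumber_penalty(word_token: str) -> float:
    penalty = sum(0.1 for c in word_token if c != ' ' and not c.isdigit())
    if all(not c.isdigit() for c in word_token):
        penalty += 0.2 * (len(word_token) - 1)
    return penalty

assert abs(housenumber_penalty('23a') - 0.1) < 1e-9   # one letter
assert abs(housenumber_penalty('abc') - 0.7) < 1e-9   # 3*0.1 + 2*0.2
```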


@@ -44,7 +44,7 @@ class LegacyToken(qmod.Token):
@property
def info(self) -> Dict[str, Any]:
""" Dictionary of additional propoerties of the token.
""" Dictionary of additional properties of the token.
Should only be used for debugging purposes.
"""
return {'category': self.category,
@@ -107,15 +107,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
if ttype == qmod.TokenType.CATEGORY:
if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.CATEGORY, token)
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)
@@ -127,6 +127,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form.
This only removes case, so some difference with the normalization
in the phrase remains.
"""
return text.lower()
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
@@ -186,7 +195,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:]
else:
ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):


@@ -46,7 +46,7 @@ class TokenType(enum.Enum):
""" Country name or reference. """
QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
CATEGORY = enum.auto()
NEAR_ITEM = enum.auto()
""" Special term used as searchable object(e.g. supermarket in ...). """
@@ -70,14 +70,16 @@ class PhraseType(enum.Enum):
COUNTRY = enum.auto()
""" Contains the country name or code. """
def compatible_with(self, ttype: TokenType) -> bool:
def compatible_with(self, ttype: TokenType,
is_full_phrase: bool) -> bool:
""" Check if the given token type can be used with the phrase type.
"""
if self == PhraseType.NONE:
return True
return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL,
TokenType.QUALIFIER, TokenType.CATEGORY)
return ttype in (TokenType.WORD, TokenType.PARTIAL)\
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
if self == PhraseType.POSTCODE:
@@ -167,7 +169,7 @@ class TokenList:
@dataclasses.dataclass
class QueryNode:
""" A node of the querry representing a break between terms.
""" A node of the query representing a break between terms.
"""
btype: BreakType
ptype: PhraseType
@@ -244,7 +246,9 @@ class QueryStruct:
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
if snode.ptype.compatible_with(ttype):
full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
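add_token() now computes is_full_phrase from the break types of the nodes that delimit the token range: the range must begin at a query or phrase boundary and end at one. A minimal sketch of the test, with BreakType reduced to the values needed here:

```python
# Sketch of the full-phrase test used by add_token() above.
import enum

class BreakType(enum.Enum):
    START = enum.auto()    # beginning of the query
    PHRASE = enum.auto()   # break between two phrases
    WORD = enum.auto()     # break between words inside a phrase
    END = enum.auto()      # end of the query

def is_full_phrase(start_break: BreakType, end_break: BreakType) -> bool:
    return start_break in (BreakType.START, BreakType.PHRASE) \
           and end_break in (BreakType.PHRASE, BreakType.END)

assert is_full_phrase(BreakType.START, BreakType.END)       # whole query
assert not is_full_phrase(BreakType.START, BreakType.WORD)  # stops mid-phrase
```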


@@ -19,7 +19,7 @@ if TYPE_CHECKING:
from nominatim.api.search.query import Phrase, QueryStruct
class AbstractQueryAnalyzer(ABC):
""" Class for analysing incomming queries.
""" Class for analysing incoming queries.
Query analyzers are tied to the tokenizer used on import.
"""
@@ -30,6 +30,15 @@ class AbstractQueryAnalyzer(ABC):
"""
@abstractmethod
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create a query analyzer for the tokenizer used by the database.
"""


@@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None
category: Optional[qmod.TokenRange] = None
near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None
@@ -64,15 +64,15 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange
elif token.ttype == qmod.TokenType.CATEGORY:
out.category = token.trange
elif token.ttype == qmod.TokenType.NEAR_ITEM:
out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange
return out
class _TokenSequence:
""" Working state used to put together the token assignements.
""" Working state used to put together the token assignments.
Represents an intermediate state while traversing the tokenized
query.
@@ -109,7 +109,7 @@ class _TokenSequence:
"""
# Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
@@ -132,6 +132,11 @@ class _TokenSequence:
# Name tokens are always acceptable and don't change direction
if ttype == qmod.TokenType.PARTIAL:
# qualifiers cannot appear in the middle of the query. They need
# to be near the next phrase.
if self.direction == -1 \
and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
return None
return self.direction
# Other tokens may only appear once
@@ -165,22 +170,22 @@ class _TokenSequence:
if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1
if ttype == qmod.TokenType.CATEGORY:
if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction
if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1:
if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.CATEGORY
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1
return None
if self.direction == -1:
return -1
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0:
return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
@@ -233,10 +238,10 @@ class _TokenSequence:
def recheck_sequence(self) -> bool:
""" Check that the sequence is a fully valid token assignment
and addapt direction and penalties further if necessary.
and adapt direction and penalties further if necessary.
This function catches some impossible assignments that need
forward context and can therefore not be exluded when building
forward context and can therefore not be excluded when building
the assignment.
"""
# housenumbers may not be further than 2 words from the beginning.
@@ -253,7 +258,7 @@ class _TokenSequence:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
self.penalty += 1.0
return True
@@ -272,10 +277,10 @@ class _TokenSequence:
# <address>,<postcode> should give preference to address search
if base.postcode.start == 0:
penalty = self.penalty
self.direction = -1 # name searches are only possbile backwards
self.direction = -1 # name searches are only possible backwards
else:
penalty = self.penalty + 0.1
self.direction = 1 # name searches are only possbile forwards
self.direction = 1 # name searches are only possible forwards
yield dataclasses.replace(base, penalty=penalty)
@@ -368,7 +373,7 @@ class _TokenSequence:
# Postcode or country-only search
if not base.address:
if not base.housenumber and (base.postcode or base.country or base.category):
if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
@@ -385,7 +390,7 @@ class _TokenSequence:
yield from self._get_assignments_address_backward(base, query)
# variant for special housenumber searches
if base.housenumber:
if base.housenumber and not base.qualifier:
yield dataclasses.replace(base, penalty=self.penalty)


@@ -5,7 +5,7 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Classes and function releated to status call.
Classes and function related to status call.
"""
from typing import Optional
import datetime as dt
@@ -36,6 +36,12 @@ async def get_status(conn: SearchConnection) -> StatusResult:
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
status.data_updated = await conn.scalar(sql)
if status.data_updated is not None:
if status.data_updated.tzinfo is None:
status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
else:
status.data_updated = status.data_updated.astimezone(dt.timezone.utc)
# Database version
try:
verstr = await conn.get_property('database_version')
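The added branch normalizes lastimportdate to UTC: naive timestamps from the database are assumed to already be in UTC, aware ones are converted. The same logic as a standalone helper:

```python
# UTC normalization as in get_status() above.
import datetime as dt

def to_utc(ts: dt.datetime) -> dt.datetime:
    if ts.tzinfo is None:
        return ts.replace(tzinfo=dt.timezone.utc)
    return ts.astimezone(dt.timezone.utc)

naive = dt.datetime(2024, 3, 5, 12, 0)
aware = dt.datetime(2024, 3, 5, 13, 0,
                    tzinfo=dt.timezone(dt.timedelta(hours=1)))
assert to_utc(naive).tzinfo == dt.timezone.utc
assert to_utc(aware) == to_utc(naive)   # 13:00+01:00 == 12:00 UTC
```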


@@ -17,6 +17,7 @@ from struct import unpack
from binascii import unhexlify
from nominatim.errors import UsageError
from nominatim.api.localization import Locales
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
@@ -315,7 +316,7 @@ class DataLayer(enum.Flag):
for reverse and forward search.
"""
ADDRESS = enum.auto()
""" The address layer contains all places relavant for addresses:
""" The address layer contains all places relevant for addresses:
fully qualified addresses with a house number (or a house name equivalent,
for some addresses) and places that can be part of an address like
roads, cities, states.
@@ -386,7 +387,7 @@ TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
@dataclasses.dataclass
class LookupDetails:
""" Collection of parameters that define the amount of details
""" Collection of parameters that define which kind of details are
returned with a lookup or details result.
"""
geometry_output: GeometryFormat = GeometryFormat.NONE
@@ -413,6 +414,9 @@ class LookupDetails:
0.0 means the original geometry is kept. The higher the value, the
more the geometry gets simplified.
"""
locales: Locales = Locales()
""" Preferred languages for localization of results.
"""
@classmethod
def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
@@ -534,11 +538,13 @@ class SearchDetails(LookupDetails):
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
or self.layers is not None and not self.layers)
or (self.layers is not None and not self.layers)
or (self.max_rank <= 4 and
self.layers is not None and not self.layers & DataLayer.ADDRESS))
def layer_enabled(self, layer: DataLayer) -> bool:
""" Check if the given layer has been choosen. Also returns
""" Check if the given layer has been chosen. Also returns
true when layer restriction has been disabled completely.
"""
return self.layers is None or bool(self.layers & layer)
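layer_enabled() treats a missing layer set as "no restriction at all". A small self-contained sketch with a reduced DataLayer flag:

```python
# Layer check as in SearchDetails.layer_enabled() above.
import enum

class DataLayer(enum.Flag):
    ADDRESS = enum.auto()
    POI = enum.auto()

def layer_enabled(layers, layer):
    return layers is None or bool(layers & layer)

assert layer_enabled(None, DataLayer.POI)                       # unrestricted
assert layer_enabled(DataLayer.ADDRESS | DataLayer.POI, DataLayer.POI)
assert not layer_enabled(DataLayer.ADDRESS, DataLayer.POI)
```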


@@ -5,7 +5,7 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Hard-coded information about tag catagories.
Hard-coded information about tag categories.
These tables have been copied verbatim from the old PHP code. For future
version a more flexible formatting is required.
@@ -44,7 +44,7 @@ def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, st
def bbox_from_result(result: Union[napi.ReverseResult, napi.SearchResult]) -> napi.Bbox:
""" Compute a bounding box for the result. For ways and relations
a given bounding box is used. For all other objects, a box is computed
around the centroid according to dimensions dereived from the
around the centroid according to dimensions derived from the
search rank.
"""
if (result.osm_object and result.osm_object[0] == 'N') or result.bbox is None:


@@ -247,7 +247,8 @@ def format_base_geocodejson(results: Union[napi.ReverseResults, napi.SearchResul
out.key('admin').start_object()
if result.address_rows:
for line in result.address_rows:
if line.isaddress and (line.admin_level or 15) < 15 and line.local_name:
if line.isaddress and (line.admin_level or 15) < 15 and line.local_name \
and line.category[0] == 'boundary' and line.category[1] == 'administrative':
out.keyval(f"level{line.admin_level}", line.local_name)
out.end_object().next()


@@ -37,7 +37,7 @@ def zoom_to_rank(zoom: int) -> int:
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
'country': (4, 4),
'state': (8, 8),
'city': (14, 16),
@@ -155,7 +155,7 @@ COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?'
)]
def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
""" Look for something that is formated like a coordinate at the
""" Look for something that is formatted like a coordinate at the
beginning or end of the query. If found, extract the coordinate and
return the remaining query (or the empty string if the query
consisted of nothing but a coordinate).


@@ -240,7 +240,7 @@ class ASGIAdaptor(abc.ABC):
def parse_geometry_details(self, fmt: str) -> Dict[str, Any]:
""" Create details strucutre from the supplied geometry parameters.
""" Create details structure from the supplied geometry parameters.
"""
numgeoms = 0
output = napi.GeometryFormat.NONE
@@ -308,7 +308,8 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
keywords=params.get_bool('keywords', False),
geometry_output = napi.GeometryFormat.GEOJSON
if params.get_bool('polygon_geojson', False)
else napi.GeometryFormat.NONE
else napi.GeometryFormat.NONE,
locales=locales
)
if debug:
@@ -317,8 +318,6 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
if result is None:
params.raise_error('No place with that OSM ID found.', status=404)
result.localize(locales)
output = formatting.format_result(result, fmt,
{'locales': locales,
'group_hierarchy': params.get_bool('group_hierarchy', False),
@@ -337,6 +336,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
details = params.parse_geometry_details(fmt)
details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
details['layers'] = params.get_layers()
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
result = await api.reverse(coord, **details)
@@ -357,9 +357,6 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
if result:
result.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
output = formatting.format_result(napi.ReverseResults([result] if result else []),
fmt, fmt_options)
@@ -372,6 +369,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
fmt = params.parse_format(napi.SearchResults, 'xml')
debug = params.setup_debugging()
details = params.parse_geometry_details(fmt)
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
places = []
for oid in (params.get('osm_ids') or '').split(','):
@@ -394,8 +392,6 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
output = formatting.format_result(results, fmt, fmt_options)
return params.build_response(output, num_results=len(results))
@@ -456,6 +452,8 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
else:
details['layers'] = params.get_layers()
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
# unstructured query parameters
query = params.get('q', None)
# structured query parameters
@@ -480,8 +478,6 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
except UsageError as err:
params.raise_error(str(err))
results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
if details['dedupe'] and len(results) > 1:
results = helpers.deduplicate_results(results, max_results)
@@ -535,7 +531,7 @@ async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -
async def polygons_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /polygons endpoint.
This is a special endpoint that shows polygons that have changed
thier size but are kept in the Nominatim database with their
their size but are kept in the Nominatim database with their
old area to minimize disruption.
"""
fmt = params.parse_format(RawDataList, 'json')
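All endpoints now follow the same pattern: the Accept-Language header is converted once into a Locales object and handed to the API call through the details dict, replacing the former result.localize() step after the lookup. An illustrative (not verbatim) reverse handler showing the flow; the wrapper function is hypothetical, the Locales call is taken from the diff:

```python
# Hypothetical handler demonstrating the new localization flow;
# assumes a Nominatim installation for the import.
import nominatim.api as napi

async def reverse_with_locales(api, params, coord):
    details = {
        'max_rank': 18,   # illustrative; normally derived from the zoom level
        'locales': napi.Locales.from_accept_languages(
            params.get_accepted_languages()),
    }
    return await api.reverse(coord, **details)
```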


@@ -159,13 +159,15 @@ class AdminServe:
group = parser.add_argument_group('Server arguments')
group.add_argument('--server', default='127.0.0.1:8088',
help='The address the server will listen to.')
group.add_argument('--engine', default='php',
group.add_argument('--engine', default='falcon',
choices=('php', 'falcon', 'starlette'),
help='Webserver framework to run. (default: php)')
help='Webserver framework to run. (default: falcon)')
def run(self, args: NominatimArgs) -> int:
if args.engine == 'php':
if args.config.lib_dir.php is None:
raise UsageError("PHP frontend not configured.")
run_php_server(args.server, args.project_dir / 'website')
else:
import uvicorn # pylint: disable=import-outside-toplevel
@@ -206,6 +208,7 @@ def get_set_parser() -> CommandlineParser:
parser.add_subcommand('admin', clicmd.AdminFuncs())
parser.add_subcommand('export', clicmd.QueryExport())
parser.add_subcommand('convert', clicmd.ConvertDB())
parser.add_subcommand('serve', AdminServe())
parser.add_subcommand('search', clicmd.APISearch())


@@ -25,3 +25,4 @@ from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
from nominatim.clicmd.export import QueryExport as QueryExport
from nominatim.clicmd.convert import ConvertDB as ConvertDB


@@ -41,6 +41,8 @@ class AdminFuncs:
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
@@ -55,7 +57,9 @@ class AdminFuncs:
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
def run(self, args: NominatimArgs) -> int:
# pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)
@@ -81,6 +85,12 @@ class AdminFuncs:
collect_os_info.report_system_information(args.config)
return 0
if args.clean_deleted:
LOG.warning('Cleaning up deleted relations')
from ..tools import admin
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
return 0
return 1
@@ -90,17 +100,20 @@ class AdminFuncs:
api = napi.NominatimAPI(args.project_dir)
try:
if args.target != 'reverse':
if args.target != 'search':
for _ in range(1000):
api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
address_details=True)
if args.target != 'search':
if args.target != 'reverse':
from ..tokenizer import factory as tokenizer_factory
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
with connect(args.config.get_libpq_dsn()) as conn:
words = tokenizer.most_frequent_words(conn, 1000)
if conn.table_exists('search_name'):
words = tokenizer.most_frequent_words(conn, 1000)
else:
words = []
for word in words:
api.search(word)


@@ -109,7 +109,8 @@ class APISearch:
'countries': args.countrycodes,
'excluded': args.exclude_place_ids,
'viewbox': args.viewbox,
'bounded_viewbox': args.bounded
'bounded_viewbox': args.bounded,
'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
}
if args.query:
@@ -124,9 +125,6 @@ class APISearch:
country=args.country,
**params)
for result in results:
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
if args.dedupe and len(results) > 1:
results = deduplicate_results(results, args.limit)
@@ -187,14 +185,14 @@ class APIReverse:
layers=args.get_layers(napi.DataLayer.ADDRESS | napi.DataLayer.POI),
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
geometry_simplification=args.polygon_threshold)
geometry_simplification=args.polygon_threshold,
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
if result:
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
output = api_output.format_result(
napi.ReverseResults([result]),
args.format,
@@ -249,10 +247,8 @@ class APILookup:
results = api.lookup(places,
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
geometry_simplification=args.polygon_threshold or 0.0)
for result in results:
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
geometry_simplification=args.polygon_threshold or 0.0,
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
output = api_output.format_result(
results,
@@ -326,6 +322,7 @@ class APIDetails:
api = napi.NominatimAPI(args.project_dir)
locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
result = api.details(place,
address_details=args.addressdetails,
linked_places=args.linkedplaces,
@@ -333,13 +330,11 @@ class APIDetails:
keywords=args.keywords,
geometry_output=napi.GeometryFormat.GEOJSON
if args.polygon_geojson
else napi.GeometryFormat.NONE)
else napi.GeometryFormat.NONE,
locales=locales)
if result:
locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
result.localize(locales)
output = api_output.format_result(
result,
'json',


@@ -72,6 +72,7 @@ class NominatimArgs:
check_database: bool
migrate: bool
collect_os_info: bool
clean_deleted: str
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]
@@ -86,6 +87,7 @@ class NominatimArgs:
offline: bool
ignore_errors: bool
index_noanalyse: bool
prepare_database: bool
# Arguments to 'index'
boundaries_only: bool
@@ -100,6 +102,9 @@ class NominatimArgs:
language: Optional[str]
restrict_to_country: Optional[str]
# Arguments to 'convert'
output: Path
# Arguments to 'refresh'
postcodes: bool
word_tokens: bool


@@ -0,0 +1,95 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the 'convert' subcommand.
"""
from typing import Set, Any, Union, Optional, Sequence
import argparse
import asyncio
from pathlib import Path
from nominatim.clicmd.args import NominatimArgs
from nominatim.errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
# pylint: disable=E0012,C0415
class WithAction(argparse.Action):
""" Special action that saves a list of flags, given on the command-line
as `--with-foo` or `--without-foo`.
"""
def __init__(self, option_strings: Sequence[str], dest: Any,
default: bool = True, **kwargs: Any) -> None:
if 'nargs' in kwargs:
raise ValueError("nargs not allowed.")
if option_strings is None:
raise ValueError("Positional parameter not allowed.")
self.dest_set = kwargs.pop('dest_set')
full_option_strings = []
for opt in option_strings:
if not opt.startswith('--'):
raise ValueError("short-form options not allowed")
if default:
self.dest_set.add(opt[2:])
full_option_strings.append(f"--with-{opt[2:]}")
full_option_strings.append(f"--without-{opt[2:]}")
super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None) -> None:
assert option_string
if option_string.startswith('--with-'):
self.dest_set.add(option_string[7:])
if option_string.startswith('--without-'):
self.dest_set.discard(option_string[10:])
class ConvertDB:
""" Convert an existing database into a different format. (EXPERIMENTAL)
Dump a read-only version of the database in a different format.
At the moment only a SQLite database suitable for reverse lookup
can be created.
"""
def __init__(self) -> None:
self.options: Set[str] = set()
def add_args(self, parser: argparse.ArgumentParser) -> None:
parser.add_argument('--format', default='sqlite',
choices=('sqlite', ),
help='Format of the output database (must be sqlite currently)')
parser.add_argument('--output', '-o', required=True, type=Path,
help='File to write the database to.')
group = parser.add_argument_group('Switches to define database layout'
' (currently no effect)')
group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for reverse and lookup API'
' (default: enabled)')
group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for search API (default: disabled)')
group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
help='Enable/disable support for details API (default: enabled)')
def run(self, args: NominatimArgs) -> int:
if args.output.exists():
raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
if args.format == 'sqlite':
from ..tools import convert_sqlite
asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
return 0
return 1
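Each option declared with the WithAction helper above expands into a --with-X/--without-X pair that adds X to or removes it from a shared set. A hypothetical usage sketch:

```python
# Hypothetical use of the WithAction helper from this new file.
import argparse
from nominatim.clicmd.convert import WithAction

opts: set = set()
parser = argparse.ArgumentParser()
parser.add_argument('--reverse', action=WithAction, dest_set=opts, default=True)
parser.add_argument('--search', action=WithAction, dest_set=opts, default=False)

parser.parse_args(['--without-reverse', '--with-search'])
assert opts == {'search'}   # 'reverse' removed, 'search' added
```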


@@ -102,7 +102,8 @@ async def export(args: NominatimArgs) -> int:
async with api.begin() as conn, api.begin() as detail_conn:
t = conn.t.placex
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.admin_level,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
@@ -153,17 +154,15 @@ async def dump_results(conn: napi.SearchConnection,
results: List[ReverseResult],
writer: 'csv.DictWriter[str]',
lang: Optional[str]) -> None:
await add_result_details(conn, results,
LookupDetails(address_details=True))
locale = napi.Locales([lang] if lang else None)
await add_result_details(conn, results,
LookupDetails(address_details=True, locales=locale))
for result in results:
data = {'placeid': result.place_id,
'postcode': result.postcode}
result.localize(locale)
for line in (result.address_rows or []):
if line.isaddress and line.local_name:
if line.category[1] == 'postcode':


@@ -110,7 +110,7 @@ class UpdateRefresh:
if args.word_counts:
LOG.warning('Recompute word statistics')
self._get_tokenizer(args.config).update_statistics()
self._get_tokenizer(args.config).update_statistics(args.config)
if args.address_levels:
LOG.warning('Updating address levels')
@@ -128,7 +128,7 @@ class UpdateRefresh:
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) > 0:
LOG.fatal('FATAL: Cannot update sendary importance raster data')
LOG.fatal('FATAL: Cannot update secondary importance raster data')
return 1
if args.functions:
@@ -141,10 +141,10 @@ class UpdateRefresh:
if args.wiki_data:
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
or args.project_dir)
LOG.warning('Import wikipdia article importance from %s', data_path)
LOG.warning('Import wikipedia article importance from %s', data_path)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.fatal('FATAL: Wikipedia importance dump file not found')
LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
return 1
# Attention: importance MUST come after wiki data import.


@@ -142,7 +142,7 @@ class UpdateReplication:
if not args.do_index:
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
raise UsageError("Bad argument '--no-index'.")
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)


@@ -39,14 +39,15 @@ class SetupAll:
"""
def add_args(self, parser: argparse.ArgumentParser) -> None:
group_name = parser.add_argument_group('Required arguments')
group1 = group_name.add_mutually_exclusive_group(required=True)
group1 = parser.add_argument_group('Required arguments')
group1.add_argument('--osm-file', metavar='FILE', action='append',
help='OSM file to be imported'
' (repeat for importing multiple files)')
' (repeat for importing multiple files)',
default=None)
group1.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted')
choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted',
default=None)
group2 = parser.add_argument_group('Optional arguments')
group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
@@ -65,9 +66,11 @@ class SetupAll:
help='Continue import even when errors in SQL are present')
group3.add_argument('--index-noanalyse', action='store_true',
help='Do not perform analyse operations during index (expert only)')
group3.add_argument('--prepare-database', action='store_true',
help='Create the database but do not import any data')
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
from ..data import country_info
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
@@ -76,43 +79,61 @@ class SetupAll:
country_info.setup_country_config(args.config)
if args.continue_at is None:
if args.osm_file is None and args.continue_at is None and not args.prepare_database:
raise UsageError("No input files (use --osm-file).")
if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
if args.continue_at is not None and args.prepare_database:
raise UsageError(
"Cannot use --continue and --prepare-database together."
)
if args.prepare_database or args.continue_at is None:
LOG.warning('Creating database')
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
rouser=args.config.DATABASE_WEBUSER)
if args.prepare_database:
return 0
if args.continue_at in (None, 'import-from-file'):
files = args.get_osm_file_list()
if not files:
raise UsageError("No input files (use --osm-file).")
LOG.warning('Creating database')
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
rouser=args.config.DATABASE_WEBUSER)
if args.continue_at in ('import-from-file', None):
# Check if the correct plugins are installed
database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
LOG.warning('Setting up country tables')
country_info.setup_country_tables(args.config.get_libpq_dsn(),
args.config.lib_dir.data,
args.no_partitions)
LOG.warning('Setting up country tables')
country_info.setup_country_tables(args.config.get_libpq_dsn(),
args.config.lib_dir.data,
args.no_partitions)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1),
drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Importing OSM data file')
database_import.import_osm_data(files,
args.osm2pgsql_options(0, 1),
drop=args.no_updates,
ignore_errors=args.ignore_errors)
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
'Calculating importance values of locations will not '
'use Wikipedia importance data.')
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
'Calculating importance values of locations will not '
'use Wikipedia importance data.')
LOG.warning('Importing secondary importance raster data')
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) != 0:
LOG.error('Secondary importance file not imported. '
'Falling back to default ranking.')
LOG.warning('Importing secondary importance raster data')
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) != 0:
LOG.error('Secondary importance file not imported. '
'Falling back to default ranking.')
self._setup_tables(args.config, args.reverse_only)
self._setup_tables(args.config, args.reverse_only)
if args.continue_at is None or args.continue_at == 'load-data':
if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn)
@@ -123,12 +144,13 @@ class SetupAll:
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
if args.continue_at is None or args.continue_at == 'load-data':
if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Calculate postcodes')
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
if args.continue_at in \
('import-from-file', 'load-data', 'indexing', None):
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
@@ -146,7 +168,7 @@ class SetupAll:
tokenizer.finalize_import(args.config)
LOG.warning('Recompute word counts')
tokenizer.update_statistics()
tokenizer.update_statistics(args.config)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
@@ -185,7 +207,7 @@ class SetupAll:
"""
from ..tokenizer import factory as tokenizer_factory
if continue_at is None or continue_at == 'load-data':
if continue_at in ('import-from-file', 'load-data', None):
# (re)initialise the tokenizer data
return tokenizer_factory.create_tokenizer(config)
@@ -197,12 +219,11 @@ class SetupAll:
""" Determine the database date and set the status accordingly.
"""
with connect(dsn) as conn:
if not offline:
try:
dbdate = status.compute_database_date(conn)
status.set_status(conn, dbdate)
LOG.info('Database is at %s.', dbdate)
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
try:
dbdate = status.compute_database_date(conn, offline)
status.set_status(conn, dbdate)
LOG.info('Database is at %s.', dbdate)
except Exception as exc: # pylint: disable=broad-except
LOG.error('Cannot determine date of database: %s', exc)
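The restructured import treats --continue as a position in a fixed pipeline: a step executes when the named stage is at or before it, and a plain import (continue_at is None) runs everything. A sketch of that gating:

```python
# Stage gating as used by the membership checks above.
from typing import Optional

STAGES = ['import-from-file', 'load-data', 'indexing', 'db-postprocess']

def runs(step: str, continue_at: Optional[str]) -> bool:
    """True if `step` must be executed for the given --continue value."""
    if continue_at is None:
        return True                       # fresh import, run all stages
    return STAGES.index(continue_at) <= STAGES.index(step)

assert runs('indexing', 'load-data')      # resuming before indexing
assert not runs('load-data', 'indexing')  # data already loaded
```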


@@ -25,7 +25,7 @@ class CountryPostcodeMatcher:
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
self.pattern = re.compile(pc_pattern)
self.output = config.get('output', r'\g<0>')
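The fix embeds the country's postcode pattern directly into norm_pattern, so the prefix-stripping regex only accepts strings that actually look like a postcode. A worked example with an illustrative 'dddd ll' pattern and country code 'nl':

```python
# Worked example of the fixed norm_pattern; pattern and country code
# are illustrative, not taken from the real configuration.
import re

pc_pattern = 'dddd ll'.replace('d', '[0-9]').replace('l', '[A-Z]')
norm_pattern = re.compile(f'\\s*(?:NL[ -]?)?({pc_pattern})\\s*')

assert norm_pattern.fullmatch('NL 1234 AB').group(1) == '1234 AB'
assert norm_pattern.fullmatch('foo bar') is None   # rejected since the fix
```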


@@ -69,8 +69,8 @@ class DBConnection:
self.current_params: Optional[Sequence[Any]] = None
self.ignore_sql_errors = ignore_sql_errors
self.conn: Optional['psycopg2.connection'] = None
self.cursor: Optional['psycopg2.cursor'] = None
self.conn: Optional['psycopg2._psycopg.connection'] = None
self.cursor: Optional['psycopg2._psycopg.cursor'] = None
self.connect(cursor_factory=cursor_factory)
def close(self) -> None:
@@ -78,7 +78,7 @@ class DBConnection:
"""
if self.conn is not None:
if self.cursor is not None:
self.cursor.close() # type: ignore[no-untyped-call]
self.cursor.close()
self.cursor = None
self.conn.close()


@@ -5,17 +5,19 @@
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Import the base libary to use with asynchronous SQLAlchemy.
Import the base library to use with asynchronous SQLAlchemy.
"""
# pylint: disable=invalid-name
# pylint: disable=invalid-name, ungrouped-imports, unused-import
from typing import Any
try:
import sqlalchemy.dialects.postgresql.psycopg
import psycopg
PGCORE_LIB = 'psycopg'
PGCORE_ERROR: Any = psycopg.Error
except ModuleNotFoundError:
import sqlalchemy.dialects.postgresql.asyncpg
import asyncpg
PGCORE_LIB = 'asyncpg'
PGCORE_ERROR = asyncpg.PostgresError


@@ -31,7 +31,7 @@ class Cursor(psycopg2.extras.DictCursor):
""" Query execution that logs the SQL query when debugging is enabled.
"""
if LOG.isEnabledFor(logging.DEBUG):
LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[no-untyped-call]
LOG.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
@@ -174,6 +174,15 @@ class Connection(psycopg2.extensions.connection):
return (int(version_parts[0]), int(version_parts[1]))
def extension_loaded(self, extension_name: str) -> bool:
""" Return True if the hstore extension is loaded in the database.
"""
with self.cursor() as cur:
cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
return cur.rowcount > 0
class ConnectionContext(ContextManager[Connection]):
""" Context manager of the connection that also provides direct access
to the underlying connection.
@@ -230,7 +239,7 @@ _PG_CONNECTION_STRINGS = {
def get_pg_env(dsn: str,
base_env: Optional[SysEnv] = None) -> Dict[str, str]:
""" Return a copy of `base_env` with the environment variables for
PostgresSQL set up from the given database connection string.
PostgreSQL set up from the given database connection string.
If `base_env` is None, then the OS environment is used as a base
environment.
"""


@@ -7,7 +7,7 @@
"""
Preprocessing of SQL files.
"""
from typing import Set, Dict, Any
from typing import Set, Dict, Any, cast
import jinja2
from nominatim.db.connection import Connection
@@ -28,13 +28,24 @@ def _get_partitions(conn: Connection) -> Set[int]:
def _get_tables(conn: Connection) -> Set[str]:
""" Return the set of tables currently in use.
Only includes non-partitioned tables.
"""
with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur)))
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
""" Returns the version of the slim middle tables.
"""
if 'osm2pgsql_properties' not in tables:
return '1'
with conn.cursor() as cur:
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
row = cur.fetchone()
return cast(str, row[0]) if row is not None else '1'
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
""" Returns a dict with tablespace expressions for the different tablespace
@@ -84,12 +95,25 @@ class SQLPreprocessor:
db_info['tables'] = _get_tables(conn)
db_info['reverse_only'] = 'search_name' not in db_info['tables']
db_info['tablespace'] = _setup_tablespace_sql(config)
db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
self.env.globals['config'] = config
self.env.globals['db'] = db_info
self.env.globals['postgres'] = _setup_postgresql_features(conn)
def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
""" Execute the given SQL template string on the connection.
The keyword arguments may supply additional parameters
for preprocessing.
"""
sql = self.env.from_string(template).render(**kwargs)
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()
def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
""" Execute the given SQL file on the connection. The keyword arguments
may supply additional parameters for preprocessing.
@@ -103,7 +127,7 @@ class SQLPreprocessor:
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
**kwargs: Any) -> None:
""" Execure the given SQL files using parallel asynchronous connections.
""" Execute the given SQL files using parallel asynchronous connections.
The keyword arguments may supply additional parameters for
preprocessing.
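The new run_string() renders an ad-hoc Jinja template with the same globals as run_sql_file() and executes the result. A standalone sketch of the rendering half, with a plain dict standing in for the collected database info (execution against the connection is omitted):

```python
# Rendering step of run_string(), sketched with plain jinja2.
import jinja2

env = jinja2.Environment()
env.globals['db'] = {'middle_db_format': '2', 'tables': {'placex'}}

sql = env.from_string(
    "{% if db.middle_db_format == '2' %}SELECT 'new middle format'"
    "{% else %}SELECT 'legacy middle format'{% endif %}").render()
print(sql)   # -> SELECT 'new middle format'
```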


@@ -7,28 +7,215 @@
"""
Custom functions and expressions for SQLAlchemy.
"""
from __future__ import annotations
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
def select_index_placex_geometry_reverse_lookuppolygon(table: str) -> 'sa.TextClause':
""" Create an expression with the necessary conditions over a placex
table that the index 'idx_placex_geometry_reverse_lookupPolygon'
can be used.
"""
return sa.text(f"ST_GeometryType({table}.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
f" AND {table}.rank_address between 4 and 25"
f" AND {table}.type != 'postcode'"
f" AND {table}.name is not null"
f" AND {table}.indexed_status = 0"
f" AND {table}.linked_place_id is null")
from nominatim.typing import SaColumn
def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
""" Create an expression with the necessary conditions over a placex
table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
can be used.
# pylint: disable=all
class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
""" Check for conditions that allow partial index use on
'idx_placex_geometry_reverse_lookupPolygon'.
Needs to be constant, so that the query planner picks them up correctly
in prepared statements.
"""
return sa.text(f"{table}.rank_address between 4 and 25"
f" AND {table}.type != 'postcode'"
f" AND {table}.name is not null"
f" AND {table}.linked_place_id is null"
f" AND {table}.osm_type = 'N'")
name = 'PlacexGeometryReverseLookuppolygon'
inherit_cache = True
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
" AND placex.rank_address between 4 and 25"
" AND placex.type != 'postcode'"
" AND placex.name is not null"
" AND placex.indexed_status = 0"
" AND placex.linked_place_id is null)")
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
compiler: 'sa.Compiled', **kw: Any) -> str:
return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
" AND placex.rank_address between 4 and 25"
" AND placex.type != 'postcode'"
" AND placex.name is not null"
" AND placex.indexed_status = 0"
" AND placex.linked_place_id is null)")
class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
name = 'IntersectsReverseDistance'
inherit_cache = True
def __init__(self, table: sa.Table, geom: SaColumn) -> None:
super().__init__(table.c.geometry,
table.c.rank_search, geom)
self.tablename = table.name
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
def default_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
table = element.tablename
return f"({table}.rank_address between 4 and 25"\
f" AND {table}.type != 'postcode'"\
f" AND {table}.name is not null"\
f" AND {table}.linked_place_id is null"\
f" AND {table}.osm_type = 'N'" + \
" AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, rank, geom2 = list(element.clauses)
table = element.tablename
return (f"({table}.rank_address between 4 and 25"\
f" AND {table}.type != 'postcode'"\
f" AND {table}.name is not null"\
f" AND {table}.linked_place_id is null"\
f" AND {table}.osm_type = 'N'"\
" AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))"\
f" AND {table}.place_id IN"\
" (SELECT place_id FROM placex_place_node_areas"\
" WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
" WHERE f_table_name = 'placex_place_node_areas'"\
" AND search_frame = %s)))") % (
compiler.process(geom1, **kw),
compiler.process(geom2, **kw),
compiler.process(rank, **kw),
compiler.process(geom2, **kw))
class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
name = 'IsBelowReverseDistance'
inherit_cache = True
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
compiler.process(rank, **kw))
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
compiler: 'sa.Compiled', **kw: Any) -> str:
dist, rank = list(element.clauses)
return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
compiler.process(rank, **kw))
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
name = 'IsAddressPoint'
inherit_cache = True
def __init__(self, table: sa.Table) -> None:
super().__init__(table.c.rank_address,
table.c.housenumber, table.c.name)
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
def default_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
compiler.process(rank, **kw),
compiler.process(hnr, **kw),
compiler.process(name, **kw))
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_is_address_point(element: IsAddressPoint,
compiler: 'sa.Compiled', **kw: Any) -> str:
rank, hnr, name = list(element.clauses)
return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
compiler.process(rank, **kw),
compiler.process(hnr, **kw),
compiler.process(name, **kw))
class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
""" Check if in the given list of names in parameters 1 any of the names
from the JSON array in parameter 2 are contained.
"""
name = 'CrosscheckNames'
inherit_cache = True
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
def compile_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "EXISTS(SELECT *"\
" FROM json_each(%s) as name, json_each(%s) as match_name"\
" WHERE name.value = match_name.value)"\
% (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
""" Return elements of a json array as a set.
"""
name = 'JsonArrayEach'
inherit_cache = True
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
return "json_each(%s)" % compiler.process(element.clauses, **kw)
class Greatest(sa.sql.functions.GenericFunction[Any]):
""" Function to compute maximum of all its input parameters.
"""
name = 'greatest'
inherit_cache = True
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
return "max(%s)" % compiler.process(element.clauses, **kw)
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
""" Check if a full word is in a given string.
"""
name = 'RegexpWord'
inherit_cache = True
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s ~* ('\\m(' || %s || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "regexp('\\b(' || %s || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))


@@ -7,50 +7,20 @@
"""
SQLAlchemy definitions for all tables used by the frontend.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.db.sqlalchemy_types import Geometry
class PostgresTypes:
""" Type definitions for complex types as used in Postgres variants.
"""
Composite = HSTORE
Json = JSONB
IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
to_array = array
class SqliteTypes:
""" Type definitions for complex types as used in Postgres variants.
"""
Composite = sqlite_json
Json = sqlite_json
IntArray = sqlite_json
@staticmethod
def to_array(arr: Any) -> Any:
""" Sqlite has no special conversion for arrays.
"""
return arr
import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
from nominatim.db.sqlalchemy_types import Geometry, KeyValueStore, IntArray
#pylint: disable=too-many-instance-attributes
class SearchTables:
""" Data class that holds the tables of the Nominatim database.
This schema strictly reflects the read-access view of the database.
Any data used for updates only will not be visible.
"""
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
if engine_name == 'postgresql':
self.types: Any = PostgresTypes
elif engine_name == 'sqlite':
self.types = SqliteTypes
else:
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
def __init__(self, meta: sa.MetaData) -> None:
self.meta = meta
self.import_status = sa.Table('import_status', meta,
@@ -63,23 +33,22 @@ class SearchTables:
sa.Column('value', sa.Text))
self.placex = sa.Table('placex', meta,
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('linked_place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('indexed_date', sa.DateTime),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('osm_type', sa.String(1), nullable=False),
sa.Column('osm_id', sa.BigInteger, nullable=False),
sa.Column('class', sa.Text, nullable=False, key='class_'),
sa.Column('type', sa.Text, nullable=False),
sa.Column('admin_level', sa.SmallInteger),
sa.Column('name', self.types.Composite),
sa.Column('address', self.types.Composite),
sa.Column('extratags', self.types.Composite),
sa.Column('name', KeyValueStore),
sa.Column('address', KeyValueStore),
sa.Column('extratags', KeyValueStore),
sa.Column('geometry', Geometry, nullable=False),
sa.Column('wikipedia', sa.Text),
sa.Column('country_code', sa.String(2)),
@@ -88,44 +57,41 @@ class SearchTables:
sa.Column('centroid', Geometry))
self.addressline = sa.Table('place_addressline', meta,
sa.Column('place_id', sa.BigInteger, index=True),
sa.Column('address_place_id', sa.BigInteger, index=True),
sa.Column('place_id', sa.BigInteger),
sa.Column('address_place_id', sa.BigInteger),
sa.Column('distance', sa.Float),
sa.Column('cached_rank_address', sa.SmallInteger),
sa.Column('fromarea', sa.Boolean),
sa.Column('isaddress', sa.Boolean))
self.postcode = sa.Table('location_postcode', meta,
sa.Column('place_id', sa.BigInteger, unique=True),
sa.Column('place_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('country_code', sa.String(2)),
sa.Column('postcode', sa.Text, index=True),
sa.Column('postcode', sa.Text),
sa.Column('geometry', Geometry))
self.osmline = sa.Table('location_property_osmline', meta,
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('osm_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('address', self.types.Composite),
sa.Column('address', KeyValueStore),
sa.Column('postcode', sa.Text),
sa.Column('country_code', sa.String(2)))
self.country_name = sa.Table('country_name', meta,
sa.Column('country_code', sa.String(2)),
sa.Column('name', self.types.Composite),
sa.Column('derived_name', self.types.Composite),
sa.Column('country_default_language_code', sa.Text),
sa.Column('name', KeyValueStore),
sa.Column('derived_name', KeyValueStore),
sa.Column('partition', sa.Integer))
self.country_grid = sa.Table('country_osm_grid', meta,
@@ -135,12 +101,12 @@ class SearchTables:
# The following tables are not necessarily present.
self.search_name = sa.Table('search_name', meta,
sa.Column('place_id', sa.BigInteger, index=True),
sa.Column('place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', self.types.IntArray, index=True),
sa.Column('nameaddress_vector', self.types.IntArray, index=True),
sa.Column('name_vector', IntArray),
sa.Column('nameaddress_vector', IntArray),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry))
@@ -150,6 +116,5 @@ class SearchTables:
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
sa.Column('partition', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('postcode', sa.Text))
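A hypothetical usage sketch of the simplified constructor: with the dialect-specific type parameters gone, a single set of table definitions compiles for either backend. The module path is an assumption, not shown in this diff:

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite
from nominatim.db.sqlalchemy_schema import SearchTables  # module path assumed

t = SearchTables(sa.MetaData())
stmt = sa.select(t.placex.c.place_id).where(t.placex.c.rank_address == 30)
# The same statement compiles for both backends.
print(stmt.compile(dialect=postgresql.dialect()))
print(stmt.compile(dialect=sqlite.dialect()))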

View File

@@ -1,118 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom types for SQLAlchemy.
"""
from typing import Callable, Any, cast
import sys
import sqlalchemy as sa
from sqlalchemy import types
from nominatim.typing import SaColumn, SaBind
#pylint: disable=all
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
"""
cache_ok = True
def __init__(self, subtype: str = 'Geometry'):
self.subtype = subtype
def get_col_spec(self) -> str:
return f'GEOMETRY({self.subtype}, 4326)'
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
def process(value: Any) -> str:
if isinstance(value, str):
return value
return cast(str, value.to_wkt())
return process
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
def process(value: Any) -> str:
assert isinstance(value, str)
return value
return process
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn) -> 'sa.Operators':
return self.op('&&')(other)
def is_line_like(self) -> SaColumn:
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString',
'ST_MultiLineString'))
def is_area(self) -> SaColumn:
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon',
'ST_MultiPolygon'))
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
other, distance, type_=sa.Boolean)
def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
return sa.func.coalesce(sa.null(), self).op('&&')(other)
def ST_Distance(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Distance(self, other, type_=sa.Float)
def ST_Contains(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Contains(self, other, type_=sa.Boolean)
def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_ClosestPoint(self, other, type_=Geometry)
def ST_Buffer(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Buffer(self, other, type_=Geometry)
def ST_Expand(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Expand(self, other, type_=Geometry)
def ST_Collect(self) -> SaColumn:
return sa.func.ST_Collect(self, type_=Geometry)
def ST_Centroid(self) -> SaColumn:
return sa.func.ST_Centroid(self, type_=Geometry)
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)

View File

@@ -0,0 +1,17 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module with custom types for SQLAlchemy
"""
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from .geometry import (Geometry as Geometry)
from .int_array import (IntArray as IntArray)
from .key_value import (KeyValueStore as KeyValueStore)
from .json import (Json as Json)

View File

@@ -0,0 +1,308 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom types for SQLAlchemy.
"""
from __future__ import annotations
from typing import Callable, Any, cast
import sys
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy import types
from nominatim.typing import SaColumn, SaBind
#pylint: disable=all
class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
""" Function to compute the spherical distance in meters.
"""
type = sa.Float()
name = 'Geometry_DistanceSpheroid'
inherit_cache = True
@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
def _default_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DistanceSpheroid(%s,"\
" 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
% compiler.process(element.clauses, **kw)
@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
def _spatialite_distance_spheroid(element: Geometry_DistanceSpheroid,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is a line or multiline.
"""
name = 'Geometry_IsLineLike'
inherit_cache = True
@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
def _default_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_line_like(element: Geometry_IsLineLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
compiler.process(element.clauses, **kw)
class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is a polygon or multipolygon.
"""
name = 'Geometry_IsAreaLike'
inherit_cache = True
@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
def _default_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
compiler.process(element.clauses, **kw)
@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_is_area_like(element: Geometry_IsAreaLike,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
compiler.process(element.clauses, **kw)
class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
""" Check if the bounding boxes of the given geometries intersect.
"""
name = 'Geometry_IntersectsBbox'
inherit_cache = True
@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
def _default_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_intersects(element: Geometry_IntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
""" Check if the bounding box of the geometry intersects with the
given table column, using the spatial index for the column.
The index must exist or the query may return nothing.
"""
name = 'Geometry_ColumnIntersectsBbox'
inherit_cache = True
@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
def default_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_intersects_column(element: Geometry_ColumnIntersectsBbox,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "MbrIntersects(%s, %s) = 1 and "\
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
"AND search_frame = %s)" %(
compiler.process(arg1, **kw),
compiler.process(arg2, **kw),
arg1.table.name, arg1.table.name, arg1.name,
compiler.process(arg2, **kw))
class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
""" Check if the geometry is within the distance of the
given table column, using the spatial index for the column.
The index must exist or the query may return nothing.
"""
name = 'Geometry_ColumnDWithin'
inherit_cache = True
@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
def default_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
def spatialite_dwithin_column(element: Geometry_ColumnDWithin,
compiler: 'sa.Compiled', **kw: Any) -> str:
geom1, geom2, dist = list(element.clauses)
return "ST_Distance(%s, %s) < %s and "\
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
"AND search_frame = ST_Expand(%s, %s))" %(
compiler.process(geom1, **kw),
compiler.process(geom2, **kw),
compiler.process(dist, **kw),
geom1.table.name, geom1.table.name, geom1.name,
compiler.process(geom2, **kw),
compiler.process(dist, **kw))
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
"""
cache_ok = True
def __init__(self, subtype: str = 'Geometry'):
self.subtype = subtype
def get_col_spec(self) -> str:
return f'GEOMETRY({self.subtype}, 4326)'
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
def process(value: Any) -> str:
if isinstance(value, str):
return value
return cast(str, value.to_wkt())
return process
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
def process(value: Any) -> str:
assert isinstance(value, str)
return value
return process
def column_expression(self, col: SaColumn) -> SaColumn:
return sa.func.ST_AsEWKB(col)
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn, use_index: bool = True) -> 'sa.Operators':
if not use_index:
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self.expr), other)
if isinstance(self.expr, sa.Column):
return Geometry_ColumnIntersectsBbox(self.expr, other)
return Geometry_IntersectsBbox(self.expr, other)
def is_line_like(self) -> SaColumn:
return Geometry_IsLineLike(self)
def is_area(self) -> SaColumn:
return Geometry_IsAreaLike(self)
def within_distance(self, other: SaColumn, distance: SaColumn) -> SaColumn:
if isinstance(self.expr, sa.Column):
return Geometry_ColumnDWithin(self.expr, other, distance)
return self.ST_Distance(other) < distance
def ST_Distance(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Distance(self, other, type_=sa.Float)
def ST_Contains(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Contains(self, other, type_=sa.Boolean)
def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
return sa.func.coalesce(sa.func.ST_ClosestPoint(self, other, type_=Geometry),
other)
def ST_Buffer(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Buffer(self, other, type_=Geometry)
def ST_Expand(self, other: SaColumn) -> SaColumn:
return sa.func.ST_Expand(self, other, type_=Geometry)
def ST_Collect(self) -> SaColumn:
return sa.func.ST_Collect(self, type_=Geometry)
def ST_Centroid(self) -> SaColumn:
return sa.func.ST_Centroid(self, type_=Geometry)
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
def distance_spheroid(self, other: SaColumn) -> SaColumn:
return Geometry_DistanceSpheroid(self, other)
@compiles(Geometry, 'sqlite') # type: ignore[no-untyped-call]
def get_col_spec(self, *args, **kwargs): # type: ignore[no-untyped-def]
return 'GEOMETRY'
SQLITE_FUNCTION_ALIAS = (
('ST_AsEWKB', sa.Text, 'AsEWKB'),
('ST_GeomFromEWKT', Geometry, 'GeomFromEWKT'),
('ST_AsGeoJSON', sa.Text, 'AsGeoJSON'),
('ST_AsKML', sa.Text, 'AsKML'),
('ST_AsSVG', sa.Text, 'AsSVG'),
('ST_LineLocatePoint', sa.Float, 'ST_Line_Locate_Point'),
('ST_LineInterpolatePoint', sa.Float, 'ST_Line_Interpolate_Point'),
)
def _add_function_alias(func: str, ftype: type, alias: str) -> None:
_FuncDef = type(func, (sa.sql.functions.GenericFunction, ), {
"type": ftype(),
"name": func,
"identifier": func,
"inherit_cache": True})
func_templ = f"{alias}(%s)"
def _sqlite_impl(element: Any, compiler: Any, **kw: Any) -> Any:
return func_templ % compiler.process(element.clauses, **kw)
compiles(_FuncDef, 'sqlite')(_sqlite_impl) # type: ignore[no-untyped-call]
for alias in SQLITE_FUNCTION_ALIAS:
_add_function_alias(*alias)
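A short sketch of what the alias loop above buys: once the module is imported (package path assumed from the imports earlier in this diff), the same sa.func expression renders the PostGIS name on PostgreSQL and the Spatialite alias on SQLite:

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite
# Importing the package runs the alias registration above (path assumed).
import nominatim.db.sqlalchemy_types  # noqa: F401

expr = sa.func.ST_AsGeoJSON(sa.literal('POINT(0 0)'))
print(expr.compile(dialect=postgresql.dialect()))  # ST_AsGeoJSON(%(param_1)s)
print(expr.compile(dialect=sqlite.dialect()))      # AsGeoJSON(?)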

View File

@@ -0,0 +1,123 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom type for an array of integers.
"""
from typing import Any, List, cast, Optional
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import ARRAY
from nominatim.typing import SaDialect, SaColumn
# pylint: disable=all
class IntList(sa.types.TypeDecorator[Any]):
""" A list of integers saved as a text of comma-separated numbers.
"""
impl = sa.types.Unicode
cache_ok = True
def process_bind_param(self, value: Optional[Any], dialect: 'sa.Dialect') -> Optional[str]:
if value is None:
return None
assert isinstance(value, list)
return ','.join(map(str, value))
def process_result_value(self, value: Optional[Any],
dialect: SaDialect) -> Optional[List[int]]:
return [int(v) for v in value.split(',')] if value is not None else None
def copy(self, **kw: Any) -> 'IntList':
return IntList(self.impl.length)
class IntArray(sa.types.TypeDecorator[Any]):
""" Dialect-independent list of integers.
"""
impl = IntList
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return ARRAY(sa.Integer()) #pylint: disable=invalid-name
return IntList()
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def __add__(self, other: SaColumn) -> 'sa.ColumnOperators':
""" Concate the array with the given array. If one of the
operants is null, the value of the other will be returned.
"""
return ArrayCat(self.expr, other)
def contains(self, other: SaColumn, **kwargs: Any) -> 'sa.ColumnOperators':
""" Return true if the array contains all the value of the argument
array.
"""
return ArrayContains(self.expr, other)
class ArrayAgg(sa.sql.functions.GenericFunction[Any]):
""" Aggregate function to collect elements in an array.
"""
type = IntArray()
identifier = 'ArrayAgg'
name = 'array_agg'
inherit_cache = True
@compiles(ArrayAgg, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_agg(element: ArrayAgg, compiler: 'sa.Compiled', **kw: Any) -> str:
return "group_concat(%s, ',')" % compiler.process(element.clauses, **kw)
class ArrayContains(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
name = 'ArrayContains'
inherit_cache = True
@compiles(ArrayContains) # type: ignore[no-untyped-call, misc]
def generic_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(ArrayContains, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
class ArrayCat(sa.sql.expression.FunctionElement[Any]):
""" Function to check if an array is fully contained in another.
"""
type = IntArray()
identifier = 'ArrayCat'
inherit_cache = True
@compiles(ArrayCat) # type: ignore[no-untyped-call, misc]
def generic_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_cat(%s)" % compiler.process(element.clauses, **kw)
@compiles(ArrayCat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || ',' || %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,30 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Common json type for different dialects.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.typing import SaDialect
# pylint: disable=all
class Json(sa.types.TypeDecorator[Any]):
""" Dialect-independent type for JSON.
"""
impl = sa.types.JSON
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return JSONB(none_as_null=True) # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)

View File

@@ -0,0 +1,62 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
A custom type that implements a simple key-value store of strings.
"""
from typing import Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from sqlalchemy.dialects.postgresql import HSTORE
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
from nominatim.typing import SaDialect, SaColumn
# pylint: disable=all
class KeyValueStore(sa.types.TypeDecorator[Any]):
""" Dialect-independent type of a simple key-value store of strings.
"""
impl = HSTORE
cache_ok = True
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
if dialect.name == 'postgresql':
return HSTORE() # type: ignore[no-untyped-call]
return sqlite_json(none_as_null=True)
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
def merge(self, other: SaColumn) -> 'sa.Operators':
""" Merge the values from the given KeyValueStore into this
one, overwriting values where necessary. When the argument
is null, nothing happens.
"""
return KeyValueConcat(self.expr, other)
class KeyValueConcat(sa.sql.expression.FunctionElement[Any]):
""" Return the merged key-value store from the input parameters.
"""
type = KeyValueStore()
name = 'JsonConcat'
inherit_cache = True
@compiles(KeyValueConcat) # type: ignore[no-untyped-call, misc]
def default_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(%s || coalesce(%s, ''::hstore))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
@compiles(KeyValueConcat, 'sqlite') # type: ignore[no-untyped-call, misc]
def sqlite_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "json_patch(%s, coalesce(%s, '{}'))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))

View File

@@ -0,0 +1,122 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom functions for SQLite.
"""
from typing import cast, Optional, Set, Any
import json
# pylint: disable=protected-access
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
""" Custom weight function for search results.
"""
if search_vector is not None:
svec = [int(x) for x in search_vector.split(',')]
for rank in json.loads(rankings):
if all(r in svec for r in rank[1]):
return cast(float, rank[0])
return default
class ArrayIntersectFuzzy:
""" Compute the array of common elements of all input integer arrays.
Very large input parameters may be ignored to speed up
computation. Therefore, the result is a superset of common elements.
Input and output arrays are given as comma-separated lists.
"""
def __init__(self) -> None:
self.first = ''
self.values: Optional[Set[int]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the intersection.
"""
if value is not None:
if not self.first:
self.first = value
elif len(value) < 10000000:
if self.values is None:
self.values = {int(x) for x in self.first.split(',')}
self.values.intersection_update((int(x) for x in value.split(',')))
def finalize(self) -> str:
""" Return the final result.
"""
if self.values is not None:
return ','.join(map(str, self.values))
return self.first
class ArrayUnion:
""" Compute the set of all elements of the input integer arrays.
Input and output arrays are given as strings of comma-separated lists.
"""
def __init__(self) -> None:
self.values: Optional[Set[str]] = None
def step(self, value: Optional[str]) -> None:
""" Add the next array to the union.
"""
if value is not None:
if self.values is None:
self.values = set(value.split(','))
else:
self.values.update(value.split(','))
def finalize(self) -> str:
""" Return the final result.
"""
return '' if self.values is None else ','.join(self.values)
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in array 'container'.
"""
if container is None or containee is None:
return None
vset = container.split(',')
return all(v in vset for v in containee.split(','))
def array_pair_contains(container1: Optional[str], container2: Optional[str],
containee: Optional[str]) -> Optional[bool]:
""" Is the array 'containee' completely contained in the union of
array 'container1' and array 'container2'.
"""
if container1 is None or container2 is None or containee is None:
return None
vset = container1.split(',') + container2.split(',')
return all(v in vset for v in containee.split(','))
def install_custom_functions(conn: Any) -> None:
""" Install helper functions for Nominatim into the given SQLite
database connection.
"""
conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
conn.create_function('array_contains', 2, array_contains, deterministic=True)
conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
_create_aggregate(conn, 'array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
_create_aggregate(conn, 'array_union', 1, ArrayUnion)
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
await aioconn._execute(aioconn._conn.create_aggregate, *args)
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
try:
conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
except Exception as error: # pylint: disable=broad-exception-caught
conn._handle_exception(error)
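A quick demonstration of the comma-separated array convention using a plain sqlite3 connection. install_custom_functions() itself targets the adapted aiosqlite connections used by the frontend, so the scalar function is registered by hand here; the module path is assumed:

import sqlite3
from nominatim.db.sqlite_functions import array_contains  # module path assumed

conn = sqlite3.connect(':memory:')
conn.create_function('array_contains', 2, array_contains, deterministic=True)
print(conn.execute("SELECT array_contains('1,2,3', '2,3')").fetchone())  # (1,)
print(conn.execute("SELECT array_contains('1,2,3', '4')").fetchone())    # (0,)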

View File

@@ -29,11 +29,24 @@ class StatusRow(TypedDict):
indexed: Optional[bool]
def compute_database_date(conn: Connection) -> dt.datetime:
def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
""" Determine the date of the database from the newest object in the
database.
"""
# First, find the node with the highest ID in the database
# If there is a date from osm2pgsql available, use that.
if conn.table_exists('osm2pgsql_properties'):
with conn.cursor() as cur:
cur.execute(""" SELECT value FROM osm2pgsql_properties
WHERE property = 'current_timestamp' """)
row = cur.fetchone()
if row is not None:
return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
.replace(tzinfo=dt.timezone.utc)
if offline:
raise UsageError("Cannot determine database date from data in offline mode.")
# Else, find the node with the highest ID in the database
with conn.cursor() as cur:
if conn.table_exists('place'):
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
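A worked example of the timestamp parsing introduced above; osm2pgsql stores its 'current_timestamp' property in ISO-8601 UTC with a trailing Z (the sample value is illustrative):

import datetime as dt

ts = dt.datetime.strptime('2024-03-05T11:33:32Z', "%Y-%m-%dT%H:%M:%SZ")\
       .replace(tzinfo=dt.timezone.utc)
print(ts.isoformat())  # 2024-03-05T11:33:32+00:00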

View File

@@ -118,4 +118,4 @@ class CopyBuffer:
"""
if self.buffer.tell() > 0:
self.buffer.seek(0)
cur.copy_from(self.buffer, table, columns=columns) # type: ignore[no-untyped-call]
cur.copy_from(self.buffer, table, columns=columns)

View File

@@ -10,11 +10,13 @@ Server implementation using the falcon webserver framework.
from typing import Optional, Mapping, cast, Any, List
from pathlib import Path
import datetime as dt
import asyncio
from falcon.asgi import App, Request, Response
from nominatim.api import NominatimAPIAsync
import nominatim.api.v1 as api_impl
import nominatim.api.logging as loglib
from nominatim.config import Configuration
class HTTPNominatimError(Exception):
@@ -44,8 +46,15 @@ async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=u
per exception info.
"""
resp.status = 503
resp.text = "Query took too long to process."
resp.content_type = 'text/plain; charset=utf-8'
loglib.log().comment('Aborted: Query took too long to process.')
logdata = loglib.get_and_disable()
if logdata:
resp.text = logdata
resp.content_type = 'text/html; charset=utf-8'
else:
resp.text = "Query took too long to process."
resp.content_type = 'text/plain; charset=utf-8'
class ParamWrapper(api_impl.ASGIAdaptor):
@@ -119,7 +128,7 @@ class FileLoggingMiddleware:
resource: Optional[EndpointWrapper],
req_succeeded: bool) -> None:
""" Callback after requests writes to the logfile. It only
writes logs for sucessful requests for search, reverse and lookup.
writes logs for successful requests for search, reverse and lookup.
"""
if not req_succeeded or resource is None or resp.status != 200\
or resource.name not in ('reverse', 'search', 'lookup', 'details'):
@@ -164,6 +173,8 @@ def get_application(project_dir: Path,
middleware=middleware)
app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
app.add_error_handler(TimeoutError, timeout_error_handler)
# different from TimeoutError in Python <= 3.10
app.add_error_handler(asyncio.TimeoutError, timeout_error_handler)
legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
for name, func in api_impl.ROUTES:
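Why the extra handler registration matters: up to Python 3.10, asyncio.TimeoutError and the builtin TimeoutError are distinct classes; they only became aliases in 3.11, so both must be registered to catch timeouts on older interpreters:

import asyncio
import sys

# Prints e.g. "(3, 10) False" or "(3, 11) True".
print(sys.version_info[:2], asyncio.TimeoutError is TimeoutError)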

View File

@@ -10,11 +10,12 @@ Server implementation using the starlette webserver framework.
from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
from pathlib import Path
import datetime as dt
import asyncio
from starlette.applications import Starlette
from starlette.routing import Route
from starlette.exceptions import HTTPException
from starlette.responses import Response, PlainTextResponse
from starlette.responses import Response, PlainTextResponse, HTMLResponse
from starlette.requests import Request
from starlette.middleware import Middleware
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
@@ -22,6 +23,7 @@ from starlette.middleware.cors import CORSMiddleware
from nominatim.api import NominatimAPIAsync
import nominatim.api.v1 as api_impl
import nominatim.api.logging as loglib
from nominatim.config import Configuration
class ParamWrapper(api_impl.ASGIAdaptor):
@@ -114,6 +116,12 @@ async def timeout_error(request: Request, #pylint: disable=unused-argument
_: Exception) -> Response:
""" Error handler for query timeouts.
"""
loglib.log().comment('Aborted: Query took too long to process.')
logdata = loglib.get_and_disable()
if logdata:
return HTMLResponse(logdata)
return PlainTextResponse("Query took too long to process.", status_code=503)
@@ -144,7 +152,8 @@ def get_application(project_dir: Path,
middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
TimeoutError: timeout_error
TimeoutError: timeout_error,
asyncio.TimeoutError: timeout_error
}
async def _shutdown() -> None:

View File

@@ -201,7 +201,7 @@ class AbstractTokenizer(ABC):
@abstractmethod
def update_statistics(self) -> None:
def update_statistics(self, config: Configuration) -> None:
""" Recompute any tokenizer statistics necessary for efficient lookup.
This function is meant to be called from time to time by the user
to improve performance. However, the tokenizer must not depend on

View File

@@ -31,6 +31,11 @@ DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"
LOG = logging.getLogger()
WORD_TYPES = (('country_names', 'C'),
('postcodes', 'P'),
('full_word', 'W'),
('housenumbers', 'H'))
def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
""" Create a new instance of the tokenizer provided by this module.
"""
@@ -62,7 +67,8 @@ class ICUTokenizer(AbstractTokenizer):
if init_db:
self.update_sql_functions(config)
self._init_db_tables(config)
self._setup_db_tables(config)
self._create_base_indices(config, 'word')
def init_from_project(self, config: Configuration) -> None:
@@ -80,9 +86,7 @@ class ICUTokenizer(AbstractTokenizer):
""" Do any required postprocessing to make the tokenizer data ready
for use.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
self._create_lookup_indices(config, 'word')
def update_sql_functions(self, config: Configuration) -> None:
@@ -100,24 +104,39 @@ class ICUTokenizer(AbstractTokenizer):
self.init_from_project(config)
def update_statistics(self) -> None:
def update_statistics(self, config: Configuration) -> None:
""" Recompute frequencies for all name words.
"""
with connect(self.dsn) as conn:
if conn.table_exists('search_name'):
with conn.cursor() as cur:
cur.drop_table("word_frequencies")
LOG.info("Computing word frequencies")
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute("CREATE INDEX ON word_frequencies(id)")
LOG.info("Update word table with recomputed frequencies")
cur.execute("""UPDATE word
SET info = info || jsonb_build_object('count', count)
FROM word_frequencies WHERE word_id = id""")
cur.drop_table("word_frequencies")
if not conn.table_exists('search_name'):
return
with conn.cursor() as cur:
LOG.info('Computing word frequencies')
cur.drop_table('word_frequencies')
cur.execute("""CREATE TEMP TABLE word_frequencies AS
SELECT unnest(name_vector) as id, count(*)
FROM search_name GROUP BY id""")
cur.execute('CREATE INDEX ON word_frequencies(id)')
LOG.info('Update word table with recomputed frequencies')
cur.drop_table('tmp_word')
cur.execute("""CREATE TABLE tmp_word AS
SELECT word_id, word_token, type, word,
(CASE WHEN wf.count is null THEN info
ELSE info || jsonb_build_object('count', wf.count)
END) as info
FROM word LEFT JOIN word_frequencies wf
ON word.word_id = wf.id""")
cur.drop_table('word_frequencies')
sqlp = SQLPreprocessor(conn, config)
sqlp.run_string(conn,
'GRANT SELECT ON tmp_word TO "{{config.DATABASE_WEBUSER}}"')
conn.commit()
self._create_base_indices(config, 'tmp_word')
self._create_lookup_indices(config, 'tmp_word')
self._move_temporary_word_table('tmp_word')
def _cleanup_housenumbers(self) -> None:
@@ -195,19 +214,20 @@ class ICUTokenizer(AbstractTokenizer):
return list(s[0].split('@')[0] for s in cur)
def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
def _install_php(self, phpdir: Optional[Path], overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
assert self.loader is not None
php_file = self.data_dir / "tokenizer.php"
if phpdir is not None:
assert self.loader is not None
php_file = self.data_dir / "tokenizer.php"
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', 10000000);
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
def _save_config(self) -> None:
@@ -219,16 +239,84 @@ class ICUTokenizer(AbstractTokenizer):
self.loader.save_config_to_db(conn)
def _init_db_tables(self, config: Configuration) -> None:
def _setup_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.drop_table('word')
sqlp = SQLPreprocessor(conn, config)
sqlp.run_string(conn, """
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,
word text,
info jsonb
) {{db.tablespace.search_data}};
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
""")
conn.commit()
def _create_base_indices(self, config: Configuration, table_name: str) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer_tables.sql')
sqlp.run_string(conn,
"""CREATE INDEX idx_{{table_name}}_word_token ON {{table_name}}
USING BTREE (word_token) {{db.tablespace.search_index}}""",
table_name=table_name)
for name, ctype in WORD_TYPES:
sqlp.run_string(conn,
"""CREATE INDEX idx_{{table_name}}_{{idx_name}} ON {{table_name}}
USING BTREE (word) {{db.tablespace.address_index}}
WHERE type = '{{column_type}}'
""",
table_name=table_name, idx_name=name,
column_type=ctype)
conn.commit()
def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
""" Create additional indexes used when running the API.
"""
with connect(self.dsn) as conn:
sqlp = SQLPreprocessor(conn, config)
# Index required for details lookup.
sqlp.run_string(conn, """
CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
""",
table_name=table_name)
conn.commit()
def _move_temporary_word_table(self, old: str) -> None:
""" Rename all tables and indexes used by the tokenizer.
"""
with connect(self.dsn) as conn:
with conn.cursor() as cur:
cur.drop_table('word')
cur.execute(f"ALTER TABLE {old} RENAME TO word")
for idx in ('word_token', 'word_id'):
cur.execute(f"""ALTER INDEX idx_{old}_{idx}
RENAME TO idx_word_{idx}""")
for name, _ in WORD_TYPES:
cur.execute(f"""ALTER INDEX idx_{old}_{name}
RENAME TO idx_word_{name}""")
conn.commit()
class ICUNameAnalyzer(AbstractAnalyzer):
""" The ICU analyzer uses the ICU library for splitting names.

View File

@@ -210,7 +210,7 @@ class LegacyTokenizer(AbstractTokenizer):
self._save_config(conn, config)
def update_statistics(self) -> None:
def update_statistics(self, _: Configuration) -> None:
""" Recompute the frequency of full words.
"""
with connect(self.dsn) as conn:
@@ -269,15 +269,16 @@ class LegacyTokenizer(AbstractTokenizer):
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
if config.lib_dir.php is not None:
php_file = self.data_dir / "tokenizer.php"
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
"""), encoding='utf-8')
if not php_file.exists() or overwrite:
php_file.write_text(dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
"""), encoding='utf-8')
def _init_db_tables(self, config: Configuration) -> None:

View File

@@ -60,5 +60,5 @@ class SanitizerHandler(Protocol):
Return:
The result must be a callable that takes a place description
and transforms name and address as reuqired.
and transforms name and address as required.
"""

View File

@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
import logging
from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError
from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
@@ -87,3 +88,19 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
for msg in conn.notices:
print(msg)
def clean_deleted_relations(config: Configuration, age: str) -> None:
""" Clean deleted relations older than a given age
"""
with connect(config.get_libpq_dsn()) as conn:
with conn.cursor() as cur:
try:
cur.execute("""SELECT place_force_delete(p.place_id)
FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
AND age(p.indexed_date) > %s::interval""",
(age, ))
except DataError as exc:
raise UsageError('Invalid PostgreSQL time interval format') from exc
conn.commit()

View File

@@ -13,9 +13,11 @@ from textwrap import dedent
from nominatim.config import Configuration
from nominatim.db.connection import connect, Connection
from nominatim.db import properties
from nominatim.errors import UsageError
from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.tools import freeze
from nominatim.version import NOMINATIM_VERSION, parse_version
CHECKLIST = []
@@ -125,7 +127,7 @@ def _get_indexes(conn: Connection) -> List[str]:
# CHECK FUNCTIONS
#
# Functions are exectured in the order they appear here.
# Functions are executed in the order they appear here.
@_check(hint="""\
{error}
@@ -146,11 +148,52 @@ def check_connection(conn: Any, config: Configuration) -> CheckResult:
return CheckState.OK
@_check(hint="""\
Database version ({db_version}) doesn't match Nominatim version ({nom_version})
Hints:
* Are you connecting to the correct database?
{instruction}
Check the Migration chapter of the Administration Guide.
Project directory: {config.project_dir}
Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
""")
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
""" Checking database_version matches Nominatim software version
"""
if conn.table_exists('nominatim_properties'):
db_version_str = properties.get_property(conn, 'database_version')
else:
db_version_str = None
if db_version_str is not None:
db_version = parse_version(db_version_str)
if db_version == NOMINATIM_VERSION:
return CheckState.OK
instruction = (
'Run migrations: nominatim admin --migrate'
if db_version < NOMINATIM_VERSION
else 'You need to upgrade the Nominatim software.'
)
else:
instruction = ''
return CheckState.FATAL, dict(db_version=db_version_str,
nom_version=NOMINATIM_VERSION,
instruction=instruction,
config=config)
@_check(hint="""\
placex table not found
Hints:
* Are you connecting to the right database?
* Are you connecting to the correct database?
* Did the import process finish without errors?
Project directory: {config.project_dir}

View File

@@ -12,14 +12,13 @@ import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple, Union, cast
from typing import List, Optional, Tuple, Union
import psutil
from psycopg2.extensions import make_dsn, parse_dsn
from nominatim.config import Configuration
from nominatim.db.connection import connect
from nominatim.typing import DictCursorResults
from nominatim.version import NOMINATIM_VERSION
@@ -79,7 +78,7 @@ def from_file_find_line_portion(
filename: str, start: str, sep: str, fieldnum: int = 1
) -> Optional[str]:
"""open filename, finds the line starting with the 'start' string.
Splits the line using seperator and returns a "fieldnum" from the split."""
Splits the line using separator and returns a "fieldnum" from the split."""
with open(filename, encoding='utf8') as file:
result = ""
for line in file:
@@ -107,15 +106,15 @@ def report_system_information(config: Configuration) -> None:
postgresql_ver: str = convert_version(conn.server_version_tuple())
with conn.cursor() as cur:
cur.execute(f"""
SELECT datname FROM pg_catalog.pg_database
WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
if nominatim_db_exists:
with connect(config.get_libpq_dsn()) as conn:
postgis_ver: str = convert_version(conn.postgis_version_tuple())
else:
postgis_ver = "Unable to connect to database"
num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
(parse_dsn(config.get_libpq_dsn())['dbname'], ))
nominatim_db_exists = num == 1 if isinstance(num, int) else False
if nominatim_db_exists:
with connect(config.get_libpq_dsn()) as conn:
postgis_ver: str = convert_version(conn.postgis_version_tuple())
else:
postgis_ver = "Unable to connect to database"
postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))

View File

@@ -0,0 +1,265 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Exporting a Nominatim database to SQLite.
"""
from typing import Set, Any
import datetime as dt
import logging
from pathlib import Path
import sqlalchemy as sa
from nominatim.typing import SaSelect, SaRow
from nominatim.db.sqlalchemy_types import Geometry, IntArray
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
import nominatim.api as napi
LOG = logging.getLogger()
async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
""" Export an existing database to sqlite. The resulting database
will be usable against the Python frontend of Nominatim.
"""
api = napi.NominatimAPIAsync(project_dir)
try:
outapi = napi.NominatimAPIAsync(project_dir,
{'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}",
'NOMINATIM_DATABASE_RW': '1'})
try:
async with api.begin() as src, outapi.begin() as dest:
writer = SqliteWriter(src, dest, options)
await writer.write()
finally:
await outapi.close()
finally:
await api.close()
class SqliteWriter:
""" Worker class which creates a new SQLite database.
"""
def __init__(self, src: napi.SearchConnection,
dest: napi.SearchConnection, options: Set[str]) -> None:
self.src = src
self.dest = dest
self.options = options
async def write(self) -> None:
""" Create the database structure and copy the data from
the source database to the destination.
"""
LOG.warning('Setting up spatialite')
await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))
await self.create_tables()
await self.copy_data()
if 'search' in self.options:
await self.create_word_table()
await self.create_indexes()
async def create_tables(self) -> None:
""" Set up the database tables.
"""
LOG.warning('Setting up tables')
if 'search' not in self.options:
self.dest.t.meta.remove(self.dest.t.search_name)
else:
await self.create_class_tables()
await self.dest.connection.run_sync(self.dest.t.meta.create_all)
# Convert all Geometry columns to Spatialite geometries
for table in self.dest.t.meta.sorted_tables:
for col in table.c:
if isinstance(col.type, Geometry):
await self.dest.execute(sa.select(
sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
col.type.subtype.upper(), 'XY')))
async def create_class_tables(self) -> None:
""" Set up the table that serve class/type-specific geometries.
"""
sql = sa.text("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'place_classtype_%'""")
for res in await self.src.execute(sql):
for db in (self.src, self.dest):
sa.Table(res[0], db.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))
async def create_word_table(self) -> None:
""" Create the word table.
This table needs the property information to determine the
correct format. Therefore it needs to be created after all other
data has been copied.
"""
await make_query_analyzer(self.src)
await make_query_analyzer(self.dest)
src = self.src.t.meta.tables['word']
dest = self.dest.t.meta.tables['word']
await self.dest.connection.run_sync(dest.create)
LOG.warning("Copying word table")
async_result = await self.src.connection.stream(sa.select(src))
async for partition in async_result.partitions(10000):
data = [{k: getattr(r, k) for k in r._fields} for r in partition]
await self.dest.execute(dest.insert(), data)
await self.dest.connection.run_sync(sa.Index('idx_word_word_token', dest.c.word_token).create)
async def copy_data(self) -> None:
""" Copy data for all registered tables.
"""
def _getfield(row: SaRow, key: str) -> Any:
value = getattr(row, key)
if isinstance(value, dt.datetime):
if value.tzinfo is not None:
value = value.astimezone(dt.timezone.utc)
return value
for table in self.dest.t.meta.sorted_tables:
LOG.warning("Copying '%s'", table.name)
async_result = await self.src.connection.stream(self.select_from(table.name))
async for partition in async_result.partitions(10000):
data = [{('class_' if k == 'class' else k): _getfield(r, k)
for k in r._fields}
for r in partition]
await self.dest.execute(table.insert(), data)
# Set up a minimal copy of pg_tables used to look up the class tables later.
pg_tables = sa.Table('pg_tables', self.dest.t.meta,
sa.Column('schemaname', sa.Text, default='public'),
sa.Column('tablename', sa.Text))
await self.dest.connection.run_sync(pg_tables.create)
data = [{'tablename': t} for t in self.dest.t.meta.tables]
await self.dest.execute(pg_tables.insert().values(data))
async def create_indexes(self) -> None:
""" Add indexes necessary for the frontend.
"""
# reverse place node lookup needs an extra table to simulate a
# partial index with adaptive buffering.
await self.dest.execute(sa.text(
""" CREATE TABLE placex_place_node_areas AS
SELECT place_id, ST_Expand(geometry,
14.0 * exp(-0.2 * rank_search) - 0.03) as geometry
FROM placex
WHERE rank_address between 5 and 25
and osm_type = 'N'
and linked_place_id is NULL """))
await self.dest.execute(sa.select(
sa.func.RecoverGeometryColumn('placex_place_node_areas', 'geometry',
4326, 'GEOMETRY', 'XY')))
await self.dest.execute(sa.select(sa.func.CreateSpatialIndex(
'placex_place_node_areas', 'geometry')))
# Remaining indexes.
await self.create_spatial_index('country_grid', 'geometry')
await self.create_spatial_index('placex', 'geometry')
await self.create_spatial_index('osmline', 'linegeo')
await self.create_spatial_index('tiger', 'linegeo')
await self.create_index('placex', 'place_id')
await self.create_index('placex', 'parent_place_id')
await self.create_index('placex', 'rank_address')
await self.create_index('addressline', 'place_id')
await self.create_index('postcode', 'place_id')
await self.create_index('osmline', 'place_id')
await self.create_index('tiger', 'place_id')
if 'search' in self.options:
await self.create_spatial_index('postcode', 'geometry')
await self.create_spatial_index('search_name', 'centroid')
await self.create_index('search_name', 'place_id')
await self.create_index('osmline', 'parent_place_id')
await self.create_index('tiger', 'parent_place_id')
await self.create_search_index()
for t in self.dest.t.meta.tables:
if t.startswith('place_classtype_'):
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(t, 'centroid')))
async def create_spatial_index(self, table: str, column: str) -> None:
""" Create a spatial index on the given table and column.
"""
await self.dest.execute(sa.select(
sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))
async def create_index(self, table_name: str, column: str) -> None:
""" Create a simple index on the given table and column.
"""
table = getattr(self.dest.t, table_name)
await self.dest.connection.run_sync(
sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
async def create_search_index(self) -> None:
""" Create the tables and indexes needed for word lookup.
"""
LOG.warning("Creating reverse search table")
rsn = sa.Table('reverse_search_name', self.dest.t.meta,
sa.Column('word', sa.Integer()),
sa.Column('column', sa.Text()),
sa.Column('places', IntArray))
await self.dest.connection.run_sync(rsn.create)
tsrc = self.src.t.search_name
for column in ('name_vector', 'nameaddress_vector'):
sql = sa.select(sa.func.unnest(getattr(tsrc.c, column)).label('word'),
sa.func.ArrayAgg(tsrc.c.place_id).label('places'))\
.group_by('word')
async_result = await self.src.connection.stream(sql)
async for partition in async_result.partitions(100):
data = []
for row in partition:
row.places.sort()
data.append({'word': row.word,
'column': column,
'places': row.places})
await self.dest.execute(rsn.insert(), data)
await self.dest.connection.run_sync(
sa.Index('idx_reverse_search_name_word', rsn.c.word).create)
def select_from(self, table: str) -> SaSelect:
""" Create the SQL statement to select the source columns and rows.
"""
columns = self.src.t.meta.tables[table].c
if table == 'placex':
# SQLite struggles with Geometries that are larger than 5MB,
# so simplify those.
return sa.select(*(c for c in columns if not isinstance(c.type, Geometry)),
sa.func.ST_AsText(columns.centroid).label('centroid'),
sa.func.ST_AsText(
sa.case((sa.func.ST_MemSize(columns.geometry) < 5000000,
columns.geometry),
else_=sa.func.ST_SimplifyPreserveTopology(
columns.geometry, 0.0001)
)).label('geometry'))
sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
if isinstance(c.type, Geometry) else c for c in columns))
return sql
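A hypothetical driver sketch for the exporter above; the project directory, output file and module path are assumptions for illustration:

import asyncio
from pathlib import Path

from nominatim.tools.convert_sqlite import convert  # module path assumed

asyncio.run(convert(Path('/srv/nominatim-project'),
                    Path('/tmp/nominatim.sqlite'),
                    options={'search'}))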

View File

@@ -23,7 +23,8 @@ from nominatim.db.async_connection import DBConnection
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.tools.exec_utils import run_osm2pgsql
from nominatim.errors import UsageError
from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \
POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
@@ -38,6 +39,25 @@ def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int,
raise UsageError(f'{module} is too old.')
def _require_loaded(extension_name: str, conn: Connection) -> None:
""" Check that the given extension is loaded. """
if not conn.extension_loaded(extension_name):
LOG.fatal('Required module %s is not loaded.', extension_name)
raise UsageError(f'{extension_name} is not loaded.')
def check_existing_database_plugins(dsn: str) -> None:
""" Check that the database has the required plugins installed."""
with connect(dsn) as conn:
_require_version('PostgreSQL server',
conn.server_version_tuple(),
POSTGRESQL_REQUIRED_VERSION)
_require_version('PostGIS',
conn.postgis_version_tuple(),
POSTGIS_REQUIRED_VERSION)
_require_loaded('hstore', conn)
def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
""" Create a new database for Nominatim and populate it with the
essential extensions.

View File

@@ -31,7 +31,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
"""
env = get_pg_env(options['dsn'])
cmd = [str(options['osm2pgsql']),
'--hstore', '--latlon', '--slim',
'--slim',
'--log-progress', 'true',
'--number-processes', '1' if options['append'] else str(options['threads']),
'--cache', str(options['osm2pgsql_cache']),
@@ -43,7 +43,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
os.environ.get('LUAPATH', ';')))
cmd.extend(('--output', 'flex'))
else:
cmd.extend(('--output', 'gazetteer'))
cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))
cmd.append('--append' if options['append'] else '--create')

View File

@@ -213,6 +213,10 @@ def _quote_php_variable(var_type: Type[Any], config: Configuration,
def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
""" Create the website script stubs.
"""
if config.lib_dir.php is None:
LOG.info("Python frontend does not require website setup. Skipping.")
return
if not basedir.exists():
LOG.info('Creating website directory.')
basedir.mkdir()

Some files were not shown because too many files have changed in this diff.