mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
Compare commits
653 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
e5a5f02666 | ||
|
|
11ced26025 | ||
|
|
edb1eec46d | ||
|
|
63eacc5589 | ||
|
|
e929693cae | ||
|
|
ae7c584e28 | ||
|
|
4d5faf9423 | ||
|
|
b7eea4d53a | ||
|
|
dd2c794de5 | ||
|
|
3b6d35fc12 | ||
|
|
9fa73cfb15 | ||
|
|
62b7670e0c | ||
|
|
d7bb449e74 | ||
|
|
247065ff6f | ||
|
|
9a84adef59 | ||
|
|
1879cf902c | ||
|
|
019a68a4bb | ||
|
|
110491011f | ||
|
|
36b1660121 | ||
|
|
56201feb28 | ||
|
|
c6d40d4bf4 | ||
|
|
a4f2e6a893 | ||
|
|
b427fc7965 | ||
|
|
e264604894 | ||
|
|
3a5d9f0377 | ||
|
|
8be27015b2 | ||
|
|
100391fb8e | ||
|
|
dc1baaa0af | ||
|
|
7205491b84 | ||
|
|
918fec73c6 | ||
|
|
b6df486525 | ||
|
|
8bd8a040e0 | ||
|
|
781e83ddc3 | ||
|
|
5afd96d210 | ||
|
|
cf49a070fd | ||
|
|
4aba36c5ac | ||
|
|
ca6e65fff1 | ||
|
|
1e0025b095 | ||
|
|
173e85c9e6 | ||
|
|
ffb467028e | ||
|
|
05fad607ff | ||
|
|
19360a9552 | ||
|
|
b087f3ab7b | ||
|
|
2c8fb31381 | ||
|
|
b2d3f0a8b3 | ||
|
|
bd8025feab | ||
|
|
4c19762e33 | ||
|
|
1015ac40ae | ||
|
|
4ce13f5c1f | ||
|
|
2833362cf6 | ||
|
|
bc51378aee | ||
|
|
39039e2a55 | ||
|
|
f523c01571 | ||
|
|
81eed0680c | ||
|
|
33c0f249b1 | ||
|
|
76eadc562c | ||
|
|
3cc3e3b2e3 | ||
|
|
f07f8530a8 | ||
|
|
103800a732 | ||
|
|
f9ba7a465a | ||
|
|
fed46240d5 | ||
|
|
2703442fd2 | ||
|
|
2813bf18e6 | ||
|
|
dcebea376d | ||
|
|
b3a2b3d484 | ||
|
|
7321e66d08 | ||
|
|
9627352ee4 | ||
|
|
bfc7acbb18 | ||
|
|
e0ca2ce6ec | ||
|
|
b969c5a62f | ||
|
|
28f7e51279 | ||
|
|
d35eb4105e | ||
|
|
b2afe3ce3e | ||
|
|
7337898b84 | ||
|
|
4305160c91 | ||
|
|
dc52d0954e | ||
|
|
d3a575319f | ||
|
|
2592bf1954 | ||
|
|
88d7ffa274 | ||
|
|
474d4230b8 | ||
|
|
10a5424a71 | ||
|
|
7eb04f67e2 | ||
|
|
1d7e078a2c | ||
|
|
f03ec3ea12 | ||
|
|
8e90fa3395 | ||
|
|
02af0a2c87 | ||
|
|
fa4e5513d1 | ||
|
|
93afe5a7c3 | ||
|
|
af85ad390f | ||
|
|
ab45db5360 | ||
|
|
89094cf92e | ||
|
|
3f5484f48f | ||
|
|
ff06b64329 | ||
|
|
6d39563b87 | ||
|
|
0d840c8d4e | ||
|
|
381bd0b576 | ||
|
|
b5c61e0b5b | ||
|
|
df6eddebcd | ||
|
|
b6c8c0e72b | ||
|
|
b06f5fddcb | ||
|
|
8791c6cb69 | ||
|
|
615b166c68 | ||
|
|
c41f2fed21 | ||
|
|
05e47fbb28 | ||
|
|
1b7c8240ba | ||
|
|
c4fd3ab97f | ||
|
|
8c7140d92b | ||
|
|
3969ce0f55 | ||
|
|
4f5f5ea8fc | ||
|
|
5f7cc91cf9 | ||
|
|
424c1f0d41 | ||
|
|
cff05394a1 | ||
|
|
638b40c3ec | ||
|
|
53d2050dc5 | ||
|
|
97ac036df5 | ||
|
|
482f7fe3ba | ||
|
|
567c31ab6a | ||
|
|
7d28fc35d1 | ||
|
|
c06f902398 | ||
|
|
59ae63e6f5 | ||
|
|
9c7d947fd1 | ||
|
|
58db0ad6d8 | ||
|
|
3b09c39dbf | ||
|
|
db917cb0d4 | ||
|
|
ba6cdd875d | ||
|
|
d231ff60ed | ||
|
|
c74904d075 | ||
|
|
22204050f2 | ||
|
|
667197a47e | ||
|
|
e8b866aa88 | ||
|
|
e7b8e1a2c2 | ||
|
|
279b4fd6d2 | ||
|
|
b7c83d3580 | ||
|
|
d4018f2e3b | ||
|
|
38369ca3cf | ||
|
|
cc0bdd34e9 | ||
|
|
8e71ff329c | ||
|
|
b4e3d0ea44 | ||
|
|
992703b15e | ||
|
|
ba5ec80611 | ||
|
|
1c1447e709 | ||
|
|
3c32c0354a | ||
|
|
8a2c6067a2 | ||
|
|
d60a45715a | ||
|
|
3c7a28dab0 | ||
|
|
0c72a434e0 | ||
|
|
32e7b59b1f | ||
|
|
f448423727 | ||
|
|
b2319e52ff | ||
|
|
25279d009a | ||
|
|
3f72ca4bca | ||
|
|
70dc4957dc | ||
|
|
d8ed565bce | ||
|
|
a7f5c6c8f5 | ||
|
|
a8b023e57e | ||
|
|
47ca56f21b | ||
|
|
580a7b032f | ||
|
|
8fcc2bb7f5 | ||
|
|
d6fe58f84e | ||
|
|
2d54de09bb | ||
|
|
4e4d29f653 | ||
|
|
195c13ee8a | ||
|
|
ac5ef64701 | ||
|
|
e7dc24c026 | ||
|
|
155f26060d | ||
|
|
a87fe8d8bf | ||
|
|
158df6b2e8 | ||
|
|
b8db76c925 | ||
|
|
fffdfc9b88 | ||
|
|
6478409b05 | ||
|
|
ee556fd42e | ||
|
|
9a1b8a67d6 | ||
|
|
383e3ccd25 | ||
|
|
b4ce1fb599 | ||
|
|
2bf8e62580 | ||
|
|
afb439b089 | ||
|
|
78a87ad16b | ||
|
|
5bf55a69a5 | ||
|
|
ca782e2f20 | ||
|
|
308de35802 | ||
|
|
a9ac68a729 | ||
|
|
84d6b481ae | ||
|
|
613c8635a8 | ||
|
|
899a04ad26 | ||
|
|
d8dca2a3a9 | ||
|
|
8216899a9a | ||
|
|
b1d419f458 | ||
|
|
0417946153 | ||
|
|
1149578e8f | ||
|
|
37488ee82b | ||
|
|
06bbd501fd | ||
|
|
07e6c5cf69 | ||
|
|
d0c91e4acf | ||
|
|
114cdafe7e | ||
|
|
837bdecde8 | ||
|
|
d9d0e70e5b | ||
|
|
1255efba7f | ||
|
|
6ad397d4a9 | ||
|
|
570ca22d71 | ||
|
|
418f381b49 | ||
|
|
2cae37ccde | ||
|
|
650fbc2563 | ||
|
|
9ec26c60ff | ||
|
|
06204dfcd8 | ||
|
|
fbe40e005d | ||
|
|
e9efef9095 | ||
|
|
95c3181a35 | ||
|
|
12dbfb0777 | ||
|
|
b62dbd1f92 | ||
|
|
5011fde176 | ||
|
|
54cb9a33b1 | ||
|
|
f1fbcd863d | ||
|
|
b00b16aa3a | ||
|
|
0f19695225 | ||
|
|
7fcbe13669 | ||
|
|
87c91ec5c4 | ||
|
|
0e10916b07 | ||
|
|
21df87dedc | ||
|
|
fd26310d6a | ||
|
|
5762a5bc80 | ||
|
|
8106e67f14 | ||
|
|
f029fb3c65 | ||
|
|
44da684d1d | ||
|
|
64c1a4fc8c | ||
|
|
ec47459410 | ||
|
|
c55c3657c3 | ||
|
|
8b56b55761 | ||
|
|
9056c9276f | ||
|
|
09ae312f09 | ||
|
|
d6960c72e4 | ||
|
|
b529e054cf | ||
|
|
b4a4ca81d1 | ||
|
|
c284df2dc9 | ||
|
|
18b2a4c204 | ||
|
|
e1303fb592 | ||
|
|
bd25cf04ed | ||
|
|
ce1f4cbbdc | ||
|
|
e0aea0f27a | ||
|
|
9848c4c56c | ||
|
|
cb8149f8ea | ||
|
|
0e74e82a38 | ||
|
|
15e09f2b24 | ||
|
|
2de8256863 | ||
|
|
aff43fb1a3 | ||
|
|
cafd8e2b1e | ||
|
|
3794080327 | ||
|
|
4e2683f068 | ||
|
|
6e5f595d48 | ||
|
|
2c24ba6d2d | ||
|
|
3bb27fbee6 | ||
|
|
ef1b52eee5 | ||
|
|
f917fa67aa | ||
|
|
386b4c82da | ||
|
|
a987f22cfb | ||
|
|
558c42ec83 | ||
|
|
309ac46b98 | ||
|
|
399b04596e | ||
|
|
75513a23a8 | ||
|
|
e672de036e | ||
|
|
6533af6a91 | ||
|
|
d3372e69ec | ||
|
|
f3809a52e8 | ||
|
|
c5f5ab5363 | ||
|
|
26dfb868e9 | ||
|
|
d5b6042118 | ||
|
|
1115705cbc | ||
|
|
161d17d85b | ||
|
|
5a2ebfcd4a | ||
|
|
06a974df36 | ||
|
|
2762c45569 | ||
|
|
fd85483ce3 | ||
|
|
0a2d0c3b5c | ||
|
|
de7f9a4bd9 | ||
|
|
c5836c8090 | ||
|
|
dcdda314e2 | ||
|
|
a9edd57fe2 | ||
|
|
cbd9fad94b | ||
|
|
bc1009f8c2 | ||
|
|
719b66e5ed | ||
|
|
5f09ba4e10 | ||
|
|
517a0cb673 | ||
|
|
7c79b07817 | ||
|
|
23eed4ff2f | ||
|
|
4559886d83 | ||
|
|
bfc706a596 | ||
|
|
9805a461eb | ||
|
|
bcf8433ba8 | ||
|
|
746dd057b9 | ||
|
|
b710297d05 | ||
|
|
0a8e8cec0f | ||
|
|
96e5a23727 | ||
|
|
611b925368 | ||
|
|
cab2a74740 | ||
|
|
fa3ac22a8f | ||
|
|
95d1048789 | ||
|
|
38b2b8a143 | ||
|
|
3d0bc85b4d | ||
|
|
25a391070b | ||
|
|
926c4a7d04 | ||
|
|
5683f55646 | ||
|
|
671f4e943e | ||
|
|
282c0da941 | ||
|
|
78648f1faf | ||
|
|
8d9b5e4775 | ||
|
|
996026e5ed | ||
|
|
2c7e1db5f6 | ||
|
|
2171b38551 | ||
|
|
afdbdb02a1 | ||
|
|
8adeaa2c7e | ||
|
|
d15f605129 | ||
|
|
252fe42612 | ||
|
|
67e1c7dc72 | ||
|
|
c29ffc38e6 | ||
|
|
4d61cc87cf | ||
|
|
2350018106 | ||
|
|
8fc3dd9457 | ||
|
|
d97ca9fcb2 | ||
|
|
e523da9e12 | ||
|
|
67706cec4e | ||
|
|
fac8c32cda | ||
|
|
1c6f426363 | ||
|
|
8cba65809c | ||
|
|
77ed4f98bb | ||
|
|
848e5ac5de | ||
|
|
9448c5e16f | ||
|
|
0722495434 | ||
|
|
d545c6d73c | ||
|
|
f69fea4210 | ||
|
|
4cd0a4ced4 | ||
|
|
0804cc0cff | ||
|
|
faeee7528f | ||
|
|
261e0cfd5a | ||
|
|
66ecb56cea | ||
|
|
79bd54f610 | ||
|
|
30cef4d5fd | ||
|
|
8d52032263 | ||
|
|
4a5786334b | ||
|
|
587698a6f3 | ||
|
|
927d2cc824 | ||
|
|
7f9cb4e68d | ||
|
|
d48ea4f22c | ||
|
|
412bd2ec20 | ||
|
|
1c189060c2 | ||
|
|
4a00a3c0f5 | ||
|
|
8366e4ca83 | ||
|
|
283db76e45 | ||
|
|
8a36ed4f6f | ||
|
|
d0f45155c8 | ||
|
|
9fc235d670 | ||
|
|
42c549274f | ||
|
|
2e56182a7f | ||
|
|
7932b1849b | ||
|
|
886374d779 | ||
|
|
d42e2e391f | ||
|
|
f264eaeda2 | ||
|
|
35fd74af6d | ||
|
|
4b53cf1464 | ||
|
|
26e78efbb9 | ||
|
|
157f0b8a83 | ||
|
|
d743cf308e | ||
|
|
f4cdcb995c | ||
|
|
75139961a3 | ||
|
|
3e2dd59a94 | ||
|
|
c01386b5b4 | ||
|
|
f59a072aa6 | ||
|
|
9cb8447673 | ||
|
|
e67355ab0e | ||
|
|
9cb9b670d1 | ||
|
|
3e725bb2db | ||
|
|
cc45930ef9 | ||
|
|
3266daa8fd | ||
|
|
ce17b0eeca | ||
|
|
17a65d82bb | ||
|
|
cc7646665c | ||
|
|
82216ebf8b | ||
|
|
49e0d83d5d | ||
|
|
673c3c7a55 | ||
|
|
5135041405 | ||
|
|
42631b85c7 | ||
|
|
9f6f12cfeb | ||
|
|
6c4c9ec1f2 | ||
|
|
4bb4db0668 | ||
|
|
505fdd02ca | ||
|
|
a873f260cf | ||
|
|
b45f761227 | ||
|
|
d7a3039c2a | ||
|
|
6c5589c9d2 | ||
|
|
645ea5a057 | ||
|
|
2755ebe883 | ||
|
|
4b829b5ff9 | ||
|
|
ed19340af0 | ||
|
|
2d05ff0190 | ||
|
|
0d338fa4c0 | ||
|
|
15a66e7b7d | ||
|
|
3a21999a17 | ||
|
|
08dcd05d7b | ||
|
|
2337cc653b | ||
|
|
0deb9262c9 | ||
|
|
9bc5be837b | ||
|
|
b79d5494f9 | ||
|
|
ded2c5bf68 | ||
|
|
bd2c64876f | ||
|
|
7c66fef63f | ||
|
|
4ad8818809 | ||
|
|
2f4342810d | ||
|
|
36df56b093 | ||
|
|
d0a1e8e311 | ||
|
|
1b50381852 | ||
|
|
3443d2c129 | ||
|
|
1f83efa8f2 | ||
|
|
a7bd39b62a | ||
|
|
1177b30a60 | ||
|
|
10e56e0de7 | ||
|
|
6f3339cc49 | ||
|
|
771be0e056 | ||
|
|
71ad4fc406 | ||
|
|
6a5695d059 | ||
|
|
aaf0e7db06 | ||
|
|
7aa0aba382 | ||
|
|
9af190a43c | ||
|
|
2e46bc0aea | ||
|
|
a413aae8a3 | ||
|
|
317cc5c544 | ||
|
|
41bf162306 | ||
|
|
43c27dffd2 | ||
|
|
8f299838f7 | ||
|
|
146a0b29c0 | ||
|
|
964bc7fbe0 | ||
|
|
75aa3cc9bd | ||
|
|
0843fefad3 | ||
|
|
371a780ef4 | ||
|
|
c7db69a30c | ||
|
|
b48cda7173 | ||
|
|
0608cf1476 | ||
|
|
f335e78d1e | ||
|
|
dcfb228c9a | ||
|
|
dc99bbb0af | ||
|
|
c42273a4db | ||
|
|
3bf489cd7c | ||
|
|
d8240f9ee4 | ||
|
|
2448cf2a14 | ||
|
|
004883bdb1 | ||
|
|
11a1191ba0 | ||
|
|
ff66595f7a | ||
|
|
9de2a342e8 | ||
|
|
562f8bc84a | ||
|
|
d69411f414 | ||
|
|
39ccb15880 | ||
|
|
d2c56f9f96 | ||
|
|
7f1a0ce94a | ||
|
|
32dbf83747 | ||
|
|
d9d8b9c526 | ||
|
|
9036bf3398 | ||
|
|
bef5cea48e | ||
|
|
84abf7c95a | ||
|
|
1f0e1bec0e | ||
|
|
8f88613a6b | ||
|
|
e5f332bd71 | ||
|
|
07589cfc34 | ||
|
|
68e0306e62 | ||
|
|
5751686fdc | ||
|
|
2af20f8df8 | ||
|
|
60c1301fca | ||
|
|
b8a7319212 | ||
|
|
6ef4d04b46 | ||
|
|
1dce2b98b4 | ||
|
|
86c4897c9b | ||
|
|
2237603677 | ||
|
|
6e81596609 | ||
|
|
4607c7ed04 | ||
|
|
63638eb447 | ||
|
|
c92ac84679 | ||
|
|
ed9cd9f0e5 | ||
|
|
7d30dbebc5 | ||
|
|
8f03c80ce8 | ||
|
|
ee0366af88 | ||
|
|
683a3cb3ec | ||
|
|
f8bca4fbcb | ||
|
|
1e2a1d9ce5 | ||
|
|
1feac2069b | ||
|
|
26ee6b6dde | ||
|
|
c150ca4889 | ||
|
|
e717e349d0 | ||
|
|
e158017086 | ||
|
|
36d068871d | ||
|
|
6c67a4b500 | ||
|
|
86b43dc605 | ||
|
|
300921a93e | ||
|
|
35b52c4656 | ||
|
|
878302a622 | ||
|
|
55277738d4 | ||
|
|
2f54732500 | ||
|
|
41da298b18 | ||
|
|
ebcf8c2b6b | ||
|
|
1facfd019b | ||
|
|
00e3a752c9 | ||
|
|
d03fd3f883 | ||
|
|
fa3d13ac7e | ||
|
|
434bd5a5bb | ||
|
|
9aca389bda | ||
|
|
69ce42b22c | ||
|
|
114cc776be | ||
|
|
5e5cff897f | ||
|
|
a8bedb6ab9 | ||
|
|
81430bd3bd | ||
|
|
93203f355a | ||
|
|
b730d286ad | ||
|
|
3f2296e3ea | ||
|
|
2b7eb4906a | ||
|
|
db1aa4d02e | ||
|
|
ad88d7a3e0 | ||
|
|
e42c1c9c7a | ||
|
|
556bb2386d | ||
|
|
1e58cef174 | ||
|
|
01010e443f | ||
|
|
da0a7a765e | ||
|
|
9769a0dcdb | ||
|
|
fbff4fa218 | ||
|
|
d17ec56e54 | ||
|
|
9a5f75dba7 | ||
|
|
ca149fb796 | ||
|
|
08f19e074b | ||
|
|
36388cafe9 | ||
|
|
8191c747b9 | ||
|
|
d078763fa1 | ||
|
|
412ead5f2d | ||
|
|
513175ce23 | ||
|
|
8db6dd995a | ||
|
|
4be6970bd4 | ||
|
|
fa681ce246 | ||
|
|
dd5cd97713 | ||
|
|
89d47d26f0 | ||
|
|
d574ceb598 | ||
|
|
92e2f5ca8e | ||
|
|
f2bc792178 | ||
|
|
8ed096f938 | ||
|
|
3405dbf90e | ||
|
|
ee0c5e24bb | ||
|
|
b320f1c7e3 | ||
|
|
6c6b1c0606 | ||
|
|
a5f5add630 | ||
|
|
8557105c40 | ||
|
|
24e7ffb289 | ||
|
|
0b9bcfe01d | ||
|
|
1a0e8f810b | ||
|
|
7698f0672d | ||
|
|
da064ea702 | ||
|
|
0c65289a80 | ||
|
|
95c90a785f | ||
|
|
42c3754dcd | ||
|
|
b742200442 | ||
|
|
3ac70f7cc2 | ||
|
|
104722a56a | ||
|
|
1924beeb20 | ||
|
|
70f6f9a711 | ||
|
|
f1ceefe9a6 | ||
|
|
e1fc1566f3 | ||
|
|
189f74a40d | ||
|
|
370c9b38c0 | ||
|
|
df65c10360 | ||
|
|
4573389da7 | ||
|
|
5c55c1d8a1 | ||
|
|
a1d4e53eb8 | ||
|
|
16b6484c65 | ||
|
|
2156fd4909 | ||
|
|
7f5fbe1dc7 | ||
|
|
2e9090d121 | ||
|
|
23f2690c54 | ||
|
|
5226cd2a0b | ||
|
|
c7e8a82d68 | ||
|
|
77bec1261e | ||
|
|
dfcb24061e | ||
|
|
f85b0c6208 | ||
|
|
e490a30a4a | ||
|
|
654b652530 | ||
|
|
8f4426fbc8 | ||
|
|
32c1e59622 | ||
|
|
e56957f047 | ||
|
|
3cc357bffa | ||
|
|
388faa2c54 | ||
|
|
ce9ed993c8 | ||
|
|
929a13d4cd | ||
|
|
56f0d678e3 | ||
|
|
02645277c8 | ||
|
|
5f4e98e0d9 | ||
|
|
e9e14834bc | ||
|
|
5a57d6308e | ||
|
|
cb73d562d5 | ||
|
|
6c61690ef3 | ||
|
|
bf1f6a997c | ||
|
|
90b29aa808 | ||
|
|
31d0468cd2 | ||
|
|
38f467bae3 | ||
|
|
5c7c4bb9a8 | ||
|
|
9646ec4edd | ||
|
|
c665796c52 | ||
|
|
a72e2ecb3f | ||
|
|
0c47558729 | ||
|
|
93b9288c30 | ||
|
|
9d31a67116 | ||
|
|
cf19036ce6 | ||
|
|
7219ee6532 | ||
|
|
d7bc846c3c | ||
|
|
1adb0a9886 | ||
|
|
8aa01c9c8e | ||
|
|
23dabad0b0 | ||
|
|
45c675bd78 | ||
|
|
860c6ecbcc | ||
|
|
3f38091421 | ||
|
|
7704b3fc7b | ||
|
|
151b3c4021 | ||
|
|
610af95ed1 | ||
|
|
018ef5bd53 | ||
|
|
200eae3bc0 | ||
|
|
9321e425a4 | ||
|
|
9395c0dadc | ||
|
|
a40065878b | ||
|
|
e8d3c0a99a | ||
|
|
2e6ff1b750 | ||
|
|
15f9e397dd | ||
|
|
87a109d0e9 | ||
|
|
f3ffbe167d | ||
|
|
89a34e7508 | ||
|
|
f6fc750f08 | ||
|
|
c32f686c62 | ||
|
|
2ca83efc36 | ||
|
|
06796745ff | ||
|
|
093d531509 | ||
|
|
a915815e4d | ||
|
|
de3c28104c | ||
|
|
d9d13a6204 | ||
|
|
d1f5820711 | ||
|
|
7592f8f189 | ||
|
|
ffbb61713a | ||
|
|
6f51c1ba33 | ||
|
|
cd861345b7 | ||
|
|
823502a40a | ||
|
|
4efad0bb95 | ||
|
|
922352e215 | ||
|
|
8a0b2dc0be | ||
|
|
7eced34e20 | ||
|
|
64d00c1c8e | ||
|
|
6d48798d45 | ||
|
|
0e186835b9 | ||
|
|
b607eb9678 | ||
|
|
85a68f1e56 | ||
|
|
45d13bc295 | ||
|
|
2231401483 | ||
|
|
2abe9e6fd9 | ||
|
|
20f56dfc77 | ||
|
|
67664406da | ||
|
|
0ed60d29cb | ||
|
|
244b6fcef6 |
7
.codespellrc
Normal file
7
.codespellrc
Normal file
@@ -0,0 +1,7 @@
|
||||
# https://github.com/codespell-project/codespell
|
||||
|
||||
[codespell]
|
||||
skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
|
||||
# Need to be lowercase in the list
|
||||
# Unter = Unter den Linden (an example address)
|
||||
ignore-words-list = inout,unter
|
||||
@@ -7,10 +7,13 @@ assignees: ''
|
||||
|
||||
---
|
||||
|
||||
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
|
||||
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.
|
||||
|
||||
Do not send screen shots! Copy any console output directly into the issue.
|
||||
-->
|
||||
|
||||
**Describe the bug**
|
||||
<!-- A clear and concise description of what the bug is. -->
|
||||
<!-- A clear and concise description of what the bug is.-->
|
||||
|
||||
**To Reproduce**
|
||||
<!-- Please describe what you did to get to the issue. -->
|
||||
@@ -25,12 +28,15 @@ assignees: ''
|
||||
- RAM:
|
||||
- number of CPUs:
|
||||
- type and size of disks:
|
||||
- bare metal/AWS/other cloud service:
|
||||
|
||||
**Postgresql Configuration:**
|
||||
|
||||
<!-- List any configuration items you changed in your postgresql configuration. -->
|
||||
|
||||
**Nominatim Configuration:**
|
||||
|
||||
<!-- List the contents of your customized `.env` file. -->
|
||||
|
||||
**Additional context**
|
||||
|
||||
<!-- Add any other context about the problem here. -->
|
||||
|
||||
15
.github/actions/build-nominatim/action.yml
vendored
15
.github/actions/build-nominatim/action.yml
vendored
@@ -1,10 +1,10 @@
|
||||
name: 'Build Nominatim'
|
||||
|
||||
inputs:
|
||||
ubuntu:
|
||||
flavour:
|
||||
description: 'Version of Ubuntu to install on'
|
||||
required: false
|
||||
default: '20'
|
||||
default: 'ubuntu-20'
|
||||
cmake-args:
|
||||
description: 'Additional options to hand to cmake'
|
||||
required: false
|
||||
@@ -23,17 +23,18 @@ runs:
|
||||
sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
|
||||
df -h
|
||||
shell: bash
|
||||
- name: Install prerequisites
|
||||
- name: Install${{ matrix.flavour }} prerequisites
|
||||
run: |
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION}
|
||||
if [ "x$UBUNTUVER" == "x18" ]; then
|
||||
pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 datrie
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
|
||||
if [ "$FLAVOUR" == "oldstuff" ]; then
|
||||
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
|
||||
else
|
||||
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
|
||||
pip3 install sqlalchemy psycopg aiosqlite
|
||||
fi
|
||||
shell: bash
|
||||
env:
|
||||
UBUNTUVER: ${{ inputs.ubuntu }}
|
||||
FLAVOUR: ${{ inputs.flavour }}
|
||||
CMAKE_ARGS: ${{ inputs.cmake-args }}
|
||||
LUA_VERSION: ${{ inputs.lua }}
|
||||
|
||||
|
||||
261
.github/workflows/ci-tests.yml
vendored
261
.github/workflows/ci-tests.yml
vendored
@@ -7,11 +7,11 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- uses: actions/checkout@v4
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
data/country_osm_grid.sql.gz
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
mv nominatim-src.tar.bz2 Nominatim
|
||||
|
||||
- name: 'Upload Artifact'
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
path: nominatim-src.tar.bz2
|
||||
@@ -37,45 +37,38 @@ jobs:
|
||||
needs: create-archive
|
||||
strategy:
|
||||
matrix:
|
||||
ubuntu: [18, 20, 22]
|
||||
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
|
||||
include:
|
||||
- ubuntu: 18
|
||||
postgresql: 9.6
|
||||
postgis: 2.5
|
||||
pytest: pytest
|
||||
php: 7.2
|
||||
- ubuntu: 20
|
||||
- flavour: oldstuff
|
||||
ubuntu: 20
|
||||
postgresql: '9.6'
|
||||
postgis: '2.5'
|
||||
lua: '5.1'
|
||||
- flavour: ubuntu-20
|
||||
ubuntu: 20
|
||||
postgresql: 13
|
||||
postgis: 3
|
||||
pytest: py.test-3
|
||||
php: 7.4
|
||||
- ubuntu: 22
|
||||
lua: '5.3'
|
||||
- flavour: ubuntu-22
|
||||
ubuntu: 22
|
||||
postgresql: 15
|
||||
postgis: 3
|
||||
pytest: py.test-3
|
||||
php: 8.1
|
||||
lua: '5.3'
|
||||
|
||||
runs-on: ubuntu-${{ matrix.ubuntu }}.04
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
- name: Unpack Nominatim
|
||||
run: tar xf nominatim-src.tar.bz2
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: ${{ matrix.php }}
|
||||
tools: phpunit, phpcs, composer
|
||||
ini-values: opcache.jit=disable
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
with:
|
||||
python-version: 3.6
|
||||
if: matrix.ubuntu == 18
|
||||
python-version: 3.7
|
||||
if: matrix.flavour == 'oldstuff'
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
@@ -84,59 +77,69 @@ jobs:
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
ubuntu: ${{ matrix.ubuntu }}
|
||||
flavour: ${{ matrix.flavour }}
|
||||
lua: ${{ matrix.lua }}
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: sudo apt-get install -y -qq python3-pytest python3-behave
|
||||
if: matrix.ubuntu == 20
|
||||
- name: Install test prerequsites (behave from apt)
|
||||
run: sudo apt-get install -y -qq python3-behave
|
||||
if: matrix.flavour == 'ubuntu-20'
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: pip3 install pylint pytest behave==1.2.6
|
||||
if: ${{ (matrix.ubuntu == 18) || (matrix.ubuntu == 22) }}
|
||||
- name: Install test prerequsites (behave from pip)
|
||||
run: pip3 install behave==1.2.6
|
||||
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: sudo apt-get install -y -qq python3-pytest
|
||||
if: matrix.ubuntu == 22
|
||||
- name: Install test prerequsites (from apt for Ununtu 2x)
|
||||
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
|
||||
if: matrix.flavour != 'oldstuff'
|
||||
|
||||
- name: Install latest pylint/mypy
|
||||
run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions
|
||||
- name: Install newer pytest-asyncio
|
||||
run: pip3 install -U pytest-asyncio==0.21.1
|
||||
if: matrix.flavour == 'ubuntu-20'
|
||||
|
||||
- name: PHP linting
|
||||
run: phpcs --report-width=120 .
|
||||
working-directory: Nominatim
|
||||
- name: Install test prerequsites (from pip for Ubuntu 18)
|
||||
run: pip3 install pytest pytest-asyncio uvicorn
|
||||
if: matrix.flavour == 'oldstuff'
|
||||
|
||||
- name: Install Python webservers
|
||||
run: pip3 install falcon starlette asgi_lifespan
|
||||
|
||||
- name: Install latest pylint
|
||||
run: pip3 install -U pylint
|
||||
if: matrix.flavour != 'oldstuff'
|
||||
|
||||
- name: Python linting
|
||||
run: pylint nominatim
|
||||
run: python3 -m pylint nominatim
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: Python static typechecking
|
||||
run: mypy --strict nominatim
|
||||
working-directory: Nominatim
|
||||
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
working-directory: Nominatim/test/php
|
||||
if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }}
|
||||
if: matrix.flavour != 'oldstuff'
|
||||
|
||||
- name: Python unit tests
|
||||
run: $PYTEST test/python
|
||||
run: python3 -m pytest test/python
|
||||
working-directory: Nominatim
|
||||
env:
|
||||
PYTEST: ${{ matrix.pytest }}
|
||||
|
||||
- name: BDD tests
|
||||
run: |
|
||||
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
- name: Install mypy and typechecking info
|
||||
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
|
||||
if: matrix.flavour != 'oldstuff'
|
||||
|
||||
- name: Python static typechecking
|
||||
run: python3 -m mypy --strict nominatim
|
||||
working-directory: Nominatim
|
||||
if: matrix.flavour != 'oldstuff'
|
||||
|
||||
legacy-test:
|
||||
needs: create-archive
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
postgresql: ["13", "16"]
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
@@ -146,19 +149,20 @@ jobs:
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: 7.4
|
||||
php-version: '7.4'
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 13
|
||||
postgresql-version: ${{ matrix.postgresql }}
|
||||
postgis-version: 3
|
||||
|
||||
- name: Install Postgresql server dev
|
||||
run: sudo apt-get install postgresql-server-dev-13
|
||||
run: sudo apt-get install postgresql-server-dev-$PGVER
|
||||
env:
|
||||
PGVER: ${{ matrix.postgresql }}
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
ubuntu: 20
|
||||
cmake-args: -DBUILD_MODULE=on
|
||||
|
||||
- name: Install test prerequsites
|
||||
@@ -166,7 +170,54 @@ jobs:
|
||||
|
||||
- name: BDD tests (legacy tokenizer)
|
||||
run: |
|
||||
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php -DTOKENIZER=legacy --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
|
||||
php-test:
|
||||
needs: create-archive
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
- name: Unpack Nominatim
|
||||
run: tar xf nominatim-src.tar.bz2
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 15
|
||||
postgis-version: 3
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: 8.1
|
||||
tools: phpunit:9, phpcs, composer
|
||||
ini-values: opcache.jit=disable
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: PHP linting
|
||||
run: phpcs --report-width=120 .
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
working-directory: Nominatim/test/php
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
flavour: 'ubuntu-22'
|
||||
|
||||
- name: Install test prerequsites
|
||||
run: sudo apt-get install -y -qq python3-behave
|
||||
|
||||
- name: BDD tests (php)
|
||||
run: |
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
|
||||
@@ -176,20 +227,13 @@ jobs:
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
name: [Ubuntu-18, Ubuntu-20, Ubuntu-22]
|
||||
name: [Ubuntu-20, Ubuntu-22]
|
||||
include:
|
||||
- name: Ubuntu-18
|
||||
flavour: ubuntu
|
||||
image: "ubuntu:18.04"
|
||||
ubuntu: 18
|
||||
install_mode: install-nginx
|
||||
- name: Ubuntu-20
|
||||
flavour: ubuntu
|
||||
image: "ubuntu:20.04"
|
||||
ubuntu: 20
|
||||
install_mode: install-apache
|
||||
- name: Ubuntu-22
|
||||
flavour: ubuntu
|
||||
image: "ubuntu:22.04"
|
||||
ubuntu: 22
|
||||
install_mode: install-apache
|
||||
@@ -212,14 +256,6 @@ jobs:
|
||||
apt-get install -y git sudo wget
|
||||
ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
|
||||
shell: bash
|
||||
if: matrix.flavour == 'ubuntu'
|
||||
|
||||
- name: Prepare container (CentOS)
|
||||
run: |
|
||||
dnf update -y
|
||||
dnf install -y sudo glibc-langpack-en
|
||||
shell: bash
|
||||
if: matrix.flavour == 'centos'
|
||||
|
||||
- name: Setup import user
|
||||
run: |
|
||||
@@ -231,7 +267,7 @@ jobs:
|
||||
OS: ${{ matrix.name }}
|
||||
INSTALL_MODE: ${{ matrix.install_mode }}
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
path: /home/nominatim
|
||||
@@ -249,26 +285,25 @@ jobs:
|
||||
- name: Prepare import environment
|
||||
run: |
|
||||
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
|
||||
mv Nominatim/settings/flex-base.lua flex-base.lua
|
||||
mv Nominatim/settings/import-extratags.lua import-extratags.lua
|
||||
mv Nominatim/settings/taginfo.lua taginfo.lua
|
||||
rm -rf Nominatim
|
||||
mkdir data-env-reverse
|
||||
working-directory: /home/nominatim
|
||||
|
||||
- name: Prepare import environment (CentOS)
|
||||
run: |
|
||||
sudo ln -s /usr/local/bin/nominatim /usr/bin/nominatim
|
||||
echo NOMINATIM_DATABASE_WEBUSER="apache" > nominatim-project/.env
|
||||
cp nominatim-project/.env data-env-reverse/.env
|
||||
working-directory: /home/nominatim
|
||||
if: matrix.flavour == 'centos'
|
||||
|
||||
- name: Print version
|
||||
run: nominatim --version
|
||||
working-directory: /home/nominatim/nominatim-project
|
||||
|
||||
- name: Print taginfo
|
||||
run: lua taginfo.lua
|
||||
working-directory: /home/nominatim
|
||||
|
||||
- name: Collect host OS information
|
||||
run: nominatim admin --collect-os-info
|
||||
working-directory: /home/nominatim/nominatim-project
|
||||
|
||||
|
||||
- name: Import
|
||||
run: nominatim import --osm-file ../test.pbf
|
||||
working-directory: /home/nominatim/nominatim-project
|
||||
@@ -288,7 +323,6 @@ jobs:
|
||||
- name: Prepare update (Ubuntu)
|
||||
run: apt-get install -y python3-pip
|
||||
shell: bash
|
||||
if: matrix.flavour == 'ubuntu'
|
||||
|
||||
- name: Run update
|
||||
run: |
|
||||
@@ -314,3 +348,54 @@ jobs:
|
||||
- name: Clean up database (reverse-only import)
|
||||
run: nominatim refresh --postcodes --word-tokens
|
||||
working-directory: /home/nominatim/nominatim-project
|
||||
|
||||
install-no-superuser:
|
||||
runs-on: ubuntu-latest
|
||||
needs: create-archive
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
- name: Unpack Nominatim
|
||||
run: tar xf nominatim-src.tar.bz2
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 16
|
||||
postgis-version: 3
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
flavour: ubuntu-22
|
||||
lua: 5.3
|
||||
|
||||
- name: Prepare import environment
|
||||
run: |
|
||||
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
|
||||
rm -rf Nominatim
|
||||
|
||||
- name: Prepare Database
|
||||
run: |
|
||||
nominatim import --prepare-database
|
||||
|
||||
- name: Create import user
|
||||
run: |
|
||||
sudo -u postgres createuser osm-import
|
||||
psql -d nominatim -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import'"
|
||||
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "osm-import"'
|
||||
|
||||
- name: Run import
|
||||
run: |
|
||||
NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file test.pbf
|
||||
|
||||
- name: Check full import
|
||||
run: nominatim admin --check-database
|
||||
|
||||
codespell:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: codespell-project/actions-codespell@v2
|
||||
with:
|
||||
only_warn: 1
|
||||
|
||||
12
.mypy.ini
12
.mypy.ini
@@ -1,9 +1,13 @@
|
||||
[mypy]
|
||||
plugins = sqlalchemy.ext.mypy.plugin
|
||||
|
||||
[mypy-sanic_cors.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-icu.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-osmium.*]
|
||||
[mypy-asyncpg.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-datrie.*]
|
||||
@@ -11,3 +15,9 @@ ignore_missing_imports = True
|
||||
|
||||
[mypy-dotenv.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-falcon.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
[mypy-geoalchemy2.*]
|
||||
ignore_missing_imports = True
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[MASTER]
|
||||
|
||||
extension-pkg-whitelist=osmium
|
||||
extension-pkg-whitelist=osmium,falcon
|
||||
ignored-modules=icu,datrie
|
||||
|
||||
[MESSAGES CONTROL]
|
||||
@@ -13,6 +13,6 @@ ignored-classes=NominatimArgs,closing
|
||||
# 'too-many-ancestors' is triggered already by deriving from UserDict
|
||||
# 'not-context-manager' disabled because it causes false positives once
|
||||
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
|
||||
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager
|
||||
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init
|
||||
|
||||
good-names=i,x,y,m,fd,db,cc
|
||||
good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr
|
||||
|
||||
@@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
|
||||
project(nominatim)
|
||||
|
||||
set(NOMINATIM_VERSION_MAJOR 4)
|
||||
set(NOMINATIM_VERSION_MINOR 2)
|
||||
set(NOMINATIM_VERSION_MINOR 4)
|
||||
set(NOMINATIM_VERSION_PATCH 0)
|
||||
|
||||
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
|
||||
@@ -73,7 +73,7 @@ endif()
|
||||
#-----------------------------------------------------------------------------
|
||||
|
||||
if (BUILD_IMPORTER)
|
||||
find_package(PythonInterp 3.6 REQUIRED)
|
||||
find_package(PythonInterp 3.7 REQUIRED)
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
@@ -82,26 +82,17 @@ endif()
|
||||
|
||||
# Setting PHP binary variable as to command line (prevailing) or auto detect
|
||||
|
||||
if (BUILD_API OR BUILD_IMPORTER)
|
||||
if (BUILD_API)
|
||||
if (NOT PHP_BIN)
|
||||
find_program (PHP_BIN php)
|
||||
endif()
|
||||
# sanity check if PHP binary exists
|
||||
if (NOT EXISTS ${PHP_BIN})
|
||||
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
|
||||
message(WARNING "PHP binary not found. Only Python frontend can be used.")
|
||||
set(PHP_BIN "")
|
||||
else()
|
||||
message (STATUS "Using PHP binary " ${PHP_BIN})
|
||||
endif()
|
||||
if (NOT PHPCGI_BIN)
|
||||
find_program (PHPCGI_BIN php-cgi)
|
||||
endif()
|
||||
# sanity check if PHP binary exists
|
||||
if (NOT EXISTS ${PHPCGI_BIN})
|
||||
message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
|
||||
set (PHPCGI_BIN "")
|
||||
else()
|
||||
message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
|
||||
endif()
|
||||
endif()
|
||||
|
||||
#-----------------------------------------------------------------------------
|
||||
@@ -233,7 +224,18 @@ if (BUILD_IMPORTER)
|
||||
install(DIRECTORY nominatim
|
||||
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
|
||||
FILES_MATCHING PATTERN "*.py"
|
||||
PATTERN "paths.py" EXCLUDE
|
||||
PATTERN __pycache__ EXCLUDE)
|
||||
|
||||
if (EXISTS ${PHP_BIN})
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
|
||||
else()
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py-no-php.tmpl paths-py.installed)
|
||||
endif()
|
||||
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
|
||||
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
|
||||
RENAME paths.py)
|
||||
|
||||
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
|
||||
|
||||
install(FILES ${COUNTRY_GRID_FILE}
|
||||
@@ -257,18 +259,19 @@ if (BUILD_MODULE)
|
||||
DESTINATION ${NOMINATIM_LIBDIR}/module)
|
||||
endif()
|
||||
|
||||
if (BUILD_API)
|
||||
if (BUILD_API AND EXISTS ${PHP_BIN})
|
||||
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
|
||||
endif()
|
||||
|
||||
install(FILES settings/env.defaults
|
||||
settings/address-levels.json
|
||||
settings/phrase-settings.json
|
||||
settings/import-admin.style
|
||||
settings/import-street.style
|
||||
settings/import-address.style
|
||||
settings/import-full.style
|
||||
settings/import-extratags.style
|
||||
settings/import-admin.lua
|
||||
settings/import-street.lua
|
||||
settings/import-address.lua
|
||||
settings/import-full.lua
|
||||
settings/import-extratags.lua
|
||||
settings/flex-base.lua
|
||||
settings/icu_tokenizer.yaml
|
||||
settings/country_settings.yaml
|
||||
DESTINATION ${NOMINATIM_CONFIGDIR})
|
||||
|
||||
@@ -69,7 +69,7 @@ Before submitting a pull request make sure that the tests pass:
|
||||
|
||||
Nominatim follows semantic versioning. Major releases are done for large changes
|
||||
that require (or at least strongly recommend) a reimport of the databases.
|
||||
Minor releases can usually be applied to exisiting databases Patch releases
|
||||
Minor releases can usually be applied to existing databases. Patch releases
|
||||
contain bug fixes only and are released from a separate branch where the
|
||||
relevant changes are cherry-picked from the master branch.
|
||||
|
||||
|
||||
124
ChangeLog
124
ChangeLog
@@ -1,3 +1,105 @@
|
||||
4.4.0
|
||||
* add export to SQLite database and SQLite support for the frontend
|
||||
* switch to Python frontend as the default frontend
|
||||
* update to osm2pgsql 1.11.0
|
||||
* add support for new osm2pgsql middle table format
|
||||
* simplify geometry for large polygon objects not used in addresses
|
||||
* various performance tweaks for search in Python frontend
|
||||
* fix regression in search with categories where it was confused with near
|
||||
search
|
||||
* partially roll back use of SQLAlchemy lambda statements due to bugs
|
||||
in SQLAchemy
|
||||
* fix handling of timezones for timestamps from the database
|
||||
* fix handling of full address searches in connection with a viewbox
|
||||
* fix postcode computation of highway areas
|
||||
* fix handling of timeout errors for Python <= 3.10
|
||||
* fix address computation for postcode areas
|
||||
* fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
|
||||
* make sure extratags are always null when empty
|
||||
* reduce importance of places without wikipedia reference
|
||||
* improve performance of word count computations
|
||||
* drop support for wikipedia tags with full URLs
|
||||
* replace get_addressdata() SQL implementation with a Python function
|
||||
* improve display name for non-address features
|
||||
* fix postcode validation for postcodes with country code
|
||||
(thanks @pawel-wroniszewski)
|
||||
* add possibility to run imports without superuser database rights
|
||||
(thanks @robbe-haesendonck)
|
||||
* new CLI command for cleaning deleted relations (thanks @lujoh)
|
||||
* add check for database version in the CLI check command
|
||||
* updates to import styles ignoring more unused objects
|
||||
* various typo fixes (thanks @kumarUjjawal)
|
||||
|
||||
4.3.2
|
||||
* fix potential SQL injection issue for 'nominatim admin --collect-os-info'
|
||||
* PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
|
||||
* Python frontend: improve handling of viewbox
|
||||
* Python frontend: correct deployment instructions
|
||||
|
||||
4.3.1
|
||||
* reintroduce result rematching
|
||||
* improve search of multi-part names
|
||||
* fix accidentally switched meaning of --reverse-only and --search-only in
|
||||
warm command
|
||||
|
||||
4.3.0
|
||||
* fix failing importance recalculation command
|
||||
* fix merging of linked names into unnamed boundaries
|
||||
* fix a number of corner cases with interpolation splitting resulting in
|
||||
invalid geometries
|
||||
* fix failure in website generation when password contains curly brackets
|
||||
* fix broken use of ST_Project in PostGIS 3.4
|
||||
* new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
|
||||
to known countries (thanks @alfmarcua)
|
||||
* allow negative OSM IDs (thanks @alfmarcua)
|
||||
* disallow import of Tiger data in a frozen DB
|
||||
* avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
|
||||
* update bundled osm2pgsql to 1.9.2
|
||||
* reorganise osm2pgsql flex style and make it the default
|
||||
* exclude names ending in :wikipedia from indexing
|
||||
* no longer accept comma as a list separator in name tags
|
||||
* process forward dependencies on update to catch updates in geometries
|
||||
of ways and relations
|
||||
* fix handling of isolated silent letters during transliteration
|
||||
* no longer assign postcodes to large linear features like rivers
|
||||
* introduce nominatim.paths module for finding data and libraries
|
||||
* documentation layout changed to material theme
|
||||
* new documentation section for library
|
||||
* various smaller fixes to existing documentation
|
||||
(thanks @woodpeck, @bloom256, @biswajit-k)
|
||||
* updates to vagrant install scripts, drop support for Ubuntu 18
|
||||
(thanks @n-timofeev)
|
||||
* removed obsolete configuration variables from env.defaults
|
||||
* add script for generating a taginfo description (thanks @biswajit-k)
|
||||
* modernize Python code around BDD test and add testing of Python frontend
|
||||
* lots of new BDD tests for API output
|
||||
|
||||
4.2.3
|
||||
|
||||
* fix deletion handling for 'nominatim add-data'
|
||||
* adapt place_force_delete() to new deletion handling
|
||||
* flex style: avoid dropping of postcode areas
|
||||
* fix update errors on address interpolation handling
|
||||
|
||||
4.2.2
|
||||
|
||||
* extend flex-style library to fully support all default styles
|
||||
* fix handling of Hebrew aleph
|
||||
* do not assign postcodes to rivers
|
||||
* fix string matching in PHP code
|
||||
* update osm2pgsql (various updates to flex)
|
||||
* fix slow query when deleting places on update
|
||||
* fix CLI details query
|
||||
* fix recalculation of importance values
|
||||
* fix polygon simplification in reverse results
|
||||
* add class/type information to reverse geocodejson result
|
||||
* minor improvements to default tokenizer configuration
|
||||
* various smaller fixes to documentation
|
||||
|
||||
4.2.1
|
||||
|
||||
* fix XSS vulnerability in debug view
|
||||
|
||||
4.2.0
|
||||
|
||||
* add experimental support for osm2pgsql flex style
|
||||
@@ -21,6 +123,10 @@
|
||||
* typing fixes to work with latest type annotations from typeshed
|
||||
* smaller improvements to documentation (thanks to @mausch)
|
||||
|
||||
4.1.1
|
||||
|
||||
* fix XSS vulnerability in debug view
|
||||
|
||||
4.1.0
|
||||
|
||||
* switch to ICU tokenizer as default
|
||||
@@ -57,6 +163,10 @@
|
||||
* add setup instructions for updates and systemd
|
||||
* drop support for PostgreSQL 9.5
|
||||
|
||||
4.0.2
|
||||
|
||||
* fix XSS vulnerability in debug view
|
||||
|
||||
4.0.1
|
||||
|
||||
* fix initialisation error in replication script
|
||||
@@ -95,6 +205,10 @@
|
||||
* add testing of installation scripts via CI
|
||||
* drop support for Python < 3.6 and Postgresql < 9.5
|
||||
|
||||
3.7.3
|
||||
|
||||
* fix XSS vulnerability in debug view
|
||||
|
||||
3.7.2
|
||||
|
||||
* fix database check for reverse-only imports
|
||||
@@ -170,7 +284,7 @@
|
||||
* increase splitting for large geometries to improve indexing speed
|
||||
* remove deprecated get_magic_quotes_gpc() function
|
||||
* make sure that all postcodes have an entry in word and are thus searchable
|
||||
* remove use of ST_Covers in conjunction woth ST_Intersects,
|
||||
* remove use of ST_Covers in conjunction with ST_Intersects,
|
||||
causes bad query planning and slow updates in Postgis3
|
||||
* update osm2pgsql
|
||||
|
||||
@@ -227,7 +341,7 @@
|
||||
* exclude postcode ranges separated by colon from centre point calculation
|
||||
* update osm2pgsql, better handling of imports without flatnode file
|
||||
* switch to more efficient algorithm for word set computation
|
||||
* use only boundries for country and state parts of addresses
|
||||
* use only boundaries for country and state parts of addresses
|
||||
* improve updates of addresses with housenumbers and interpolations
|
||||
* remove country from place_addressline table and use country_code instead
|
||||
* optimise indexes on search_name partition tables
|
||||
@@ -266,7 +380,7 @@
|
||||
|
||||
* complete rewrite of reverse search algorithm
|
||||
* add new geojson and geocodejson output formats
|
||||
* add simple export script to exprot addresses to CSV
|
||||
* add simple export script to export addresses to CSV
|
||||
* remove is_in terms from address computation
|
||||
* remove unused search_name_country tables
|
||||
* various smaller fixes to query parsing
|
||||
@@ -331,7 +445,7 @@
|
||||
* move installation documentation into this repo
|
||||
* add self-documenting vagrant scripts
|
||||
* remove --create-website, recommend to use website directory in build
|
||||
* add accessor functions for URL parameters and improve erro checking
|
||||
* add accessor functions for URL parameters and improve error checking
|
||||
* remove IP blocking and rate-limiting code
|
||||
* enable CI via travis
|
||||
* reformatting for more consistent coding style
|
||||
@@ -342,7 +456,7 @@
|
||||
* update to refactored osm2pgsql which use libosmium based types
|
||||
* switch from osmosis to pyosmium for updates
|
||||
* be more strict when matching against special search terms
|
||||
* handle postcode entries with mutliple values correctly
|
||||
* handle postcode entries with multiple values correctly
|
||||
|
||||
2.5
|
||||
|
||||
|
||||
@@ -9,11 +9,10 @@ versions.
|
||||
|
||||
| Version | End of support for security updates |
|
||||
| ------- | ----------------------------------- |
|
||||
| 4.4.x | 2026-03-07 |
|
||||
| 4.3.x | 2025-09-07 |
|
||||
| 4.2.x | 2024-11-24 |
|
||||
| 4.1.x | 2024-08-05 |
|
||||
| 4.0.x | 2023-11-02 |
|
||||
| 3.7.x | 2023-04-05 |
|
||||
| 3.6.x | 2022-12-12 |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
@@ -37,4 +36,6 @@ incident. Announcements will also be published at the
|
||||
|
||||
## List of Previous Incidents
|
||||
|
||||
* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
|
||||
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
|
||||
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
|
||||
|
||||
32
VAGRANT.md
32
VAGRANT.md
@@ -1,6 +1,6 @@
|
||||
# Install Nominatim in a virtual machine for development and testing
|
||||
|
||||
This document describes how you can install Nominatim inside a Ubuntu 16
|
||||
This document describes how you can install Nominatim inside a Ubuntu 22
|
||||
virtual machine on your desktop/laptop (host machine). The goal is to give
|
||||
you a development environment to easily edit code and run the test suite
|
||||
without affecting the rest of your system.
|
||||
@@ -69,8 +69,7 @@ installation.
|
||||
PHP errors are written to `/var/log/apache2/error.log`.
|
||||
|
||||
With `echo` and `var_dump()` you write into the output (HTML/XML/JSON) when
|
||||
you either add `&debug=1` to the URL (preferred) or set
|
||||
`@define('CONST_Debug', true);` in `settings/local.php`.
|
||||
you either add `&debug=1` to the URL.
|
||||
|
||||
In the Python BDD test you can use `logger.info()` for temporary debug
|
||||
statements.
|
||||
@@ -130,6 +129,10 @@ and then
|
||||
Yes, Vagrant and Virtualbox can be installed on MS Windows just fine. You need a 64bit
|
||||
version of Windows.
|
||||
|
||||
##### Will it run on Apple Silicon?
|
||||
|
||||
You might need to replace Virtualbox with [Parallels](https://www.parallels.com/products/desktop/).
|
||||
There is no free/open source version of Parallels.
|
||||
|
||||
##### Why Monaco, can I use another country?
|
||||
|
||||
@@ -141,11 +144,12 @@ No. Long running Nominatim installations will differ once new import features (o
|
||||
bug fixes) get added since those usually only get applied to new/changed data.
|
||||
|
||||
Also this document skips the optional Wikipedia data import which affects ranking
|
||||
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
|
||||
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation)
|
||||
for details.
|
||||
|
||||
##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?
|
||||
|
||||
There is a Vagrant script for CentOS available, but the Nominatim directory
|
||||
There used to be a Vagrant script for CentOS available, but the Nominatim directory
|
||||
isn't symlinked/mounted to the host which makes development trickier. We used
|
||||
it mainly for debugging installation with SELinux.
|
||||
|
||||
@@ -154,14 +158,17 @@ are slightly different, e.g. the name of the package manager, Apache2 package
|
||||
name, location of files. We chose Ubuntu because that is closest to the
|
||||
nominatim.openstreetmap.org production environment.
|
||||
|
||||
You can configure/download other Vagrant boxes from [https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
|
||||
You can configure/download other Vagrant boxes from
|
||||
[https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
|
||||
|
||||
##### How can I connect to an existing database?
|
||||
|
||||
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
|
||||
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com`
|
||||
and port `5432`. The Postgres username is `postgres`. You can edit the `.env` in your
|
||||
project directory and point Nominatim to it.
|
||||
|
||||
NOMINATIM_DATABASE_DSN="pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
|
||||
|
||||
pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
|
||||
|
||||
No data import or restarting necessary.
|
||||
|
||||
If the Postgres installation is behind a firewall, you can try
|
||||
@@ -169,11 +176,12 @@ If the Postgres installation is behind a firewall, you can try
|
||||
ssh -L 9999:localhost:5432 your-username@your-server.com
|
||||
|
||||
inside the virtual machine. It will map the port to `localhost:9999` and then
|
||||
you edit `settings/local.php` with
|
||||
you edit `.env` file with
|
||||
|
||||
@define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
|
||||
NOMINATIM_DATABASE_DSN="pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it"
|
||||
|
||||
To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`
|
||||
To access postgres directly remember to specify the hostname,
|
||||
e.g. `psql --host localhost --port 9999 nominatim_it`
|
||||
|
||||
|
||||
##### My computer is slow and the import takes too long. Can I start the virtual machine "in the cloud"?
|
||||
|
||||
90
Vagrantfile
vendored
90
Vagrantfile
vendored
@@ -17,6 +17,14 @@ Vagrant.configure("2") do |config|
|
||||
checkout = "no"
|
||||
end
|
||||
|
||||
config.vm.provider "hyperv" do |hv, override|
|
||||
hv.memory = 2048
|
||||
hv.linked_clone = true
|
||||
if ENV['CHECKOUT'] != 'y' then
|
||||
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.provider "virtualbox" do |vb, override|
|
||||
vb.gui = false
|
||||
vb.memory = 2048
|
||||
@@ -30,11 +38,38 @@ Vagrant.configure("2") do |config|
|
||||
lv.memory = 2048
|
||||
lv.nested = true
|
||||
if ENV['CHECKOUT'] != 'y' then
|
||||
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
|
||||
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs', nfs_udp: false
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu", primary: true do |sub|
|
||||
config.vm.define "ubuntu22", primary: true do |sub|
|
||||
sub.vm.box = "generic/ubuntu2204"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-22.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu22-apache" do |sub|
|
||||
sub.vm.box = "generic/ubuntu2204"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-22.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout, "install-apache"]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu22-nginx" do |sub|
|
||||
sub.vm.box = "generic/ubuntu2204"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-22.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout, "install-nginx"]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu20" do |sub|
|
||||
sub.vm.box = "generic/ubuntu2004"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-20.sh"
|
||||
@@ -43,7 +78,7 @@ Vagrant.configure("2") do |config|
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu-apache" do |sub|
|
||||
config.vm.define "ubuntu20-apache" do |sub|
|
||||
sub.vm.box = "generic/ubuntu2004"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-20.sh"
|
||||
@@ -52,7 +87,7 @@ Vagrant.configure("2") do |config|
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu-nginx" do |sub|
|
||||
config.vm.define "ubuntu20-nginx" do |sub|
|
||||
sub.vm.box = "generic/ubuntu2004"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-20.sh"
|
||||
@@ -60,51 +95,4 @@ Vagrant.configure("2") do |config|
|
||||
s.args = [checkout, "install-nginx"]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu18" do |sub|
|
||||
sub.vm.box = "generic/ubuntu1804"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-18.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu18-apache" do |sub|
|
||||
sub.vm.box = "generic/ubuntu1804"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-18.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout, "install-apache"]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "ubuntu18-nginx" do |sub|
|
||||
sub.vm.box = "generic/ubuntu1804"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Ubuntu-18.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout, "install-nginx"]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "centos7" do |sub|
|
||||
sub.vm.box = "centos/7"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Centos-7.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout]
|
||||
end
|
||||
end
|
||||
|
||||
config.vm.define "centos" do |sub|
|
||||
sub.vm.box = "generic/centos8"
|
||||
sub.vm.provision :shell do |s|
|
||||
s.path = "vagrant/Install-on-Centos-8.sh"
|
||||
s.privileged = false
|
||||
s.args = [checkout]
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
end
|
||||
|
||||
15
cmake/paths-py-no-php.tmpl
Normal file
15
cmake/paths-py-no-php.tmpl
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Path settings for extra data used by Nominatim (installed version).
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
PHPLIB_DIR = None
|
||||
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
|
||||
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
|
||||
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()
|
||||
15
cmake/paths-py.tmpl
Normal file
15
cmake/paths-py.tmpl
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Path settings for extra data used by Nominatim (installed version).
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
|
||||
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
|
||||
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
|
||||
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()
|
||||
@@ -4,17 +4,10 @@ import os
|
||||
|
||||
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
|
||||
|
||||
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
|
||||
|
||||
from nominatim import cli
|
||||
from nominatim import version
|
||||
|
||||
version.GIT_COMMIT_HASH = '@GIT_HASH@'
|
||||
|
||||
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
|
||||
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
|
||||
phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
|
||||
sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
|
||||
data_dir='@NOMINATIM_DATADIR@',
|
||||
config_dir='@NOMINATIM_CONFIGDIR@',
|
||||
phpcgi_path='@PHPCGI_BIN@'))
|
||||
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
|
||||
|
||||
@@ -4,17 +4,10 @@ import os
|
||||
|
||||
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
|
||||
|
||||
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
|
||||
|
||||
from nominatim import cli
|
||||
from nominatim import version
|
||||
|
||||
version.GIT_COMMIT_HASH = '@GIT_HASH@'
|
||||
|
||||
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
|
||||
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
|
||||
phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
|
||||
sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
|
||||
data_dir='@CMAKE_SOURCE_DIR@/data',
|
||||
config_dir='@CMAKE_SOURCE_DIR@/settings',
|
||||
phpcgi_path='@PHPCGI_BIN@'))
|
||||
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
|
||||
|
||||
@@ -11,6 +11,7 @@ set (DOC_SOURCES
|
||||
develop
|
||||
api
|
||||
customize
|
||||
library
|
||||
index.md
|
||||
extra.css
|
||||
styles.css
|
||||
@@ -23,13 +24,12 @@ foreach (src ${DOC_SOURCES})
|
||||
endforeach()
|
||||
|
||||
ADD_CUSTOM_TARGET(doc
|
||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
|
||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
|
||||
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
|
||||
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
||||
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
||||
)
|
||||
|
||||
ADD_CUSTOM_TARGET(serve-doc
|
||||
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
|
||||
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
|
||||
COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
|
||||
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
|
||||
)
|
||||
|
||||
@@ -5,6 +5,35 @@ your Nominatim database. It is assumed that you have already successfully
|
||||
installed the Nominatim software itself, if not return to the
|
||||
[installation page](Installation.md).
|
||||
|
||||
## Importing with a database user without superuser rights
|
||||
|
||||
Nominatim usually creates its own PostgreSQL database at the beginning of the
|
||||
import process. This makes usage easier for the user but means that the
|
||||
database user doing the import needs the appropriate rights.
|
||||
|
||||
If you prefer to run the import with a database user with limited rights,
|
||||
you can do so by changing the import process as follows:
|
||||
|
||||
1. Run the command for database preparation with a database user with
|
||||
superuser rights. For example, to use a db user 'dbadmin' for a
|
||||
database 'nominatim', execute:
|
||||
|
||||
```
|
||||
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=dbadmin" nominatim import --prepare-database
|
||||
```
|
||||
|
||||
2. Grant the import user the right to create tables. For example, foe user 'import-user':
|
||||
|
||||
```
|
||||
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "import-user"'
|
||||
```
|
||||
|
||||
3. Now run the reminder of the import with the import user:
|
||||
|
||||
```
|
||||
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=import-user" nominatim import --continue import-from-file --osm-file file.pbf
|
||||
```
|
||||
|
||||
## Importing multiple regions (without updates)
|
||||
|
||||
To import multiple regions in your database you can simply give multiple
|
||||
@@ -36,16 +65,15 @@ which has the following structure:
|
||||
|
||||
```bash
|
||||
update
|
||||
├── europe
|
||||
│ ├── andorra
|
||||
│ │ └── sequence.state
|
||||
│ └── monaco
|
||||
│ └── sequence.state
|
||||
└── tmp
|
||||
└── europe
|
||||
├── andorra-latest.osm.pbf
|
||||
└── monaco-latest.osm.pbf
|
||||
|
||||
├── europe
|
||||
│ ├── andorra
|
||||
│ │ └── sequence.state
|
||||
│ └── monaco
|
||||
│ └── sequence.state
|
||||
└── tmp
|
||||
└── europe
|
||||
├── andorra-latest.osm.pbf
|
||||
└── monaco-latest.osm.pbf
|
||||
|
||||
```
|
||||
|
||||
@@ -99,7 +127,7 @@ Change into the project directory and run the following command:
|
||||
|
||||
This will get diffs from the replication server, import diffs and index
|
||||
the database. The default replication server in the
|
||||
script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
|
||||
script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
|
||||
|
||||
## Using an external PostgreSQL database
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
# Deploying Nominatim
|
||||
# Deploying Nominatim using the PHP frontend
|
||||
|
||||
The Nominatim API is implemented as a PHP application. The `website/` directory
|
||||
in the project directory contains the configured website. You can serve this
|
||||
@@ -8,13 +8,13 @@ PHP scripts.
|
||||
This section gives a quick overview on how to configure Apache and Nginx to
|
||||
serve Nominatim. It is not meant as a full system administration guide on how
|
||||
to run a web service. Please refer to the documentation of
|
||||
[Apache](http://httpd.apache.org/docs/current/) and
|
||||
[Apache](https://httpd.apache.org/docs/current/) and
|
||||
[Nginx](https://nginx.org/en/docs/)
|
||||
for background information on configuring the services.
|
||||
|
||||
!!! Note
|
||||
Throughout this page, we assume that your Nominatim project directory is
|
||||
located in `/srv/nominatim-project` and that you have installed Nominatim
|
||||
Throughout this page, we assume your Nominatim project directory is
|
||||
located in `/srv/nominatim-project` and you have installed Nominatim
|
||||
using the default installation prefix `/usr/local`. If you have put it
|
||||
somewhere else, you need to adjust the commands and configuration
|
||||
accordingly.
|
||||
140
docs/admin/Deployment-Python.md
Normal file
140
docs/admin/Deployment-Python.md
Normal file
@@ -0,0 +1,140 @@
|
||||
# Deploying the Nominatim Python frontend
|
||||
|
||||
The Nominatim can be run as a Python-based
|
||||
[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
|
||||
choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
|
||||
and [Starlette](https://www.starlette.io/) as the ASGI framework.
|
||||
|
||||
This section gives a quick overview on how to configure Nginx to serve
|
||||
Nominatim. Please refer to the documentation of
|
||||
[Nginx](https://nginx.org/en/docs/) for background information on how
|
||||
to configure it.
|
||||
|
||||
!!! Note
|
||||
Throughout this page, we assume your Nominatim project directory is
|
||||
located in `/srv/nominatim-project` and you have installed Nominatim
|
||||
using the default installation prefix `/usr/local`. If you have put it
|
||||
somewhere else, you need to adjust the commands and configuration
|
||||
accordingly.
|
||||
|
||||
We further assume that your web server runs as user `www-data`. Older
|
||||
versions of CentOS may still use the user name `apache`. You also need
|
||||
to adapt the instructions in this case.
|
||||
|
||||
### Installing the required packages
|
||||
|
||||
The recommended way to deploy a Python ASGI application is to run
|
||||
the ASGI runner [uvicorn](https://uvicorn.org/)
|
||||
together with [gunicorn](https://gunicorn.org/) HTTP server. We use
|
||||
Falcon here as the web framework.
|
||||
|
||||
Create a virtual environment for the Python packages and install the necessary
|
||||
dependencies:
|
||||
|
||||
``` sh
|
||||
sudo apt install virtualenv
|
||||
virtualenv /srv/nominatim-venv
|
||||
/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
|
||||
psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
|
||||
```
|
||||
|
||||
### Setting up Nominatim as a systemd job
|
||||
|
||||
Next you need to set up the service that runs the Nominatim frontend. This is
|
||||
easiest done with a systemd job.
|
||||
|
||||
First you need to tell systemd to create a socket file to be used by
|
||||
gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
|
||||
|
||||
``` systemd
|
||||
[Unit]
|
||||
Description=Gunicorn socket for Nominatim
|
||||
|
||||
[Socket]
|
||||
ListenStream=/run/nominatim.sock
|
||||
SocketUser=www-data
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Now you can add the systemd service for Nominatim itself.
|
||||
Create the following file `/etc/systemd/system/nominatim.service`:
|
||||
|
||||
``` systemd
|
||||
[Unit]
|
||||
Description=Nominatim running as a gunicorn application
|
||||
After=network.target
|
||||
Requires=nominatim.socket
|
||||
|
||||
[Service]
|
||||
Type=simple
|
||||
Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
|
||||
User=www-data
|
||||
Group=www-data
|
||||
WorkingDirectory=/srv/nominatim-project
|
||||
ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
|
||||
ExecReload=/bin/kill -s HUP $MAINPID
|
||||
StandardOutput=append:/var/log/gunicorn-nominatim.log
|
||||
StandardError=inherit
|
||||
PrivateTmp=true
|
||||
TimeoutStopSec=5
|
||||
KillMode=mixed
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
|
||||
its own Python process using
|
||||
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
|
||||
connections to the database to serve requests in parallel.
|
||||
|
||||
Make the new services known to systemd and start it:
|
||||
|
||||
``` sh
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nominatim.socket
|
||||
sudo systemctl start nominatim.socket
|
||||
sudo systemctl enable nominatim.service
|
||||
sudo systemctl start nominatim.service
|
||||
```
|
||||
|
||||
This sets the service up, so that Nominatim is automatically started
|
||||
on reboot.
|
||||
|
||||
### Configuring nginx
|
||||
|
||||
To make the service available to the world, you need to proxy it through
|
||||
nginx. Add the following definition to the default configuration:
|
||||
|
||||
``` nginx
|
||||
upstream nominatim_service {
|
||||
server unix:/run/nominatim.sock fail_timeout=0;
|
||||
}
|
||||
|
||||
server {
|
||||
listen 80;
|
||||
listen [::]:80;
|
||||
|
||||
root /var/www/html;
|
||||
index /search;
|
||||
|
||||
location / {
|
||||
proxy_set_header Host $http_host;
|
||||
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
|
||||
proxy_set_header X-Forwarded-Proto $scheme;
|
||||
proxy_redirect off;
|
||||
proxy_pass http://nominatim_service;
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
Reload nginx with
|
||||
|
||||
```
|
||||
sudo systemctl reload nginx
|
||||
```
|
||||
|
||||
and you should be able to see the status of your server under
|
||||
`http://localhost/status`.
|
||||
@@ -37,40 +37,6 @@ nominatim import --continue indexing
|
||||
Otherwise it's best to start the full setup from the beginning.
|
||||
|
||||
|
||||
### PHP "open_basedir restriction in effect" warnings
|
||||
|
||||
PHP Warning: file_get_contents(): open_basedir restriction in effect.
|
||||
|
||||
You need to adjust the
|
||||
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
|
||||
setting in your PHP configuration (`php.ini` file). By default this setting may
|
||||
look like this:
|
||||
|
||||
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
|
||||
|
||||
Either add reported directories to the list or disable this setting temporarily
|
||||
by adding ";" at the beginning of the line. Don't forget to enable this setting
|
||||
again once you are done with the PHP command line operations.
|
||||
|
||||
|
||||
### PHP timezone warnings
|
||||
|
||||
The Apache log may contain lots of PHP warnings like this:
|
||||
`PHP Warning: date_default_timezone_set() function.`
|
||||
|
||||
You should set the default time zone as instructed in the warning in
|
||||
your `php.ini` file. Find the entry about timezone and set it to
|
||||
something like this:
|
||||
|
||||
; Defines the default timezone used by the date functions
|
||||
; https://php.net/date.timezone
|
||||
date.timezone = 'America/Denver'
|
||||
|
||||
Or
|
||||
|
||||
```
|
||||
echo "date.timezone = 'America/Denver'" > /etc/php.d/timezone.ini
|
||||
```
|
||||
|
||||
### nominatim.so version mismatch
|
||||
|
||||
@@ -170,7 +136,7 @@ recreate `nominatim.so`. Try
|
||||
cmake $main_Nominatim_path && make
|
||||
```
|
||||
|
||||
### Setup.php fails with "DB Error: extension not found"
|
||||
### Setup fails with "DB Error: extension not found"
|
||||
|
||||
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
|
||||
See the installation instructions for a full list of required packages.
|
||||
|
||||
@@ -74,7 +74,7 @@ but it will improve the quality of the results if this is installed.
|
||||
This data is available as a binary download. Put it into your project directory:
|
||||
|
||||
cd $PROJECT_DIR
|
||||
wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
|
||||
wget https://nominatim.org/data/wikimedia-importance.sql.gz
|
||||
|
||||
The file is about 400MB and adds around 4GB to the Nominatim database.
|
||||
|
||||
@@ -92,8 +92,8 @@ and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads
|
||||
This data can be optionally downloaded into the project directory:
|
||||
|
||||
cd $PROJECT_DIR
|
||||
wget https://www.nominatim.org/data/gb_postcodes.csv.gz
|
||||
wget https://www.nominatim.org/data/us_postcodes.csv.gz
|
||||
wget https://nominatim.org/data/gb_postcodes.csv.gz
|
||||
wget https://nominatim.org/data/us_postcodes.csv.gz
|
||||
|
||||
You can also add your own custom postcode sources, see
|
||||
[Customization of postcodes](../customize/Postcodes.md).
|
||||
@@ -254,26 +254,71 @@ successfully.
|
||||
nominatim admin --check-database
|
||||
```
|
||||
|
||||
Now you can try out your installation by running:
|
||||
Now you can try out your installation by executing a simple query on the
|
||||
command line:
|
||||
|
||||
```sh
|
||||
``` sh
|
||||
nominatim search --query Berlin
|
||||
```
|
||||
|
||||
or, when you have a reverse-only installation:
|
||||
|
||||
``` sh
|
||||
nominatim reverse --lat 51 --lon 45
|
||||
```
|
||||
|
||||
If you want to run Nominatim as a service, you need to make a choice between
|
||||
running the modern Python frontend and the legacy PHP frontend.
|
||||
Make sure you have installed the right packages as per
|
||||
[Installation](Installation.md#software).
|
||||
|
||||
#### Testing the Python frontend
|
||||
|
||||
To run the test server against the Python frontend, you must choose a
|
||||
web framework to use, either starlette or falcon. Make sure the appropriate
|
||||
packages are installed. Then run
|
||||
|
||||
``` sh
|
||||
nominatim serve
|
||||
```
|
||||
|
||||
This runs a small test server normally used for development. You can use it
|
||||
to verify that your installation is working. Go to
|
||||
`http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
|
||||
or, if you prefer to use Starlette instead of Falcon as webserver,
|
||||
|
||||
Note that search query is not supported for reverse-only imports. You can run a
|
||||
reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
``` sh
|
||||
nominatim serve --engine starlette
|
||||
```
|
||||
|
||||
To run Nominatim via webservers like Apache or nginx, please read the
|
||||
[Deployment chapter](Deployment.md).
|
||||
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
|
||||
or, for reverse-only installations a reverse query,
|
||||
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
|
||||
## Adding search through category phrases
|
||||
Do not use this test server in production.
|
||||
To run Nominatim via webservers like Apache or nginx, please continue reading
|
||||
[Deploy the Python frontend](Deployment-Python.md).
|
||||
|
||||
If you want to be able to search for places by their type through
|
||||
#### Testing the PHP frontend
|
||||
|
||||
You can run a small test server with the PHP frontend like this:
|
||||
|
||||
```sh
|
||||
nominatim serve --engine php
|
||||
```
|
||||
|
||||
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
|
||||
or, for reverse-only installations a reverse query,
|
||||
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
|
||||
Do not use this test server in production.
|
||||
To run Nominatim via webservers like Apache or nginx, please continue reading
|
||||
[Deploy the PHP frontend](Deployment-PHP.md).
|
||||
|
||||
|
||||
|
||||
## Enabling search by category phrases
|
||||
|
||||
To be able to search for places by their type using
|
||||
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
|
||||
you also need to import these key phrases like this:
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ the following operating systems:
|
||||
|
||||
* [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
|
||||
* [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
|
||||
* [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
|
||||
|
||||
These OS-specific instructions can also be found in executable form
|
||||
in the `vagrant/` directory.
|
||||
@@ -36,6 +35,7 @@ For compiling:
|
||||
* [bzip2](http://www.bzip.org/)
|
||||
* [zlib](https://www.zlib.net/)
|
||||
* [ICU](http://site.icu-project.org/)
|
||||
* [nlohmann/json](https://json.nlohmann.me/)
|
||||
* [Boost libraries](https://www.boost.org/), including system and filesystem
|
||||
* PostgreSQL client libraries
|
||||
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
|
||||
@@ -44,23 +44,35 @@ For running Nominatim:
|
||||
|
||||
* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
|
||||
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
|
||||
* [Python 3](https://www.python.org/) (3.6+)
|
||||
* [Python 3](https://www.python.org/) (3.7+)
|
||||
* [Psycopg2](https://www.psycopg.org) (2.7+)
|
||||
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
|
||||
* [psutil](https://github.com/giampaolo/psutil)
|
||||
* [Jinja2](https://palletsprojects.com/p/jinja/)
|
||||
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
|
||||
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
|
||||
* [PyICU](https://pypi.org/project/PyICU/)
|
||||
* [PyYaml](https://pyyaml.org/) (5.1+)
|
||||
* [datrie](https://github.com/pytries/datrie)
|
||||
* [PHP](https://php.net) (7.0 or later)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
* PHP-cgi (for running queries from the command line)
|
||||
|
||||
For running continuous updates:
|
||||
|
||||
* [pyosmium](https://osmcode.org/pyosmium/)
|
||||
|
||||
For running the Python frontend:
|
||||
|
||||
* one of the following web frameworks:
|
||||
* [falcon](https://falconframework.org/) (3.0+)
|
||||
* [starlette](https://www.starlette.io/)
|
||||
* [uvicorn](https://www.uvicorn.org/)
|
||||
|
||||
For running the legacy PHP frontend:
|
||||
|
||||
* [PHP](https://php.net) (7.3+)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
|
||||
|
||||
For dependencies for running tests and building documentation, see
|
||||
the [Development section](../develop/Development-Environment.md).
|
||||
|
||||
@@ -75,7 +87,7 @@ Take into account that the OSM database is growing fast.
|
||||
Fast disks are essential. Using NVME disks is recommended.
|
||||
|
||||
Even on a well configured machine the import of a full planet takes
|
||||
around 2 days. On traditional spinning disks, 7-8 days are more realistic.
|
||||
around 2 days. When using traditional SSDs, 4-5 days are more realistic.
|
||||
|
||||
## Tuning the PostgreSQL database
|
||||
|
||||
@@ -107,15 +119,6 @@ you might consider setting:
|
||||
and even reduce `autovacuum_work_mem` further. This will reduce the amount
|
||||
of memory that autovacuum takes away from the import process.
|
||||
|
||||
For the initial import, you should also set:
|
||||
|
||||
fsync = off
|
||||
full_page_writes = off
|
||||
|
||||
Don't forget to re-enable them after the initial import or you risk database
|
||||
corruption.
|
||||
|
||||
|
||||
## Downloading and building Nominatim
|
||||
|
||||
### Downloading the latest release
|
||||
@@ -135,7 +138,7 @@ git clone --recursive https://github.com/openstreetmap/Nominatim.git
|
||||
The development version does not include the country grid. Download it separately:
|
||||
|
||||
```
|
||||
wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
|
||||
wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
|
||||
```
|
||||
|
||||
### Building Nominatim
|
||||
|
||||
@@ -60,16 +60,13 @@ to finish the recomputation.
|
||||
|
||||
## Removing large deleted objects
|
||||
|
||||
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
|
||||
|
||||
Nominatim refuses to delete very large areas because often these deletions are
|
||||
accidental and are reverted within hours. Instead the deletions are logged in
|
||||
the `import_polygon_delete` table and left to the administrator to clean up.
|
||||
|
||||
There is currently no command to do that. You can use the following SQL
|
||||
query to force a deletion on all objects that have been deleted more than
|
||||
a certain timespan ago (here: 1 month):
|
||||
To run this command you will need to pass a PostgreSQL time interval. For example to
|
||||
delete any objects that have been deleted more than a month ago you would run:
|
||||
`nominatim admin --clean-deleted '1 month'`
|
||||
|
||||
```sql
|
||||
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
|
||||
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
|
||||
and age(p.indexed_date) > '1 month'::interval
|
||||
```
|
||||
|
||||
@@ -15,6 +15,25 @@ breaking changes. **Please read them before running the migration.**
|
||||
If you are migrating from a version <3.6, then you still have to follow
|
||||
the manual migration steps up to 3.6.
|
||||
|
||||
## 4.2.0 -> 4.3.0
|
||||
|
||||
### New indexes for reverse lookup
|
||||
|
||||
The reverse lookup algorithm has changed slightly to improve performance.
|
||||
This change needs a different index in the database. The required index
|
||||
will be automatically built during migration. Until the new index is available
|
||||
performance of the /reverse endpoint is significantly reduced. You should
|
||||
therefore either remove traffic from the machine before attempting a
|
||||
version update or create the index manually **before** starting the update
|
||||
using the following SQL:
|
||||
|
||||
```sql
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
|
||||
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
|
||||
WHERE rank_address between 4 and 25 AND type != 'postcode'
|
||||
AND name is not null AND linked_place_id is null AND osm_type = 'N';
|
||||
```
|
||||
|
||||
## 4.0.0 -> 4.1.0
|
||||
|
||||
### ICU tokenizer is the new default
|
||||
|
||||
@@ -59,47 +59,6 @@ imported multiple country extracts and want to keep them
|
||||
up-to-date, [Advanced installations section](Advanced-Installations.md)
|
||||
contains instructions to set up and update multiple country extracts.
|
||||
|
||||
#### Continuous updates
|
||||
|
||||
This is the easiest mode. Simply run the replication command without any
|
||||
parameters:
|
||||
|
||||
nominatim replication
|
||||
|
||||
The update application keeps running forever and retrieves and applies
|
||||
new updates from the server as they are published.
|
||||
|
||||
You can run this command as a simple systemd service. Create a service
|
||||
description like that in `/etc/systemd/system/nominatim-updates.service`:
|
||||
|
||||
```
|
||||
[Unit]
|
||||
Description=Continuous updates of Nominatim
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/srv/nominatim
|
||||
ExecStart=nominatim replication
|
||||
StandardOutput=append:/var/log/nominatim-updates.log
|
||||
StandardError=append:/var/log/nominatim-updates.error.log
|
||||
User=nominatim
|
||||
Group=nominatim
|
||||
Type=simple
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Replace the `WorkingDirectory` with your project directory. Also adapt user
|
||||
and group names as required.
|
||||
|
||||
Now activate the service and start the updates:
|
||||
|
||||
```
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nominatim-updates
|
||||
sudo systemctl start nominatim-updates
|
||||
```
|
||||
|
||||
#### One-time mode
|
||||
|
||||
When the `--once` parameter is given, then Nominatim will download exactly one
|
||||
@@ -221,3 +180,53 @@ replication catch-up at whatever interval you desire.
|
||||
updated source with daily updates), use the
|
||||
continuous update mode. It ensures to re-request the newest update until it
|
||||
is published.
|
||||
|
||||
|
||||
#### Continuous updates
|
||||
|
||||
!!! danger
|
||||
This mode is no longer recommended and will be removed in future
|
||||
releases. systemd is much better
|
||||
suited for running regular updates. Please refer to the setup
|
||||
instructions for running one-time mode with systemd above.
|
||||
|
||||
This is the easiest mode. Simply run the replication command without any
|
||||
parameters:
|
||||
|
||||
nominatim replication
|
||||
|
||||
The update application keeps running forever and retrieves and applies
|
||||
new updates from the server as they are published.
|
||||
|
||||
You can run this command as a simple systemd service. Create a service
|
||||
description like that in `/etc/systemd/system/nominatim-updates.service`:
|
||||
|
||||
```
|
||||
[Unit]
|
||||
Description=Continuous updates of Nominatim
|
||||
|
||||
[Service]
|
||||
WorkingDirectory=/srv/nominatim
|
||||
ExecStart=nominatim replication
|
||||
StandardOutput=append:/var/log/nominatim-updates.log
|
||||
StandardError=append:/var/log/nominatim-updates.error.log
|
||||
User=nominatim
|
||||
Group=nominatim
|
||||
Type=simple
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Replace the `WorkingDirectory` with your project directory. Also adapt user
|
||||
and group names as required.
|
||||
|
||||
Now activate the service and start the updates:
|
||||
|
||||
```
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nominatim-updates
|
||||
sudo systemctl start nominatim-updates
|
||||
```
|
||||
|
||||
|
||||
|
||||
@@ -2,13 +2,17 @@
|
||||
|
||||
Show all details about a single place saved in the database.
|
||||
|
||||
This API endpoint is meant for visual inspection of the data in the database,
|
||||
mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
|
||||
The parameters of the endpoint and the output may change occasionally between
|
||||
versions of Nominatim. Do not rely on the output in scripts or applications.
|
||||
|
||||
!!! warning
|
||||
The details page exists for debugging only. You may not use it in scripts
|
||||
or to automatically query details about a result.
|
||||
The details endpoint at https://nominatim.openstreetmap.org
|
||||
may not be used in scripts or bots at all.
|
||||
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
|
||||
|
||||
|
||||
## Parameters
|
||||
|
||||
The details API supports the following two request formats:
|
||||
|
||||
@@ -35,59 +39,90 @@ for a place is different between Nominatim installation (servers) and
|
||||
changes when data gets reimported. Therefore it cannot be used as
|
||||
a permanent id and shouldn't be used in bug reports.
|
||||
|
||||
!!! danger "Deprecation warning"
|
||||
The API can also be used with the URL
|
||||
`https://nominatim.openstreetmap.org/details.php`. This is now deprecated
|
||||
and will be removed in future versions.
|
||||
|
||||
Additional optional parameters are explained below.
|
||||
|
||||
## Parameters
|
||||
|
||||
This section lists additional optional parameters.
|
||||
|
||||
### Output format
|
||||
|
||||
* `json_callback=<string>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| json_callback | function name | _unset_ |
|
||||
|
||||
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
|
||||
When set, then JSON output will be wrapped in a callback function with
|
||||
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
|
||||
information.
|
||||
|
||||
* `pretty=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| pretty | 0 or 1 | 0 |
|
||||
|
||||
Add indentation to make it more human-readable. (Default: 0)
|
||||
`[PHP-only]` Add indentation to the output to make it more human-readable.
|
||||
|
||||
|
||||
### Output details
|
||||
|
||||
* `addressdetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| addressdetails | 0 or 1 | 0 |
|
||||
|
||||
Include a breakdown of the address into elements. (Default: 0)
|
||||
When set to 1, include a breakdown of the address into elements.
|
||||
|
||||
* `keywords=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| keywords | 0 or 1 | 0 |
|
||||
|
||||
Include a list of name keywords and address keywords (word ids). (Default: 0)
|
||||
When set to 1, include a list of name keywords and address keywords
|
||||
in the result.
|
||||
|
||||
* `linkedplaces=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| linkedplaces | 0 or 1 | 1 |
|
||||
|
||||
Include a details of places that are linked with this one. Places get linked
|
||||
Include details of places that are linked with this one. Places get linked
|
||||
together when they are different forms of the same physical object. Nominatim
|
||||
links two kinds of objects together: place nodes get linked with the
|
||||
corresponding administrative boundaries. Waterway relations get linked together with their
|
||||
members.
|
||||
(Default: 1)
|
||||
|
||||
* `hierarchy=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| hierarchy | 0 or 1 | 0 |
|
||||
|
||||
Include details of places lower in the address hierarchy. (Default: 0)
|
||||
Include details of places lower in the address hierarchy.
|
||||
|
||||
* `group_hierarchy=[0|1]`
|
||||
`[Python-only]` will only return properly parented places. These are address
|
||||
or POI-like places that reuse the address of their parent street or place.
|
||||
|
||||
For JSON output will group the places by type. (Default: 0)
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| group_hierarchy | 0 or 1 | 0 |
|
||||
|
||||
* `polygon_geojson=[0|1]`
|
||||
When set to 1, the output of the address hierarchy will be
|
||||
grouped by type.
|
||||
|
||||
Include geometry of result. (Default: 0)
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_geojson | 0 or 1 | 0 |
|
||||
|
||||
|
||||
Include geometry of result.
|
||||
|
||||
### Language of results
|
||||
|
||||
* `accept-language=<browser language string>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| accept-language | browser language string | content of "Accept-Language" HTTP header |
|
||||
|
||||
Preferred language order for showing result, overrides the value
|
||||
specified in the "Accept-Language" HTTP header.
|
||||
Either use a standard RFC2616 accept-language string or a simple
|
||||
comma-separated list of language codes.
|
||||
Preferred language order for showing search results. This may either be
|
||||
a simple comma-separated list of language codes or have the same format
|
||||
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -59,3 +59,27 @@ suited for these kinds of queries.
|
||||
|
||||
That said if you installed your own Nominatim instance you can use the
|
||||
`nominatim export` PHP script as basis to return such lists.
|
||||
|
||||
#### 7. My result has a wrong postcode. Where does it come from?
|
||||
|
||||
Most places in OSM don't have a postcode, so Nominatim tries to interpolate
|
||||
one. It first looks at all the places that make up the address of the place.
|
||||
If one of them has a postcode defined, this is the one to be used. When
|
||||
none of the address parts has a postcode either, Nominatim interpolates one
|
||||
from the surrounding objects. If the postcode for your result is wrong, then
|
||||
most of the time there is an OSM object with the wrong postcode nearby.
|
||||
|
||||
To find the bad postcode, go to
|
||||
[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
|
||||
and search for your place. When you have found it, click on the 'details' link
|
||||
under the result to go to the details page. There is a field 'Computed Postcode'
|
||||
which should display the bad postcode. Click on the 'how?' link. A small
|
||||
explanation text appears. It contains a link to a query for Overpass Turbo.
|
||||
Click on that and you get a map with all places in the area that have the bad
|
||||
postcode. If none is displayed, zoom the map out a bit and then click on 'Run'.
|
||||
|
||||
Now go to [OpenStreetMap](https://openstreetmap.org) and fix the error you
|
||||
have just found. It will take at least a day for Nominatim to catch up with
|
||||
your data fix. Sometimes longer, depending on how much editing activity is in
|
||||
the area.
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
The lookup API allows to query the address and other details of one or
|
||||
multiple OSM objects like node, way or relation.
|
||||
|
||||
## Parameters
|
||||
## Endpoint
|
||||
|
||||
The lookup API has the following format:
|
||||
|
||||
@@ -15,75 +15,129 @@ The lookup API has the following format:
|
||||
prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
|
||||
can be queried at the same time.
|
||||
|
||||
Additional optional parameters are explained below.
|
||||
!!! danger "Deprecation warning"
|
||||
The API can also be used with the URL
|
||||
`https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
|
||||
and will be removed in future versions.
|
||||
|
||||
|
||||
## Parameters
|
||||
|
||||
This section lists additional optional parameters.
|
||||
|
||||
### Output format
|
||||
|
||||
* `format=[xml|json|jsonv2|geojson|geocodejson]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
|
||||
|
||||
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
|
||||
See [Place Output Formats](Output.md) for details on each format.
|
||||
|
||||
* `json_callback=<string>`
|
||||
|
||||
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| json_callback | function name | _unset_ |
|
||||
|
||||
When given, then JSON output will be wrapped in a callback function with
|
||||
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
|
||||
information.
|
||||
|
||||
Only has an effect for JSON output formats.
|
||||
|
||||
|
||||
### Output details
|
||||
|
||||
* `addressdetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| addressdetails | 0 or 1 | 0 |
|
||||
|
||||
Include a breakdown of the address into elements. (Default: 0)
|
||||
When set to 1, include a breakdown of the address into elements.
|
||||
The exact content of the address breakdown depends on the output format.
|
||||
|
||||
!!! tip
|
||||
If you are interested in a stable classification of address categories
|
||||
(suburb, city, state, etc), have a look at the `geocodejson` format.
|
||||
All other formats return classifications according to OSM tagging.
|
||||
There is a much larger set of categories and they are not always consistent,
|
||||
which makes them very hard to work with.
|
||||
|
||||
|
||||
* `extratags=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| extratags | 0 or 1 | 0 |
|
||||
|
||||
Include additional information in the result if available,
|
||||
e.g. wikipedia link, opening hours. (Default: 0)
|
||||
When set to 1, the response include any additional information in the result
|
||||
that is available in the database, e.g. wikipedia link, opening hours.
|
||||
|
||||
|
||||
* `namedetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| namedetails | 0 or 1 | 0 |
|
||||
|
||||
Include a list of alternative names in the results. These may include
|
||||
language variants, references, operator and brand. (Default: 0)
|
||||
When set to 1, include a full list of names for the result. These may include
|
||||
language variants, older names, references and brand.
|
||||
|
||||
|
||||
### Language of results
|
||||
|
||||
* `accept-language=<browser language string>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| accept-language | browser language string | content of "Accept-Language" HTTP header |
|
||||
|
||||
Preferred language order for showing search results. This may either be
|
||||
a simple comma-separated list of language codes or have the same format
|
||||
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
|
||||
|
||||
!!! tip
|
||||
First-time users of Nominatim tend to be confused that they get different
|
||||
results when using Nominatim in the browser versus in a command-line tool
|
||||
like wget or curl. The command-line tools
|
||||
usually don't send any Accept-Language header, prompting Nominatim
|
||||
to show results in the local language. Browsers on the contrary always
|
||||
send the currently chosen browser language.
|
||||
|
||||
Preferred language order for showing search results, overrides the value
|
||||
specified in the "Accept-Language" HTTP header.
|
||||
Either use a standard RFC2616 accept-language string or a simple
|
||||
comma-separated list of language codes.
|
||||
|
||||
### Polygon output
|
||||
|
||||
* `polygon_geojson=1`
|
||||
* `polygon_kml=1`
|
||||
* `polygon_svg=1`
|
||||
* `polygon_text=1`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_geojson | 0 or 1 | 0 |
|
||||
| polygon_kml | 0 or 1 | 0 |
|
||||
| polygon_svg | 0 or 1 | 0 |
|
||||
| polygon_text | 0 or 1 | 0 |
|
||||
|
||||
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
|
||||
options can be used at a time. (Default: 0)
|
||||
Add the full geometry of the place to the result output. Output formats
|
||||
in GeoJSON, KML, SVG or WKT are supported. Only one of these
|
||||
options can be used at a time.
|
||||
|
||||
* `polygon_threshold=0.0`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_threshold | floating-point number | 0.0 |
|
||||
|
||||
Return a simplified version of the output geometry. The parameter is the
|
||||
When one of the polygon_* outputs is chosen, return a simplified version
|
||||
of the output geometry. The parameter describes the
|
||||
tolerance in degrees with which the geometry may differ from the original
|
||||
geometry. Topology is preserved in the result. (Default: 0.0)
|
||||
geometry. Topology is preserved in the geometry.
|
||||
|
||||
|
||||
### Other
|
||||
|
||||
* `email=<valid email address>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| email | valid email address | _unset_ |
|
||||
|
||||
If you are making large numbers of requests please include an appropriate email
|
||||
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
address to identify your requests. See Nominatim's
|
||||
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
|
||||
* `debug=[0|1]`
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| debug | 0 or 1 | 0 |
|
||||
|
||||
Output assorted developer debug information. Data on internals of Nominatim's
|
||||
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
|
||||
This overrides the specified machine readable format. (Default: 0)
|
||||
"search loop" logic, and SQL queries. The output is HTML format.
|
||||
This overrides the specified machine readable format.
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -211,8 +211,8 @@ be more than one. The attributes of that element contain:
|
||||
* `ref` - content of `ref` tag if it exists
|
||||
* `lat`, `lon` - latitude and longitude of the centroid of the object
|
||||
* `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
|
||||
* `place_rank` - class [search rank](../customize/Ranking#search-rank)
|
||||
* `address_rank` - place [address rank](../customize/Ranking#address-rank)
|
||||
* `place_rank` - class [search rank](../customize/Ranking.md#search-rank)
|
||||
* `address_rank` - place [address rank](../customize/Ranking.md#address-rank)
|
||||
* `display_name` - full comma-separated address
|
||||
* `class`, `type` - key and value of the main OSM tag
|
||||
* `importance` - computed importance rank
|
||||
|
||||
@@ -1,8 +1,16 @@
|
||||
### Nominatim API
|
||||
|
||||
Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
|
||||
!!! Attention
|
||||
The current version of Nominatim implements two different search frontends:
|
||||
the old PHP frontend and the new Python frontend. They have a very similar
|
||||
API but differ in some implementation details. These are marked in the
|
||||
documentation as `[Python-only]` or `[PHP-only]`.
|
||||
|
||||
Its API has the following endpoints for querying the data:
|
||||
`https://nominatim.openstreetmap.org` implements the **Python frontend**.
|
||||
So users should refer to the **`[Python-only]`** comments.
|
||||
|
||||
This section describes the API V1 of the Nominatim web service. The
|
||||
service offers the following endpoints:
|
||||
|
||||
* __[/search](Search.md)__ - search OSM objects by name or type
|
||||
* __[/reverse](Reverse.md)__ - search OSM object by their location
|
||||
@@ -12,3 +20,6 @@ Its API has the following endpoints for querying the data:
|
||||
back in Nominatim in case the deletion was accidental
|
||||
* __/polygons__ - list of broken polygons detected by Nominatim
|
||||
* __[/details](Details.md)__ - show internal details for an object (for debugging only)
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
# Reverse Geocoding
|
||||
|
||||
Reverse geocoding generates an address from a latitude and longitude.
|
||||
Reverse geocoding generates an address from a coordinate given as
|
||||
latitude and longitude.
|
||||
|
||||
## How it works
|
||||
|
||||
@@ -18,8 +19,7 @@ The other issue to be aware of is that the closest OSM object may not always
|
||||
have a similar enough address to the coordinate you were requesting. For
|
||||
example, in dense city areas it may belong to a completely different street.
|
||||
|
||||
|
||||
## Parameters
|
||||
## Endpoint
|
||||
|
||||
The main format of the reverse API is
|
||||
|
||||
@@ -31,57 +31,101 @@ where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
|
||||
projection. The API returns exactly one result or an error when the coordinate
|
||||
is in an area with no OSM data coverage.
|
||||
|
||||
Additional parameters are accepted as listed below.
|
||||
|
||||
!!! warning "Deprecation warning"
|
||||
!!! danger "Deprecation warning"
|
||||
The reverse API used to allow address lookup for a single OSM object by
|
||||
its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
|
||||
instead.
|
||||
its OSM id for `[PHP-only]`. The use is considered deprecated.
|
||||
Use the [Address Lookup API](Lookup.md) instead.
|
||||
|
||||
!!! danger "Deprecation warning"
|
||||
The API can also be used with the URL
|
||||
`https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
|
||||
and will be removed in future versions.
|
||||
|
||||
|
||||
## Parameters
|
||||
|
||||
This section lists additional parameters to further influence the output.
|
||||
|
||||
### Output format
|
||||
|
||||
* `format=[xml|json|jsonv2|geojson|geocodejson]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
|
||||
|
||||
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
|
||||
See [Place Output Formats](Output.md) for details on each format.
|
||||
|
||||
* `json_callback=<string>`
|
||||
|
||||
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| json_callback | function name | _unset_ |
|
||||
|
||||
When given, then JSON output will be wrapped in a callback function with
|
||||
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
|
||||
information.
|
||||
|
||||
Only has an effect for JSON output formats.
|
||||
|
||||
|
||||
### Output details
|
||||
|
||||
* `addressdetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| addressdetails | 0 or 1 | 1 |
|
||||
|
||||
Include a breakdown of the address into elements. (Default: 1)
|
||||
When set to 1, include a breakdown of the address into elements.
|
||||
The exact content of the address breakdown depends on the output format.
|
||||
|
||||
!!! tip
|
||||
If you are interested in a stable classification of address categories
|
||||
(suburb, city, state, etc), have a look at the `geocodejson` format.
|
||||
All other formats return classifications according to OSM tagging.
|
||||
There is a much larger set of categories and they are not always consistent,
|
||||
which makes them very hard to work with.
|
||||
|
||||
|
||||
* `extratags=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| extratags | 0 or 1 | 0 |
|
||||
|
||||
Include additional information in the result if available,
|
||||
e.g. wikipedia link, opening hours. (Default: 0)
|
||||
When set to 1, the response includes any additional information in the result
|
||||
that is available in the database, e.g. wikipedia link, opening hours.
|
||||
|
||||
|
||||
* `namedetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| namedetails | 0 or 1 | 0 |
|
||||
|
||||
Include a list of alternative names in the results. These may include
|
||||
language variants, references, operator and brand. (Default: 0)
|
||||
When set to 1, include a full list of names for the result. These may include
|
||||
language variants, older names, references and brand.
|
||||
|
||||
|
||||
### Language of results
|
||||
|
||||
* `accept-language=<browser language string>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| accept-language | browser language string | content of "Accept-Language" HTTP header |
|
||||
|
||||
Preferred language order for showing search results, overrides the value
|
||||
specified in the "Accept-Language" HTTP header.
|
||||
Either use a standard RFC2616 accept-language string or a simple
|
||||
comma-separated list of language codes.
|
||||
Preferred language order for showing search results. This may either be
|
||||
a simple comma-separated list of language codes or have the same format
|
||||
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
|
||||
|
||||
### Result limitation
|
||||
!!! tip
|
||||
First-time users of Nominatim tend to be confused that they get different
|
||||
results when using Nominatim in the browser versus in a command-line tool
|
||||
like wget or curl. The command-line tools
|
||||
usually don't send any Accept-Language header, prompting Nominatim
|
||||
to show results in the local language. Browsers on the contrary always
|
||||
send the currently chosen browser language.
|
||||
|
||||
* `zoom=[0-18]`
|
||||
|
||||
Level of detail required for the address. Default: 18. This is a number that
|
||||
### Result restriction
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| zoom | 0-18 | 18 |
|
||||
|
||||
Level of detail required for the address. This is a number that
|
||||
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
|
||||
like Leaflet.js, Openlayers etc.
|
||||
In terms of address details the zoom levels are as follows:
|
||||
@@ -92,41 +136,81 @@ In terms of address details the zoom levels are as follows:
|
||||
5 | state
|
||||
8 | county
|
||||
10 | city
|
||||
14 | suburb
|
||||
12 | town / borough
|
||||
13 | village / suburb
|
||||
14 | neighbourhood
|
||||
15 | any settlement
|
||||
16 | major streets
|
||||
17 | major and minor streets
|
||||
18 | building
|
||||
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
|
||||
|
||||
**`[Python-only]`**
|
||||
|
||||
The layer filter allows to select places by themes.
|
||||
|
||||
The `address` layer contains all places that make up an address:
|
||||
address points with house numbers, streets, inhabited places (suburbs, villages,
|
||||
cities, states etc.) and administrative boundaries.
|
||||
|
||||
The `poi` layer selects all points of interest. This includes classic points
|
||||
of interest like restaurants, shops, hotels but also less obvious features
|
||||
like recycling bins, guideposts or benches.
|
||||
|
||||
The `railway` layer includes railway infrastructure like tracks.
|
||||
Note that in Nominatim's standard configuration, only very few railway
|
||||
features are imported into the database.
|
||||
|
||||
The `natural` layer collects features like rivers, lakes and mountains while
|
||||
the `manmade` layer functions as a catch-all for features not covered by the
|
||||
other layers.
|
||||
|
||||
|
||||
### Polygon output
|
||||
|
||||
* `polygon_geojson=1`
|
||||
* `polygon_kml=1`
|
||||
* `polygon_svg=1`
|
||||
* `polygon_text=1`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_geojson | 0 or 1 | 0 |
|
||||
| polygon_kml | 0 or 1 | 0 |
|
||||
| polygon_svg | 0 or 1 | 0 |
|
||||
| polygon_text | 0 or 1 | 0 |
|
||||
|
||||
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
|
||||
options can be used at a time. (Default: 0)
|
||||
Add the full geometry of the place to the result output. Output formats
|
||||
in GeoJSON, KML, SVG or WKT are supported. Only one of these
|
||||
options can be used at a time.
|
||||
|
||||
* `polygon_threshold=0.0`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_threshold | floating-point number | 0.0 |
|
||||
|
||||
Return a simplified version of the output geometry. The parameter is the
|
||||
When one of the polygon_* outputs is chosen, return a simplified version
|
||||
of the output geometry. The parameter describes the
|
||||
tolerance in degrees with which the geometry may differ from the original
|
||||
geometry. Topology is preserved in the result. (Default: 0.0)
|
||||
geometry. Topology is preserved in the geometry.
|
||||
|
||||
|
||||
### Other
|
||||
|
||||
* `email=<valid email address>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| email | valid email address | _unset_ |
|
||||
|
||||
If you are making a large number of requests, please include an appropriate email
|
||||
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
If you are making large numbers of requests, please include an appropriate email
|
||||
address to identify your requests. See Nominatim's
|
||||
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
|
||||
|
||||
* `debug=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| debug | 0 or 1 | 0 |
|
||||
|
||||
Output assorted developer debug information. Data on internals of Nominatim's
|
||||
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
|
||||
This overrides the specified machine readable format. (Default: 0)
|
||||
"search loop" logic, and SQL queries. The output is HTML format.
|
||||
This overrides the specified machine readable format.
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
@@ -8,12 +8,12 @@ The search query may also contain
|
||||
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
|
||||
This can be used to narrow down the kind of objects to be returned.
|
||||
|
||||
!!! warning
|
||||
!!! note
|
||||
Special phrases are not suitable to query all objects of a certain type in an
|
||||
area. Nominatim will always just return a collection of the best matches. To
|
||||
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
|
||||
|
||||
## Parameters
|
||||
## Endpoint
|
||||
|
||||
The search API has the following format:
|
||||
|
||||
@@ -21,35 +21,62 @@ The search API has the following format:
|
||||
https://nominatim.openstreetmap.org/search?<params>
|
||||
```
|
||||
|
||||
The search term may be specified with two different sets of parameters:
|
||||
!!! danger "Deprecation warning"
|
||||
The API can also be used with the URL
|
||||
`https://nominatim.openstreetmap.org/search.php`. This is now deprecated
|
||||
and will be removed in future versions.
|
||||
|
||||
* `q=<query>`
|
||||
The query term can be given in two different forms: free-form or structured.
|
||||
|
||||
Free-form query string to search for.
|
||||
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
|
||||
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
|
||||
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
|
||||
Commas are optional, but improve performance by reducing the complexity of the search.
|
||||
### Free-form query
|
||||
|
||||
| Parameter | Value |
|
||||
|-----------| ----- |
|
||||
| q | Free-form query string to search for |
|
||||
|
||||
* `street=<housenumber> <streetname>`
|
||||
* `city=<city>`
|
||||
* `county=<county>`
|
||||
* `state=<state>`
|
||||
* `country=<country>`
|
||||
* `postalcode=<postalcode>`
|
||||
In this form, the query can be unstructured.
|
||||
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
|
||||
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
|
||||
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
|
||||
Commas are optional, but improve performance by reducing the complexity of the search.
|
||||
|
||||
Alternative query string format split into several parameters for structured requests.
|
||||
Structured requests are faster but are less robust against alternative
|
||||
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
|
||||
The free-form may also contain special phrases to describe the type of
|
||||
place to be returned or a coordinate to search close to a position.
|
||||
|
||||
Both query forms accept the additional parameters listed below.
|
||||
### Structured query
|
||||
|
||||
| Parameter | Value |
|
||||
|----------- | ----- |
|
||||
| amenity | name and/or type of POI |
|
||||
| street | housenumber and streetname |
|
||||
| city | city |
|
||||
| county | county |
|
||||
| state | state |
|
||||
| country | country |
|
||||
| postalcode | postal code |
|
||||
|
||||
The structured form of the search query allows to look up an address
|
||||
that is already split into its components. Each parameter represents a field
|
||||
of the address. All parameters are optional. You should only use the ones
|
||||
that are relevant for the address you want to geocode.
|
||||
|
||||
!!! Attention
|
||||
Cannot be combined with the `q=<query>` parameter. Newer versions of
|
||||
the API will return an error if you do so. Older versions simply return
|
||||
unexpected results.
|
||||
|
||||
## Parameters
|
||||
|
||||
The following parameters can be used to further restrict the search and
|
||||
change the output. They are usable for both forms of the search query.
|
||||
|
||||
### Output format
|
||||
|
||||
* `format=[xml|json|jsonv2|geojson|geocodejson]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
|
||||
|
||||
See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
|
||||
See [Place Output Formats](Output.md) for details on each format.
|
||||
|
||||
!!! note
|
||||
The Nominatim service at
|
||||
@@ -57,52 +84,150 @@ See [Place Output Formats](Output.md) for details on each format. (Default: json
|
||||
has a different default behaviour for historical reasons. When the
|
||||
`format` parameter is omitted, the request will be forwarded to the Web UI.
|
||||
|
||||
* `json_callback=<string>`
|
||||
|
||||
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| json_callback | function name | _unset_ |
|
||||
|
||||
When given, then JSON output will be wrapped in a callback function with
|
||||
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
|
||||
information.
|
||||
|
||||
Only has an effect for JSON output formats.
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| limit | number | 10 |
|
||||
|
||||
Limit the maximum number of returned results. Cannot be more than 40.
|
||||
Nominatim may decide to return less results than given, if additional
|
||||
results do not sufficiently match the query.
|
||||
|
||||
|
||||
### Output details
|
||||
|
||||
* `addressdetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| addressdetails | 0 or 1 | 0 |
|
||||
|
||||
Include a breakdown of the address into elements. (Default: 0)
|
||||
When set to 1, include a breakdown of the address into elements.
|
||||
The exact content of the address breakdown depends on the output format.
|
||||
|
||||
!!! tip
|
||||
If you are interested in a stable classification of address categories
|
||||
(suburb, city, state, etc), have a look at the `geocodejson` format.
|
||||
All other formats return classifications according to OSM tagging.
|
||||
There is a much larger set of categories and they are not always consistent,
|
||||
which makes them very hard to work with.
|
||||
|
||||
|
||||
* `extratags=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| extratags | 0 or 1 | 0 |
|
||||
|
||||
Include additional information in the result if available,
|
||||
e.g. wikipedia link, opening hours. (Default: 0)
|
||||
When set to 1, the response includes any additional information in the result
|
||||
that is available in the database, e.g. wikipedia link, opening hours.
|
||||
|
||||
|
||||
* `namedetails=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| namedetails | 0 or 1 | 0 |
|
||||
|
||||
Include a list of alternative names in the results. These may include
|
||||
language variants, references, operator and brand. (Default: 0)
|
||||
When set to 1, include a full list of names for the result. These may include
|
||||
language variants, older names, references and brand.
|
||||
|
||||
|
||||
### Language of results
|
||||
|
||||
* `accept-language=<browser language string>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| accept-language | browser language string | content of "Accept-Language" HTTP header |
|
||||
|
||||
Preferred language order for showing search results, overrides the value
|
||||
specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
|
||||
Either use a standard RFC2616 accept-language string or a simple
|
||||
comma-separated list of language codes.
|
||||
Preferred language order for showing search results. This may either be
|
||||
a simple comma-separated list of language codes or have the same format
|
||||
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
|
||||
|
||||
### Result limitation
|
||||
!!! tip
|
||||
First-time users of Nominatim tend to be confused that they get different
|
||||
results when using Nominatim in the browser versus in a command-line tool
|
||||
like wget or curl. The command-line tools
|
||||
usually don't send any Accept-Language header, prompting Nominatim
|
||||
to show results in the local language. Browsers on the contrary always
|
||||
send the currently chosen browser language.
|
||||
|
||||
* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
|
||||
### Result restriction
|
||||
|
||||
Limit search results to one or more countries. `<countrycode>` must be the
|
||||
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
|
||||
e.g. `gb` for the United Kingdom, `de` for Germany.
|
||||
There are two ways to influence the results. *Filters* exclude certain
|
||||
kinds of results completely. *Boost parameters* only change the order of the
|
||||
results and thus give a preference to some results over others.
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| countrycodes | comma-separated list of country codes | _unset_ |
|
||||
|
||||
Filter that limits the search results to one or more countries.
|
||||
The country code must be the
|
||||
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
|
||||
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
|
||||
|
||||
Each place in Nominatim is assigned to one country code based
|
||||
on OSM country boundaries. In rare cases a place may not be in any country
|
||||
at all, for example, in international waters.
|
||||
at all, for example, when it is in international waters. These places are
|
||||
also excluded when the filter is set.
|
||||
|
||||
* `exclude_place_ids=<place_id,[place_id],[place_id]`
|
||||
!!! Note
|
||||
This parameter should not be confused with the 'country' parameter of
|
||||
the structured query. The 'country' parameter contains a search term
|
||||
and will be handled with some fuzziness. The `countrycodes` parameter
|
||||
is a hard filter and as such should be preferred. Having both parameters
|
||||
in the same query will work. If the parameters contradict each other,
|
||||
the search will come up empty.
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
|
||||
|
||||
**`[Python-only]`**
|
||||
|
||||
The layer filter allows to select places by themes.
|
||||
|
||||
The `address` layer contains all places that make up an address:
|
||||
address points with house numbers, streets, inhabited places (suburbs, villages,
|
||||
cities, states etc.) and administrative boundaries.
|
||||
|
||||
The `poi` layer selects all points of interest. This includes classic POIs like
|
||||
restaurants, shops, hotels but also less obvious features like recycling bins,
|
||||
guideposts or benches.
|
||||
|
||||
The `railway` layer includes railway infrastructure like tracks.
|
||||
Note that in Nominatim's standard configuration, only very few railway
|
||||
features are imported into the database.
|
||||
|
||||
The `natural` layer collects features like rivers, lakes and mountains while
|
||||
the `manmade` layer functions as a catch-all for features not covered by the
|
||||
other layers.
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
|
||||
|
||||
The featureType allows to have a more fine-grained selection for places
|
||||
from the address layer. Results can be restricted to places that make up
|
||||
the 'state', 'country' or 'city' part of an address. A featureType of
|
||||
settlement selects any human inhabited feature from 'state' down to
|
||||
'neighbourhood'.
|
||||
|
||||
When featureType is set, then results are automatically restricted
|
||||
to the address layer (see above).
|
||||
|
||||
!!! tip
|
||||
Instead of using the featureType filters `country`, `state` or `city`,
|
||||
you can also use a structured query without the finer-grained parameters
|
||||
amenity or street.
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| exclude_place_ids | comma-separated list of place ids | _unset_ |
|
||||
|
||||
If you do not want certain OSM objects to appear in the search
|
||||
result, give a comma separated list of the `place_id`s you want to skip.
|
||||
@@ -110,180 +235,212 @@ This can be used to retrieve additional search results. For example, if a
|
||||
previous query only returned a few results, then including those here would
|
||||
cause the search to return other, less accurate, matches (if possible).
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| viewbox | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
|
||||
|
||||
* `limit=<integer>`
|
||||
Boost parameter which focuses the search on the given area.
|
||||
Any two corner points of the box are accepted as long as they make a proper
|
||||
box. `x` is longitude, `y` is latitude.
|
||||
|
||||
Limit the number of returned results. (Default: 10, Maximum: 50)
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| bounded | 0 or 1 | 0 |
|
||||
|
||||
When set to 1, then it turns the 'viewbox' parameter (see above) into
|
||||
a filter parameter, excluding any results outside the viewbox.
|
||||
|
||||
* `viewbox=<x1>,<y1>,<x2>,<y2>`
|
||||
|
||||
The preferred area to find search results. Any two corner points of the box
|
||||
are accepted as long as they span a real box. `x` is longitude,
|
||||
`y` is latitude.
|
||||
|
||||
|
||||
* `bounded=[0|1]`
|
||||
|
||||
When a viewbox is given, restrict the result to items contained within that
|
||||
viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
|
||||
only search is allowed. Give the special keyword for the amenity in square
|
||||
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
|
||||
search is allowed. Give the special keyword for the amenity in square
|
||||
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
|
||||
There is no guarantee that the result is complete. (Default: 0)
|
||||
There is no guarantee that the result returns all objects in the area.
|
||||
|
||||
|
||||
### Polygon output
|
||||
|
||||
* `polygon_geojson=1`
|
||||
* `polygon_kml=1`
|
||||
* `polygon_svg=1`
|
||||
* `polygon_text=1`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_geojson | 0 or 1 | 0 |
|
||||
| polygon_kml | 0 or 1 | 0 |
|
||||
| polygon_svg | 0 or 1 | 0 |
|
||||
| polygon_text | 0 or 1 | 0 |
|
||||
|
||||
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
|
||||
options can be used at a time. (Default: 0)
|
||||
Add the full geometry of the place to the result output. Output formats
|
||||
in GeoJSON, KML, SVG or WKT are supported. Only one of these
|
||||
options can be used at a time.
|
||||
|
||||
* `polygon_threshold=0.0`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| polygon_threshold | floating-point number | 0.0 |
|
||||
|
||||
Return a simplified version of the output geometry. The parameter is the
|
||||
When one of the polygon_* outputs is chosen, return a simplified version
|
||||
of the output geometry. The parameter describes the
|
||||
tolerance in degrees with which the geometry may differ from the original
|
||||
geometry. Topology is preserved in the result. (Default: 0.0)
|
||||
geometry. Topology is preserved in the geometry.
|
||||
|
||||
### Other
|
||||
|
||||
* `email=<valid email address>`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| email | valid email address | _unset_ |
|
||||
|
||||
If you are making large numbers of requests, please include an appropriate email
|
||||
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
address to identify your requests. See Nominatim's
|
||||
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
|
||||
|
||||
* `dedupe=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| dedupe | 0 or 1 | 1 |
|
||||
|
||||
Sometimes you have several objects in OSM identifying the same place or
|
||||
object in reality. The simplest case is a street being split into many
|
||||
different OSM ways due to different characteristics. Nominatim will
|
||||
attempt to detect such duplicates and only return one match unless
|
||||
this parameter is set to 0. (Default: 1)
|
||||
attempt to detect such duplicates and only return one match. Setting
|
||||
this parameter to 0 disables this deduplication mechanism and
|
||||
ensures that all results are returned.
|
||||
|
||||
* `debug=[0|1]`
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| debug | 0 or 1 | 0 |
|
||||
|
||||
Output assorted developer debug information. Data on internals of Nominatim's
|
||||
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
|
||||
This overrides the specified machine readable format. (Default: 0)
|
||||
|
||||
"search loop" logic, and SQL queries. The output is HTML format.
|
||||
This overrides the specified machine readable format.
|
||||
|
||||
|
||||
## Examples
|
||||
|
||||
|
||||
##### XML with kml polygon
|
||||
##### XML with KML polygon
|
||||
|
||||
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
|
||||
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
|
||||
|
||||
```xml
|
||||
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
|
||||
<place
|
||||
place_id="1620612" osm_type="node" osm_id="452010817"
|
||||
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
|
||||
lat="52.5487429714954" lon="-1.81602098644987"
|
||||
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
|
||||
class="place" type="house">
|
||||
<geokml>
|
||||
<Polygon>
|
||||
<outerBoundaryIs>
|
||||
<LinearRing>
|
||||
<coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
|
||||
</LinearRing>
|
||||
</outerBoundaryIs>
|
||||
</Polygon>
|
||||
</geokml>
|
||||
<house_number>135</house_number>
|
||||
<road>Pilkington Avenue</road>
|
||||
<village>Wylde Green</village>
|
||||
<town>Sutton Coldfield</town>
|
||||
<city>City of Birmingham</city>
|
||||
<county>West Midlands (county)</county>
|
||||
<postcode>B72</postcode>
|
||||
<country>United Kingdom</country>
|
||||
<country_code>gb</country_code>
|
||||
</place>
|
||||
</searchresults>
|
||||
<?xml version="1.0" encoding="UTF-8" ?>
|
||||
<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
|
||||
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
|
||||
querystring="135 pilkington avenue, birmingham"
|
||||
more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&polygon_kml=1&addressdetails=1&limit=20&exclude_place_ids=125279639&format=xml"
|
||||
exclude_place_ids="125279639">
|
||||
<place place_id="125279639"
|
||||
osm_type="way"
|
||||
osm_id="90394480"
|
||||
lat="52.5487921"
|
||||
lon="-1.8164308"
|
||||
boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
|
||||
place_rank="30"
|
||||
address_rank="30"
|
||||
display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
|
||||
class="building"
|
||||
type="residential"
|
||||
importance="9.999999994736442e-08">
|
||||
<geokml>
|
||||
<Polygon>
|
||||
<outerBoundaryIs>
|
||||
<LinearRing>
|
||||
<coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
|
||||
</LinearRing>
|
||||
</outerBoundaryIs>
|
||||
</Polygon>
|
||||
</geokml>
|
||||
<house_number>135</house_number>
|
||||
<road>Pilkington Avenue</road>
|
||||
<hamlet>Maney</hamlet>
|
||||
<town>Sutton Coldfield</town>
|
||||
<village>Wylde Green</village>
|
||||
<city>Birmingham</city>
|
||||
<ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
|
||||
<state_district>West Midlands Combined Authority</state_district>
|
||||
<state>England</state>
|
||||
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
|
||||
<postcode>B72 1LH</postcode>
|
||||
<country>United Kingdom</country>
|
||||
<country_code>gb</country_code>
|
||||
</place>
|
||||
</searchresults>
|
||||
```
|
||||
|
||||
##### JSON with SVG polygon
|
||||
|
||||
[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
|
||||
[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
|
||||
|
||||
```json
|
||||
{
|
||||
"address": {
|
||||
"city": "Berlin",
|
||||
"city_district": "Mitte",
|
||||
"construction": "Unter den Linden",
|
||||
"continent": "European Union",
|
||||
"country": "Deutschland",
|
||||
"country_code": "de",
|
||||
"house_number": "1",
|
||||
"neighbourhood": "Scheunenviertel",
|
||||
"postcode": "10117",
|
||||
"public_building": "Kommandantenhaus",
|
||||
"state": "Berlin",
|
||||
"suburb": "Mitte"
|
||||
},
|
||||
"boundingbox": [
|
||||
"52.5170783996582",
|
||||
"52.5173187255859",
|
||||
"13.3975105285645",
|
||||
"13.3981599807739"
|
||||
],
|
||||
"class": "amenity",
|
||||
"display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
|
||||
"importance": 0.73606775332943,
|
||||
"lat": "52.51719785",
|
||||
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
|
||||
"lon": "13.3978352028938",
|
||||
"osm_id": "15976890",
|
||||
"osm_type": "way",
|
||||
"place_id": "30848715",
|
||||
"svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
|
||||
"type": "public_building"
|
||||
}
|
||||
[
|
||||
{
|
||||
"address": {
|
||||
"ISO3166-2-lvl4": "DE-BE",
|
||||
"borough": "Mitte",
|
||||
"city": "Berlin",
|
||||
"country": "Deutschland",
|
||||
"country_code": "de",
|
||||
"historic": "Kommandantenhaus",
|
||||
"house_number": "1",
|
||||
"neighbourhood": "Friedrichswerder",
|
||||
"postcode": "10117",
|
||||
"road": "Unter den Linden",
|
||||
"suburb": "Mitte"
|
||||
},
|
||||
"boundingbox": [
|
||||
"52.5170798",
|
||||
"52.5173311",
|
||||
"13.3975116",
|
||||
"13.3981577"
|
||||
],
|
||||
"class": "historic",
|
||||
"display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
|
||||
"importance": 0.8135042058306902,
|
||||
"lat": "52.51720765",
|
||||
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
|
||||
"lon": "13.397834399325466",
|
||||
"osm_id": 15976890,
|
||||
"osm_type": "way",
|
||||
"place_id": 108681845,
|
||||
"svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
|
||||
"type": "house"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
##### JSON with address details
|
||||
|
||||
[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
|
||||
[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
|
||||
|
||||
```json
|
||||
{
|
||||
"address": {
|
||||
"bakery": "B\u00e4cker Kamps",
|
||||
"city_district": "Mitte",
|
||||
"continent": "European Union",
|
||||
"country": "Deutschland",
|
||||
"country_code": "de",
|
||||
"footway": "Bahnsteig U6",
|
||||
"neighbourhood": "Sprengelkiez",
|
||||
"postcode": "13353",
|
||||
"state": "Berlin",
|
||||
"suburb": "Wedding"
|
||||
},
|
||||
"boundingbox": [
|
||||
"52.5460929870605",
|
||||
"52.5460968017578",
|
||||
"13.3591794967651",
|
||||
"13.3591804504395"
|
||||
],
|
||||
"class": "shop",
|
||||
"display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
|
||||
"icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
|
||||
"importance": 0.201,
|
||||
"lat": "52.5460941",
|
||||
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
|
||||
"lon": "13.35918",
|
||||
"osm_id": "317179427",
|
||||
"osm_type": "node",
|
||||
"place_id": "1453068",
|
||||
"type": "bakery"
|
||||
}
|
||||
[
|
||||
{
|
||||
"address": {
|
||||
"ISO3166-2-lvl4": "DE-BE",
|
||||
"borough": "Mitte",
|
||||
"city": "Berlin",
|
||||
"country": "Deutschland",
|
||||
"country_code": "de",
|
||||
"neighbourhood": "Sprengelkiez",
|
||||
"postcode": "13347",
|
||||
"road": "Lindower Straße",
|
||||
"shop": "Ditsch",
|
||||
"suburb": "Wedding"
|
||||
},
|
||||
"addresstype": "shop",
|
||||
"boundingbox": [
|
||||
"52.5427201",
|
||||
"52.5427654",
|
||||
"13.3668619",
|
||||
"13.3669442"
|
||||
],
|
||||
"category": "shop",
|
||||
"display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
|
||||
"importance": 9.99999999995449e-06,
|
||||
"lat": "52.54274275",
|
||||
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
|
||||
"lon": "13.36690305710228",
|
||||
"name": "Ditsch",
|
||||
"osm_id": 437595031,
|
||||
"osm_type": "way",
|
||||
"place_id": 204751033,
|
||||
"place_rank": 30,
|
||||
"type": "bakery"
|
||||
}
|
||||
]
|
||||
```
|
||||
|
||||
##### GeoJSON
|
||||
|
||||
@@ -1,35 +1,50 @@
|
||||
# Status
|
||||
|
||||
Useful for checking if the service and database is running. The JSON output also shows
|
||||
Report on the state of the service and database. Useful for checking if the
|
||||
service is up and running. The JSON output also reports
|
||||
when the database was last updated.
|
||||
|
||||
## Endpoint
|
||||
|
||||
The status API has the following format:
|
||||
|
||||
```
|
||||
https://nominatim.openstreetmap.org/status
|
||||
```
|
||||
|
||||
!!! danger "Deprecation warning"
|
||||
The API can also be used with the URL
|
||||
`https://nominatim.openstreetmap.org/status.php`. This is now deprecated
|
||||
and will be removed in future versions.
|
||||
|
||||
|
||||
## Parameters
|
||||
|
||||
* `format=[text|json]` (defaults to 'text')
|
||||
The status endpoint takes a single optional parameter:
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| format | one of: `text`, `json` | 'text' |
|
||||
|
||||
Selects the output format. See below.
|
||||
|
||||
|
||||
## Output
|
||||
|
||||
#### Text format
|
||||
|
||||
```
|
||||
https://nominatim.openstreetmap.org/status.php
|
||||
```
|
||||
When everything is okay, a status code 200 is returned and a simple message: `OK`
|
||||
|
||||
will return HTTP status code 200 and print `OK`.
|
||||
|
||||
On error it will return HTTP status code 500 and print a message, e.g.
|
||||
On error it will return HTTP status code 500 and print a detailed error message, e.g.
|
||||
`ERROR: Database connection failed`.
|
||||
|
||||
|
||||
|
||||
#### JSON format
|
||||
|
||||
```
|
||||
https://nominatim.openstreetmap.org/status.php?format=json
|
||||
```
|
||||
Always returns a HTTP code 200, when the status call could be executed.
|
||||
|
||||
will return HTTP code 200 and a structure
|
||||
On success a JSON dictionary with the following structure is returned:
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -45,8 +60,8 @@ The `software_version` field contains the version of Nominatim used to serve
|
||||
the API. The `database_version` field contains the version of the data format
|
||||
in the database.
|
||||
|
||||
On error will also return HTTP status code 200 and a structure with error
|
||||
code and message, e.g.
|
||||
On error will return a shorter JSON dictionary with the error message
|
||||
and status only, e.g.
|
||||
|
||||
```json
|
||||
{
|
||||
@@ -54,14 +69,3 @@ code and message, e.g.
|
||||
"message": "Database connection failed"
|
||||
}
|
||||
```
|
||||
|
||||
Possible status codes are
|
||||
|
||||
| | message | notes |
|
||||
| --- | ------------------------------ | ----------------------------------------------------------------- |
|
||||
| 700 | "No database" | connection failed |
|
||||
| 701 | "Module failed" | database could not load nominatim.so |
|
||||
| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
|
||||
| 703 | "Query failed" | test query against a database table failed |
|
||||
| 704 | "No value" | test query worked but returned no results |
|
||||
| 705 | "Import date is not available" | No import dates were returned (enabling replication can fix this) |
|
||||
|
||||
@@ -1,149 +1,439 @@
|
||||
## Configuring the Import
|
||||
|
||||
Which OSM objects are added to the database and which of the tags are used
|
||||
can be configured via the import style configuration file. This
|
||||
is a JSON file which contains a list of rules which are matched against every
|
||||
tag of every object and then assign the tag its specific role.
|
||||
In the very first step of a Nominatim import, OSM data is loaded into the
|
||||
database. Nominatim uses [osm2pgsql](https://osm2pgsql.org) for this task.
|
||||
It comes with a [flex style](https://osm2pgsql.org/doc/manual.html#the-flex-output)
|
||||
specifically tailored to filter and convert OSM data into Nominatim's
|
||||
internal data representation.
|
||||
|
||||
The style to use is given by the `NOMINATIM_IMPORT_STYLE` configuration
|
||||
option. There are a number of default styles, which are explained in detail
|
||||
in the [Import section](../admin/Import.md#filtering-imported-data). These
|
||||
standard styles may be referenced by their name.
|
||||
There are a number of default configurations for the flex style which
|
||||
result in geocoding databases of different detail. The
|
||||
[Import section](../admin/Import.md#filtering-imported-data) explains
|
||||
these default configurations in detail.
|
||||
|
||||
You can also create your own custom style. Put the style file into your
|
||||
project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
|
||||
It is always recommended to start with one of the standard styles and customize
|
||||
those. You find the standard styles under the name `import-<stylename>.style`
|
||||
those. You find the standard styles under the name `import-<stylename>.lua`
|
||||
in the standard Nominatim configuration path (usually `/etc/nominatim` or
|
||||
`/usr/local/etc/nominatim`).
|
||||
|
||||
The remainder of the page describes the format of the file.
|
||||
The remainder of the page describes how the flex style works and how to
|
||||
customize it.
|
||||
|
||||
### Configuration Rules
|
||||
### The `flex-base.lua` module
|
||||
|
||||
A single rule looks like this:
|
||||
The core of Nominatim's flex import configuration is the `flex-base` module.
|
||||
It defines the table layout used by Nominatim and provides standard
|
||||
implementations for the import callbacks that make it easy to customize
|
||||
how OSM tags are used by Nominatim.
|
||||
|
||||
Every custom style should include this module to make sure that the correct
|
||||
tables are created. Thus start your custom style as follows:
|
||||
|
||||
``` lua
|
||||
local flex = require('flex-base')
|
||||
|
||||
```json
|
||||
{
|
||||
"keys" : ["key1", "key2", ...],
|
||||
"values" : {
|
||||
"value1" : "prop",
|
||||
"value2" : "prop1,prop2"
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
A rule first defines a list of keys to apply the rule to. This is always a list
|
||||
of strings. The string may have four forms. An empty string matches against
|
||||
any key. A string that ends in an asterisk `*` is a prefix match and accordingly
|
||||
matches against any key that starts with the given string (minus the `*`). A
|
||||
suffix match can be defined similarly with a string that starts with a `*`. Any
|
||||
other string constitutes an exact match.
|
||||
The following sections explain how the module can be customized.
|
||||
|
||||
The second part of the rules defines a list of values and the properties that
|
||||
apply to a successful match. Value strings may be either empty, which
|
||||
means that they match any value, or describe an exact match. Prefix
|
||||
or suffix matching of values is not possible.
|
||||
|
||||
For a rule to match, it has to find a valid combination of keys and values. The
|
||||
resulting property is that of the matched values.
|
||||
### Changing the recognized tags
|
||||
|
||||
The rules in a configuration file are processed sequentially and the first
|
||||
match for each tag wins.
|
||||
If you just want to change which OSM tags are recognized during import,
|
||||
then there are a number of convenience functions to set the tag lists used
|
||||
during the processing.
|
||||
|
||||
A rule where key and value are the empty string is special. This defines the
|
||||
fallback when none of the rules match. The fallback is always used as a last
|
||||
resort when nothing else matches, no matter where the rule appears in the file.
|
||||
Defining multiple fallback rules is not allowed. What happens in this case,
|
||||
is undefined.
|
||||
!!! warning
|
||||
There are no built-in defaults for the tag lists, so all the functions
|
||||
need to be called from your style script to fully process the data.
|
||||
    Make sure you start from one of the default styles and only modify
|
||||
the data you are interested in. You can also derive your style from an
|
||||
existing style by importing the appropriate module, e.g.
|
||||
`local flex = require('import-street')`.
|
||||
|
||||
### Tag Properties
|
||||
Many of the following functions take _key match lists_. These lists can
|
||||
contain three kinds of strings to match against tag keys:
|
||||
A string that ends in an asterisk `*` is a prefix match and accordingly matches
|
||||
against any key that starts with the given string (minus the `*`).
|
||||
A suffix match can be defined similarly with a string that starts with a `*`.
|
||||
Any other string is matched exactly against tag keys.
|
||||
|
||||
One or more of the following properties may be given for each tag:
|
||||
|
||||
* `main`
|
||||
#### `set_main_tags()` - principal tags
|
||||
|
||||
A principal tag. A new row will be added for the object with key and value
|
||||
as `class` and `type`.
|
||||
If a principal or main tag is found on an OSM object, then the object
|
||||
is included in Nominatim's search index. A single object may also have
|
||||
multiple main tags. In that case, the object will be included multiple
|
||||
times in the index, once for each main tag.
|
||||
|
||||
* `with_name`
|
||||
The flex script distinguishes between four types of main tags:
|
||||
|
||||
When the tag is a principal tag (`main` property set): only really add a new
|
||||
row, if there is any name tag found (a reference tag is not sufficient, see
|
||||
below).
|
||||
* __always__: a main tag that is used unconditionally
|
||||
* __named__: consider this main tag only, if the object has a proper name
|
||||
(a reference is not enough, see below).
|
||||
* __named_with_key__: consider this main tag only, when the object has
|
||||
a proper name with a domain prefix. For example, if the main tag is
|
||||
`bridge=yes`, then it will only be added as an extra row, if there is
|
||||
a tag `bridge:name[:XXX]` for the same object. If this property is set,
|
||||
all other names that are not domain-specific are ignored.
|
||||
* __fallback__: use this main tag only, if there is no other main tag.
|
||||
  Fallback always implies `named`, i.e. fallbacks are only tried for
|
||||
named objects.
|
||||
|
||||
* `with_name_key`
|
||||
The `set_main_tags()` function takes exactly one table parameter which
|
||||
defines the keys and key/value combinations to include and the kind of
|
||||
main tag. Each lua table key defines an OSM tag key. The value may
|
||||
be a string defining the kind of main key as described above. Then the tag will
|
||||
be considered a main tag for any possible value. To further restrict
|
||||
which values are acceptable, give a table with the permitted values
|
||||
and their kind of main tag. If the table contains a simple value without
|
||||
key, then this is used as default for values that are not listed.
|
||||
|
||||
When the tag is a principal tag (`main` property set): only really add a new
|
||||
row, if there is also a name tag that matches the key of the principal tag.
|
||||
For example, if the main tag is `bridge=yes`, then it will only be added as
|
||||
an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
|
||||
If this property is set, all other names that are not domain-specific are
|
||||
ignored.
|
||||
!!! example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
* `fallback`
|
||||
flex.set_main_tags{
|
||||
boundary = {administrative = 'named'},
|
||||
highway = {'always', street_lamp = 'named'},
|
||||
landuse = 'fallback'
|
||||
}
|
||||
```
|
||||
|
||||
When the tag is a principal tag (`main` property set): only really add a new
|
||||
row, when no other principal tags for this object have been found. Only one
|
||||
fallback tag can win for an object.
|
||||
In this example an object with a `boundary` tag will only be included
|
||||
when it has a value of `administrative`. Objects with `highway` tags are
|
||||
always included. However when the value is `street_lamp` then the object
|
||||
must have a name, too. With any other value, the object is included
|
||||
independently of the name. Finally, if a `landuse` tag is present then
|
||||
    it will be used independently of the concrete value if neither boundary
|
||||
nor highway tags were found and the object is named.
|
||||
|
||||
* `operator`
|
||||
|
||||
When the tag is a principal tag (`main` property set): also include the
|
||||
`operator` tag in the list of names. This is a special construct for an
|
||||
  out-dated tagging practice in OSM. Fuel stations and chain restaurants
|
||||
in particular used to have the name of the chain tagged as `operator`.
|
||||
These days the chain can be more commonly found in the `brand` tag but
|
||||
there is still enough old data around to warrant this special case.
|
||||
#### `set_prefilters()` - ignoring tags
|
||||
|
||||
* `name`
|
||||
Pre-filtering of tags allows to ignore them for any further processing.
|
||||
Thus pre-filtering takes precedence over any other tag processing. This is
|
||||
useful when some specific key/value combinations need to be excluded from
|
||||
processing. When tags are filtered, they may either be deleted completely
|
||||
or moved to `extratags`. Extra tags are saved with the object and returned
|
||||
to the user when requested, but are not used otherwise.
|
||||
|
||||
Add tag to the list of names.
|
||||
`set_prefilters()` takes a table with four optional fields:
|
||||
|
||||
* `ref`
|
||||
* __delete_keys__ is a _key match list_ for tags that should be deleted
|
||||
* __delete_tags__ contains a table of tag keys pointing to a list of tag
|
||||
values. Tags with matching key/value pairs are deleted.
|
||||
* __extra_keys__ is a _key match list_ for tags which should be saved into
|
||||
extratags
|
||||
* __extra_tags__ contains a table of tag keys pointing to a list of tag
|
||||
values. Tags with matching key/value pairs are moved to extratags.
|
||||
|
||||
Add tag to the list of names as a reference. At the moment this only means
|
||||
that the object is not considered to be named for `with_name`.
|
||||
Key list may contain three kinds of strings:
|
||||
A string that ends in an asterisk `*` is a prefix match and accordingly matches
|
||||
against any key that starts with the given string (minus the `*`).
|
||||
A suffix match can be defined similarly with a string that starts with a `*`.
|
||||
Any other string is matched exactly against tag keys.
|
||||
|
||||
* `address`
|
||||
!!! example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
Add tag to the list of address tags. If the tag starts with `addr:` or
|
||||
`is_in:`, then this prefix is cut off before adding it to the list.
|
||||
flex.set_prefilters{
|
||||
delete_keys = {'source', 'source:*'},
|
||||
extra_tags = {amenity = {'yes', 'no'}}
|
||||
}
|
||||
flex.set_main_tags{
|
||||
amenity = 'always'
|
||||
}
|
||||
```
|
||||
|
||||
* `postcode`
|
||||
In this example any tags `source` and tags that begin with `source:` are
|
||||
deleted before any other processing is done. Getting rid of frequent tags
|
||||
this way can speed up the import.
|
||||
|
||||
Add the value as a postcode to the address tags. If multiple tags are
|
||||
candidate for postcodes, one wins out and the others are dropped.
|
||||
Tags with `amenity=yes` or `amenity=no` are moved to extratags. Later
|
||||
all tags with an `amenity` key are made a main tag. This effectively means
|
||||
that Nominatim will use all amenity tags except for those with value
|
||||
yes and no.
|
||||
|
||||
* `country`
|
||||
#### `set_name_tags()` - defining names
|
||||
|
||||
Add the value as a country code to the address tags. The value must be a
|
||||
two letter country code, otherwise it is ignored. If there are multiple
|
||||
tags that match, then one wins out and the others are dropped.
|
||||
The flex script distinguishes between two kinds of names:
|
||||
|
||||
* `house`
|
||||
* __main__: the primary names make an object fully searchable.
|
||||
Main tags of type _named_ will only cause the object to be included when
|
||||
such a primary name is present. Primary names are usually those found
|
||||
in the `name` tag and its variants.
|
||||
* __extra__: extra names are still added to the search index but they are
|
||||
alone not sufficient to make an object named.
|
||||
|
||||
If no principle tags can be found for the object, still add the object with
|
||||
`class`=`place` and `type`=`house`. Use this for address nodes that have no
|
||||
other function.
|
||||
`set_name_tags()` takes a table with two optional fields `main` and `extra`.
|
||||
They take _key match lists_ for main and extra names respectively.
|
||||
|
||||
* `interpolation`
|
||||
!!! example
|
||||
``` lua
|
||||
local flex = require('flex-base')
|
||||
|
||||
Add this object as an address interpolation (appears as `class`=`place` and
|
||||
`type`=`houses` in the database).
|
||||
flex.set_main_tags{highway = {traffic_light = 'named'}}
|
||||
flex.set_name_tags{main = {'name', 'name:*'},
|
||||
extra = {'ref'}
|
||||
}
|
||||
```
|
||||
|
||||
* `extra`
|
||||
This example creates a search index over traffic lights but will
|
||||
only include those that have a common name and not those which just
|
||||
have some reference ID from the city.
|
||||
|
||||
Add tag to the list of extra tags.
|
||||
#### `set_address_tags()` - defining address parts
|
||||
|
||||
* `skip`
|
||||
Address tags will be used to build up the address of an object.
|
||||
|
||||
Skip the tag completely. Useful when a custom default fallback is defined
|
||||
or to define exceptions to rules.
|
||||
`set_address_tags()` takes a table with arbitrary fields pointing to
|
||||
_key match lists_. Two fields have a special meaning:
|
||||
|
||||
A rule can define as many of these properties for one match as it likes. For
|
||||
example, if the property is `"main,extra"` then the tag will open a new row
|
||||
but also have the tag appear in the list of extra tags.
|
||||
* __main__: defines
|
||||
the tags that make a full address object out of the OSM object. This
|
||||
is usually the housenumber or variants thereof. If a main address tag
|
||||
appears, then the object will always be included, if necessary with a
|
||||
fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:`
|
||||
this will be stripped.
|
||||
|
||||
* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped.
|
||||
|
||||
All other fields will be handled as summary fields. If a key matches the
|
||||
key match list, then its value will be added to the address tags with the
|
||||
name of the field as key. If multiple tags match, then an arbitrary one
|
||||
wins.
|
||||
|
||||
Country tags are handled slightly special. Only tags with a two-letter code
|
||||
are accepted, all other values are discarded.
|
||||
|
||||
!!! example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
flex.set_address_tags{
|
||||
main = {'addr:housenumber'},
|
||||
extra = {'addr:*'},
|
||||
postcode = {'postal_code', 'postcode', 'addr:postcode'},
|
||||
country = {'country-code', 'ISO3166-1'}
|
||||
}
|
||||
```
|
||||
|
||||
In this example all tags which begin with `addr:` will be saved in
|
||||
the address tag list. If one of the tags is `addr:housenumber`, the
|
||||
object will fall back to be entered as a `place=house` in the database
|
||||
unless there is another interested main tag to be found.
|
||||
|
||||
Tags with keys `country-code` and `ISO3166-1` are saved with their
|
||||
value under `country` in the address tag list. The same thing happens
|
||||
to postcodes, they will always be saved under the key `postcode` thus
|
||||
normalizing the multitude of keys that are used in the OSM database.
|
||||
|
||||
|
||||
#### `set_unused_handling()` - processing remaining tags
|
||||
|
||||
This function defines what to do with tags that remain after all tags
|
||||
have been classified using the functions above. There are two ways in
|
||||
which the function can be used:
|
||||
|
||||
`set_unused_handling(delete_keys = ..., delete_tags = ...)` deletes all
|
||||
keys that match the descriptions in the parameters and moves all remaining
|
||||
tags into the extratags list.
|
||||
`set_unused_handling(extra_keys = ..., extra_tags = ...)` moves all tags
|
||||
matching the parameters into the extratags list and then deletes the remaining
|
||||
tags. For the format of the parameters see the description in `set_prefilters()`
|
||||
above.
|
||||
|
||||
!!! example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
flex.set_address_tags{
|
||||
main = {'addr:housenumber'},
|
||||
extra = {'addr:*', 'tiger:county'}
|
||||
}
|
||||
flex.set_unused_handling{delete_keys = {'tiger:*'}}
|
||||
```
|
||||
|
||||
In this example all remaining tags except those beginning with `tiger:`
|
||||
are moved to the extratags list. Note that it is not possible to
|
||||
already delete the tiger tags with `set_prefilters()` because that
|
||||
would remove tiger:county before the address tags are processed.
|
||||
|
||||
### Customizing osm2pgsql callbacks
|
||||
|
||||
osm2pgsql expects the flex style to implement three callbacks, one process
|
||||
function per OSM type. If you want to implement special handling for
|
||||
certain OSM types, you can override the default implementations provided
|
||||
by the flex-base module.
|
||||
|
||||
#### Changing the relation types to be handled
|
||||
|
||||
The default scripts only allows relations of type `multipolygon`, `boundary`
|
||||
and `waterway`. To add other types of relations, set `RELATION_TYPES` for
|
||||
the type to the kind of geometry that should be created. The following
|
||||
kinds of geometries can be used:
|
||||
|
||||
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
|
||||
the relation. If the ways do not form a valid area, then the object is
|
||||
silently discarded.
|
||||
* __relation_as_multiline__ creates a (Multi)LineString from the ways in
|
||||
the relations. Ways are combined as much as possible without any regards
|
||||
to their order in the relation.
|
||||
|
||||
!!! Example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
flex.RELATION_TYPES['site'] = flex.relation_as_multipolygon
|
||||
```
|
||||
|
||||
With this line relations of `type=site` will be included in the index
|
||||
according to main tags found. This only works when the site relation
|
||||
resolves to a valid area. Nodes in the site relation are not part of the
|
||||
geometry.
|
||||
|
||||
|
||||
#### Adding additional logic to processing functions
|
||||
|
||||
The default processing functions are also exported by the flex-base module
|
||||
as `process_node`, `process_way` and `process_relation`. These can be used
|
||||
to implement your own processing functions with some additional processing
|
||||
logic.
|
||||
|
||||
!!! Example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
function osm2pgsql.process_relation(object)
|
||||
if object.tags.boundary ~= 'administrative' or object.tags.admin_level ~= '2' then
|
||||
flex.process_relation(object)
|
||||
end
|
||||
end
|
||||
```
|
||||
|
||||
This example discards all country-level boundaries and uses standard
|
||||
handling for everything else. This can be useful if you want to use
|
||||
your own custom country boundaries.
|
||||
|
||||
|
||||
### Customizing the main processing function
|
||||
|
||||
The main processing function of the flex style can be found in the function
|
||||
`process_tags`. This function is called for all OSM object kinds and is
|
||||
responsible for filtering the tags and writing out the rows into Postgresql.
|
||||
|
||||
!!! Example
|
||||
``` lua
|
||||
local flex = require('import-full')
|
||||
|
||||
local original_process_tags = flex.process_tags
|
||||
|
||||
function flex.process_tags(o)
|
||||
if o.object.tags.highway ~= nil and o.object.tags.access == 'no' then
|
||||
return
|
||||
end
|
||||
|
||||
original_process_tags(o)
|
||||
end
|
||||
```
|
||||
|
||||
This example shows the most simple customization of the process_tags function.
|
||||
It simply adds some additional processing before running the original code.
|
||||
To do that, first save the original function and then overwrite process_tags
|
||||
from the module. In this example all highways which are not accessible
|
||||
by anyone will be ignored.
|
||||
|
||||
|
||||
#### The `Place` class
|
||||
|
||||
The `process_tags` function receives a Lua object of `Place` type which comes
|
||||
with some handy functions to collect the data necessary for geocoding and
|
||||
writing it into the place table. Always use this object to fill the table.
|
||||
|
||||
The Place class has some attributes which you may access read-only:
|
||||
|
||||
* __object__ is the original OSM object data handed in by osm2pgsql
|
||||
* __admin_level__ is the content of the admin_level tag, parsed into an
|
||||
integer and normalized to a value between 0 and 15
|
||||
* __has_name__ is a boolean indicating if the object has a full name
|
||||
* __names__ is a table with the collected list of name tags
|
||||
* __address__ is a table with the collected list of address tags
|
||||
* __extratags__ is a table with the collected list of additional tags to save
|
||||
|
||||
There are a number of functions to fill these fields. All functions expect
|
||||
a table parameter with fields as indicated in the description.
|
||||
Many of these functions expect match functions which are described in detail
|
||||
further below.
|
||||
|
||||
* __delete{match=...}__ removes all tags that match the match function given
|
||||
in _match_.
|
||||
* __grab_extratags{match=...}__ moves all tags that match the match function
|
||||
given in _match_ into extratags. Returns the number of tags moved.
|
||||
* __clean{delete=..., extra=...}__ deletes all tags that match _delete_ and
|
||||
moves the ones that match _extra_ into extratags
|
||||
* __grab_address_parts{groups=...}__ moves matching tags into the address table.
|
||||
_groups_ must be a group match function. Tags of the group `main` and
|
||||
`extra` are added to the address table as is but with `addr:` and `is_in:`
|
||||
prefixes removed from the tag key. All other groups are added with the
|
||||
group name as key and the value from the tag. Multiple values of the same
|
||||
group overwrite each other. The function returns the number of tags saved
|
||||
from the main group.
|
||||
* __grab_main_parts{groups=...}__ moves matching tags into the name table.
|
||||
_groups_ must be a group match function. If a tag of the group `main` is
|
||||
present, the object will be marked as having a name. Tags of group `house`
|
||||
produce a fallback to `place=house`. This fallback is returned by the function
|
||||
if present.
|
||||
|
||||
There are two functions to write a row into the place table. Both functions
|
||||
expect the main tag (key and value) for the row and then use the collected
|
||||
information from the name, address, extratags etc. fields to complete the row.
|
||||
They also have a boolean parameter `save_extra_mains` which defines how any
|
||||
unprocessed tags are handled: when True, the tags will be saved as extratags,
|
||||
when False, they will be simply discarded.
|
||||
|
||||
* __write_row(key, value, save_extra_mains)__ creates a new table row from
|
||||
the current state of the Place object.
|
||||
* __write_place(key, value, mtype, save_extra_mains)__ creates a new row
|
||||
conditionally. When value is nil, the function will attempt to look up the
|
||||
value in the object tags. If value is still nil or mtype is nil, the row
|
||||
is ignored. An mtype of `always` will then always write out the row,
|
||||
a mtype of `named` only, when the object has a full name. When mtype
|
||||
is `named_with_key`, the function checks for a domain name, i.e. a name
|
||||
tag prefixed with the name of the main key. Only if at least one is found,
|
||||
the row will be written. The names are replaced with the domain names found.
|
||||
|
||||
#### Match functions
|
||||
|
||||
The Place functions usually expect either a _match function_ or a
|
||||
_group match function_ to find the tags to apply their function to.
|
||||
|
||||
The __match function__ is a Lua function which takes two parameters,
|
||||
key and value, and returns a boolean to indicate that a tag matches. The
|
||||
flex-base module has a convenience function `tag_match()` to create such a
|
||||
function. It takes a table with two optional fields: `keys` takes a key match
|
||||
list (see above), `tags` takes a table with keys that point to a list of
|
||||
possible values, thus defining key/value matches.
|
||||
|
||||
The __group match function__ is a Lua function which also takes two parameters,
|
||||
key and value, and returns a string indicating to which group or type they
|
||||
belong. The `tag_group()` can be used to create such a function. It expects
|
||||
a table where the group names are the keys and the values are a key match list.
|
||||
|
||||
|
||||
|
||||
### Using the gazetteer output of osm2pgsql
|
||||
|
||||
Nominatim still allows you to configure the gazetteer output to remain
|
||||
backwards compatible with older imports. It will be automatically used
|
||||
when the style file name ends in `.style`. For documentation of the
|
||||
old import style, please refer to the documentation of older releases
|
||||
of Nominatim. Do not use the gazetteer output for new imports. There is no
|
||||
guarantee that new versions of Nominatim are fully compatible with the
|
||||
gazetteer output.
|
||||
|
||||
### Changing the Style of Existing Databases
|
||||
|
||||
|
||||
@@ -12,7 +12,7 @@ customize them.
|
||||
The main value for importance is derived from page ranking values for Wikipedia
|
||||
pages for a place. For places that do not have their own
|
||||
Wikipedia page, a formula is used that derives a static importance from the
|
||||
places [search rank](../customize/Ranking#search-rank).
|
||||
places [search rank](../customize/Ranking.md#search-rank).
|
||||
|
||||
In a second step, a secondary importance value is added which is meant to
|
||||
represent how well-known the general area is where the place is located. It
|
||||
|
||||
55
docs/customize/SQLite.md
Normal file
55
docs/customize/SQLite.md
Normal file
@@ -0,0 +1,55 @@
|
||||
A Nominatim database can be converted into an SQLite database and used as
|
||||
a read-only source for geocoding queries. This section describes how to
|
||||
create and use an SQLite database.
|
||||
|
||||
!!! danger
|
||||
This feature is in an experimental state at the moment. Use at your own
|
||||
risk.
|
||||
|
||||
## Installing prerequisites
|
||||
|
||||
To use a SQLite database, you need to install:
|
||||
|
||||
* SQLite (>= 3.30)
|
||||
* Spatialite (> 5.0.0)
|
||||
|
||||
On Ubuntu/Debian, you can run:
|
||||
|
||||
sudo apt install sqlite3 libsqlite3-mod-spatialite libspatialite7
|
||||
|
||||
## Creating a new SQLite database
|
||||
|
||||
Nominatim cannot import directly into an SQLite database. Instead you have to
|
||||
first create a geocoding database in PostgreSQL by running a
|
||||
[regular Nominatim import](../admin/Import.md).
|
||||
|
||||
Once this is done, the database can be converted to SQLite with
|
||||
|
||||
nominatim convert -o mydb.sqlite
|
||||
|
||||
This will create a database where all geocoding functions are available.
|
||||
Depending on what functions you need, the database can be made smaller:
|
||||
|
||||
* `--without-reverse` omits indexes only needed for reverse geocoding
|
||||
* `--without-search` omits tables and indexes used for forward search
|
||||
* `--without-details` leaves out extra information only available in the
|
||||
details API
|
||||
|
||||
## Using an SQLite database
|
||||
|
||||
Once you have created the database, you can use it by simply pointing the
|
||||
database DSN to the SQLite file:
|
||||
|
||||
NOMINATIM_DATABASE_DSN=sqlite:dbname=mydb.sqlite
|
||||
|
||||
Please note that SQLite support is only available for the Python frontend. To
|
||||
use the test server with an SQLite database, you therefore need to switch
|
||||
the frontend engine:
|
||||
|
||||
nominatim serve --engine falcon
|
||||
|
||||
You need to install falcon or starlette for this, depending on which engine
|
||||
you choose.
|
||||
|
||||
The CLI query commands and the library interface already use the new Python
|
||||
frontend and therefore work right out of the box.
|
||||
@@ -91,7 +91,7 @@ The option is only used by the Legacy tokenizer and ignored otherwise.
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Tokenizer used for normalizing and parsing queries and names |
|
||||
| **Format:** | string |
|
||||
| **Default:** | legacy |
|
||||
| **Default:** | icu |
|
||||
| **After Changes:** | cannot be changed after import |
|
||||
|
||||
Sets the tokenizer type to use for the import. For more information on
|
||||
@@ -148,29 +148,6 @@ Setting this option to 'yes' means that Nominatim skips reindexing of contained
|
||||
objects when the area becomes too large.
|
||||
|
||||
|
||||
#### NOMINATIM_UPDATE_FORWARD_DEPENDENCIES
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Forward geometry changes to dependent objects |
|
||||
| **Format:** | bool |
|
||||
| **Default:** | no |
|
||||
| **Comment:** | EXPERT ONLY. Must not be enabled after import. |
|
||||
|
||||
The geometry of OSM ways and relations may change when a node that is part
|
||||
of the object is moved around. These changes are not propagated per default.
|
||||
The geometry of ways/relations is only updated the next time that the object
|
||||
itself is touched. When enabling this option, then dependent objects will
|
||||
be marked for update when one of its member objects changes.
|
||||
|
||||
Enabling this option may slow down updates significantly.
|
||||
|
||||
!!! warning
|
||||
If you want to enable this option, it must be set already on import.
|
||||
Do not enable this option on an existing database that was imported with
|
||||
NOMINATIM_UPDATE_FORWARD_DEPENDENCIES=no.
|
||||
Updates will become unusably slow.
|
||||
|
||||
#### NOMINATIM_LANGUAGES
|
||||
|
||||
| Summary | |
|
||||
@@ -575,6 +552,8 @@ used.
|
||||
| **Format:** | boolean |
|
||||
| **Default:** | no |
|
||||
| **After Changes:** | run `nominatim refresh --website` |
|
||||
| **Comment:** | PHP frontend only |
|
||||
|
||||
|
||||
This feature is currently undocumented and potentially broken.
|
||||
|
||||
@@ -587,6 +566,7 @@ This feature is currently undocumented and potentially broken.
|
||||
| **Format:** | integer |
|
||||
| **Default:** | 500 |
|
||||
| **After Changes:** | run `nominatim refresh --website` |
|
||||
| **Comment:** | PHP frontend only |
|
||||
|
||||
This setting defines the threshold over which a name is no longer considered
|
||||
as rare. When searching for places with rare names, only the name is used
|
||||
@@ -627,6 +607,88 @@ with a single query.
|
||||
|
||||
Setting this parameter to 0 disables polygon output completely.
|
||||
|
||||
|
||||
#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Disable search for elements that are not in the country grid |
|
||||
| **Format:** | boolean |
|
||||
| **Default:** | no |
|
||||
| **After Changes:** | run `nominatim refresh --website` |
|
||||
| **Comment:** | PHP frontend only |
|
||||
|
||||
Enable to search elements just within countries.
|
||||
|
||||
When enabled, if, despite not finding a point within the static grid of countries, it
|
||||
finds a geometry of a region, do not return the geometry.
|
||||
Return "Unable to geocode" instead.
|
||||
|
||||
|
||||
#### NOMINATIM_SERVE_LEGACY_URLS
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Enable serving via URLs with a .php suffix |
|
||||
| **Format:** | boolean |
|
||||
| **Default:** | yes |
|
||||
| **Comment:** | Python frontend only |
|
||||
|
||||
When enabled, then endpoints are reachable as `/<name>` as well as `/<name>.php`.
|
||||
This can be useful when you want to be backwards-compatible with previous
|
||||
versions of Nominatim.
|
||||
|
||||
|
||||
#### NOMINATIM_API_POOL_SIZE
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Number of parallel database connections per worker |
|
||||
| **Format:** | number |
|
||||
| **Default:** | 10 |
|
||||
| **Comment:** | Python frontend only |
|
||||
|
||||
Sets the maximum number of database connections available for a single instance
|
||||
of Nominatim. When configuring the maximum number of connections that your
|
||||
PostgreSQL database can handle, you need at least
|
||||
`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
|
||||
For configuring the number of workers, refer to the section about
|
||||
[Deploying the Python frontend](../admin/Deployment-Python.md).
|
||||
|
||||
#### NOMINATIM_QUERY_TIMEOUT
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Timeout for SQL queries to the database |
|
||||
| **Format:** | number (seconds) |
|
||||
| **Default:** | 10 |
|
||||
| **Comment:** | Python frontend only |
|
||||
|
||||
When this timeout is set, then all SQL queries that run longer than the
|
||||
specified numbers of seconds will be cancelled and the user receives a
|
||||
timeout exception. Users of the API see a 503 HTTP error.
|
||||
|
||||
The timeout does not apply when using the
|
||||
[low-level DB access](../library/Low-Level-DB-Access.md)
|
||||
of the library. A timeout can be manually set, if required.
|
||||
|
||||
|
||||
#### NOMINATIM_REQUEST_TIMEOUT
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Timeout for search queries |
|
||||
| **Format:** | number (seconds) |
|
||||
| **Default:** | 60 |
|
||||
| **Comment:** | Python frontend only |
|
||||
|
||||
When this timeout is set, a search query will finish sending queries
|
||||
to the database after the timeout has passed and immediately return the
|
||||
results gathered so far.
|
||||
|
||||
Note that under high load you may observe that users receive different results
|
||||
than usual without seeing an error. This may cause some confusion.
|
||||
|
||||
### Logging Settings
|
||||
|
||||
#### NOMINATIM_LOG_DB
|
||||
@@ -666,7 +728,24 @@ The entries in the log file have the following format:
|
||||
<request time> <execution time in s> <number of results> <type> "<query string>"
|
||||
|
||||
Request time is the time when the request was started. The execution time is
|
||||
given in ms and corresponds to the time the query took executing in PHP.
|
||||
given in seconds and corresponds to the time the query took executing in PHP.
|
||||
type contains the name of the endpoint used.
|
||||
|
||||
Can be used at the same time as NOMINATIM_LOG_DB.
|
||||
|
||||
#### NOMINATIM_DEBUG_SQL
|
||||
|
||||
| Summary | |
|
||||
| -------------- | --------------------------------------------------- |
|
||||
| **Description:** | Enable printing of raw SQL by SQLAlchemy |
|
||||
| **Format:** | boolean |
|
||||
| **Default:** | no |
|
||||
| **Comment:** | **For developers only.** |
|
||||
|
||||
This settings enables
|
||||
[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
|
||||
by SQLAlchemy. This can be helpful when debugging some bugs with internal
|
||||
query handling. It should only be used together with the CLI query functions.
|
||||
Enabling it for server mode may have unintended consequences. Use the `debug`
|
||||
parameter instead, which prints information on how the search is executed
|
||||
including SQL statements.
|
||||
|
||||
@@ -102,7 +102,7 @@ Here is an example configuration file:
|
||||
``` yaml
|
||||
normalization:
|
||||
- ":: lower ()"
|
||||
- "ß > 'ss'" # German szet is unimbigiously equal to double ss
|
||||
- "ß > 'ss'" # German szet is unambiguously equal to double ss
|
||||
transliteration:
|
||||
- !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
|
||||
- ":: Ascii ()"
|
||||
@@ -128,7 +128,7 @@ The configuration file contains four sections:
|
||||
The normalization and transliteration sections each define a set of
|
||||
ICU rules that are applied to the names.
|
||||
|
||||
The **normalisation** rules are applied after sanitation. They should remove
|
||||
The **normalization** rules are applied after sanitation. They should remove
|
||||
any information that is not relevant for search at all. Usual rules to be
|
||||
applied here are: lower-casing, removing of special characters, cleanup of
|
||||
spaces.
|
||||
@@ -176,52 +176,66 @@ The following is a list of sanitizers that are shipped with Nominatim.
|
||||
##### split-name-list
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.split_name_list
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
##### strip-brace-terms
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.strip_brace_terms
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
##### tag-analyzer-by-language
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
##### clean-housenumbers
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.clean_housenumbers
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
##### clean-postcodes
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.clean_postcodes
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
##### clean-tiger-tags
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
|
||||
selection:
|
||||
options:
|
||||
members: False
|
||||
rendering:
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
#### delete-tags
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.delete_tags
|
||||
options:
|
||||
members: False
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
#### tag-japanese
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.tag_japanese
|
||||
options:
|
||||
members: False
|
||||
heading_level: 6
|
||||
docstring_section_style: spacy
|
||||
|
||||
#### Token Analysis
|
||||
|
||||
@@ -380,7 +394,7 @@ The analyzer cannot be customized.
|
||||
##### Postcode token analyzer
|
||||
|
||||
The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
|
||||
a 'lookup' varaint of the token, which produces variants with optional
|
||||
a 'lookup' variant of the token, which produces variants with optional
|
||||
spaces. Use together with the clean-postcodes sanitizer.
|
||||
|
||||
The analyzer cannot be customized.
|
||||
|
||||
@@ -36,12 +36,19 @@ It has the following additional requirements:
|
||||
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
|
||||
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
|
||||
* [pytest](https://pytest.org)
|
||||
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
|
||||
|
||||
For testing the Python search frontend, you need to install extra dependencies
|
||||
depending on your choice of webserver framework:
|
||||
|
||||
* [httpx](https://www.python-httpx.org/) (starlette only)
|
||||
* [asgi-lifespan](https://github.com/florimondmanca/asgi-lifespan) (starlette only)
|
||||
|
||||
The documentation is built with mkdocs:
|
||||
|
||||
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
|
||||
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
|
||||
* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
|
||||
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
|
||||
* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
|
||||
|
||||
### Installing prerequisites on Ubuntu/Debian
|
||||
|
||||
@@ -55,8 +62,10 @@ To install all necessary packages run:
|
||||
sudo apt install php-cgi phpunit php-codesniffer \
|
||||
python3-pip python3-setuptools python3-dev
|
||||
|
||||
pip3 install --user behave mkdocs mkdocstrings pytest pylint \
|
||||
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil
|
||||
pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
|
||||
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
|
||||
types-ujson types-requests types-Pygments typing-extensions\
|
||||
httpx asgi-lifespan
|
||||
```
|
||||
|
||||
The `mkdocs` executable will be located in `.local/bin`. You may have to add
|
||||
|
||||
@@ -53,8 +53,7 @@ the function.
|
||||
### Sanitizer configuration
|
||||
|
||||
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
|
||||
rendering:
|
||||
show_source: no
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
### The main filter function of the sanitizer
|
||||
@@ -62,12 +61,10 @@ the function.
|
||||
The filter function receives a single object of type `ProcessInfo`
|
||||
which has with three members:
|
||||
|
||||
* `place`: read-only information about the place being processed.
|
||||
* `place: PlaceInfo`: read-only information about the place being processed.
|
||||
See PlaceInfo below.
|
||||
* `names`: The current list of names for the place. Each name is a
|
||||
PlaceName object.
|
||||
* `address`: The current list of address names for the place. Each name
|
||||
is a PlaceName object.
|
||||
* `names: List[PlaceName]`: The current list of names for the place.
|
||||
* `address: List[PlaceName]`: The current list of address names for the place.
|
||||
|
||||
While the `place` member is provided for information only, the `names` and
|
||||
`address` lists are meant to be manipulated by the sanitizer. It may add and
|
||||
@@ -77,16 +74,14 @@ adding extra attributes) or completely replace the list with a different one.
|
||||
#### PlaceInfo - information about the place
|
||||
|
||||
::: nominatim.data.place_info.PlaceInfo
|
||||
rendering:
|
||||
show_source: no
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
|
||||
#### PlaceName - extended naming information
|
||||
|
||||
::: nominatim.data.place_name.PlaceName
|
||||
rendering:
|
||||
show_source: no
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
|
||||
@@ -134,7 +129,7 @@ sanitizers:
|
||||
!!! warning
|
||||
This example is just a simplified show case on how to create a sanitizer.
|
||||
It is not really read for real-world use: while the sanitizer would
|
||||
correcly transform `West 5th Street` into `5th Street`. it would also
|
||||
correctly transform `West 5th Street` into `5th Street`. it would also
|
||||
shorten a simple `North Street` to `Street`.
|
||||
|
||||
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
|
||||
@@ -145,14 +140,12 @@ They can be found in the directory
|
||||
## Custom token analysis module
|
||||
|
||||
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
|
||||
rendering:
|
||||
show_source: no
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
|
||||
::: nominatim.tokenizer.token_analysis.base.Analyzer
|
||||
rendering:
|
||||
show_source: no
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
### Example: Creating acronym variants for long names
|
||||
|
||||
@@ -10,7 +10,7 @@ There are two kind of tests in this test suite. There are functional tests
|
||||
which test the API interface using a BDD test framework and there are unit
|
||||
tests for specific PHP functions.
|
||||
|
||||
This test directory is sturctured as follows:
|
||||
This test directory is structured as follows:
|
||||
|
||||
```
|
||||
-+- bdd Functional API tests
|
||||
@@ -84,6 +84,8 @@ The tests can be configured with a set of environment variables (`behave -D key=
|
||||
* `TEST_DB` - name of test database (db tests)
|
||||
* `API_TEST_DB` - name of the database containing the API test data (api tests)
|
||||
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
|
||||
* `API_ENGINE` - webframe to use for running search queries, same values as
|
||||
`nominatim serve --engine` parameter
|
||||
* `DB_HOST` - (optional) hostname of database host
|
||||
* `DB_PORT` - (optional) port of database on host
|
||||
* `DB_USER` - (optional) username of database login
|
||||
@@ -120,7 +122,7 @@ and compromises the following data:
|
||||
API tests should only be testing the functionality of the website PHP code.
|
||||
Most tests should be formulated as BDD DB creation tests (see below) instead.
|
||||
|
||||
#### Code Coverage
|
||||
#### Code Coverage (PHP engine only)
|
||||
|
||||
The API tests also support code coverage tests. You need to install
|
||||
[PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).
|
||||
@@ -153,7 +155,3 @@ needs superuser rights for postgres.
|
||||
|
||||
These tests check that data is imported correctly into the place table. They
|
||||
use the same template database as the DB Creation tests, so the same remarks apply.
|
||||
|
||||
Note that most testing of the gazetteer output of osm2pgsql is done in the tests
|
||||
of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
|
||||
the osm2pgsql and Nominatim code.
|
||||
|
||||
@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
|
||||
and implement the abstract functions defined there.
|
||||
|
||||
::: nominatim.tokenizer.base.AbstractTokenizer
|
||||
rendering:
|
||||
heading_level: 4
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
### Python Analyzer Class
|
||||
|
||||
::: nominatim.tokenizer.base.AbstractAnalyzer
|
||||
rendering:
|
||||
heading_level: 4
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
### PL/pgSQL Functions
|
||||
|
||||
@@ -189,6 +189,28 @@ a house number token text. If a place has multiple house numbers they must
|
||||
be listed with a semicolon as delimiter. Must be NULL when the place has no
|
||||
house numbers.
|
||||
|
||||
```sql
|
||||
FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
|
||||
```
|
||||
|
||||
Return true if this is an object that should be parented against a street.
|
||||
Only relevant for objects with address rank 30.
|
||||
|
||||
```sql
|
||||
FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
|
||||
```
|
||||
|
||||
Return true if there are street names to match against for finding the
|
||||
parent of the object.
|
||||
|
||||
|
||||
```sql
|
||||
FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
|
||||
```
|
||||
|
||||
Return true if there are place names to match against for finding the
|
||||
parent of the object.
|
||||
|
||||
```sql
|
||||
FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
|
||||
```
|
||||
|
||||
@@ -18,7 +18,7 @@ elseif (has 'addr:place'?) then (yes)
|
||||
**with same name**;
|
||||
kill
|
||||
else (no)
|
||||
:add addr:place to adress;
|
||||
:add addr:place to address;
|
||||
:**Use closest place**\n**rank 16 to 25**;
|
||||
kill
|
||||
endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 9.8 KiB After Width: | Height: | Size: 9.8 KiB |
@@ -2,6 +2,10 @@
|
||||
display: none!important
|
||||
}
|
||||
|
||||
.wy-nav-content {
|
||||
max-width: 900px!important
|
||||
}
|
||||
|
||||
table {
|
||||
margin-bottom: 12pt
|
||||
}
|
||||
@@ -16,9 +20,17 @@ th {
|
||||
|
||||
.doc-object h6 {
|
||||
margin-bottom: 0.8em;
|
||||
font-size: 120%;
|
||||
font-size: 130%;
|
||||
}
|
||||
|
||||
.doc-object {
|
||||
margin-bottom: 1.3em;
|
||||
}
|
||||
|
||||
.doc-children .doc-contents {
|
||||
margin-left: 3em;
|
||||
}
|
||||
|
||||
.md-footer__inner {
|
||||
display: none;
|
||||
}
|
||||
|
||||
@@ -1,10 +1,15 @@
|
||||
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
|
||||
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
|
||||
address and to generate synthetic addresses of OSM points (reverse geocoding).
|
||||
It has also limited capability to search features by their type
|
||||
(pubs, hotels, churches, etc).
|
||||
|
||||
This guide comes in four parts:
|
||||
This guide comes in five parts:
|
||||
|
||||
* __[API reference](api/Overview.md)__ for users of Nominatim
|
||||
* __[Administration Guide](admin/Installation.md)__ for those who want
|
||||
to install their own Nominatim server
|
||||
* __[Customization Guide](customize/Overview.md)__ for those who want to
|
||||
adapt their own installation to their special requirements
|
||||
* __[Library Guide](library/Getting-Started.md)__ for Python developers who
|
||||
want to use Nominatim as a library in their project
|
||||
* __[Developer's Guide](develop/overview.md)__ for developers of the software
|
||||
|
||||
31
docs/library/Configuration.md
Normal file
31
docs/library/Configuration.md
Normal file
@@ -0,0 +1,31 @@
|
||||
# Configuration
|
||||
|
||||
When using Nominatim through the library, it can be configured in exactly
|
||||
the same way as when running as a service. This means that you should have
|
||||
created a [project directory](../admin/Import.md#creating-the-project-directory)
|
||||
which contains all files belonging to the Nominatim instance. It can also contain
|
||||
an `.env` file with configuration options. Setting configuration parameters
|
||||
via environment variables works as well.
|
||||
|
||||
Configuration options are resolved in the following order:
|
||||
|
||||
* from the OS environment (or the dictionary given in `environ`,
|
||||
(see NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__)
|
||||
* from the .env file in the project directory of the installation
|
||||
* from the default installation in the configuration directory
|
||||
|
||||
For more information on configuration via dotenv and a list of possible
|
||||
configuration parameters, see the [Configuration page](../customize/Settings.md).
|
||||
|
||||
|
||||
## `Configuration` class
|
||||
|
||||
::: nominatim.config.Configuration
|
||||
options:
|
||||
members:
|
||||
- get_bool
|
||||
- get_int
|
||||
- get_str_list
|
||||
- get_path
|
||||
heading_level: 6
|
||||
show_signature_annotations: True
|
||||
248
docs/library/Getting-Started.md
Normal file
248
docs/library/Getting-Started.md
Normal file
@@ -0,0 +1,248 @@
|
||||
# Getting Started
|
||||
|
||||
The Nominatim search frontend can directly be used as a Python library in
|
||||
scripts and applications. When you have imported your own Nominatim database,
|
||||
then it is no longer necessary to run a full web service for it and access
|
||||
the database through http requests. There are
|
||||
also less constraints on the kinds of data that can be accessed. The library
|
||||
allows to get access to more detailed information about the objects saved
|
||||
in the database.
|
||||
|
||||
!!! danger
|
||||
The library interface is currently in an experimental stage. There might
|
||||
be some smaller adjustments to the public interface until the next version.
|
||||
|
||||
The library also misses a proper installation routine, so some manipulation
|
||||
of the PYTHONPATH is required. At the moment, use is only recommended for
|
||||
developers with some experience in Python.
|
||||
|
||||
## Installation
|
||||
|
||||
To use the Nominatim library, you need access to a local Nominatim database.
|
||||
Follow the [installation](../admin/Installation.md) and
|
||||
[import](../admin/Import.md) instructions to set up your database.
|
||||
|
||||
It is not yet possible to install it in the usual way via pip or inside a
|
||||
virtualenv. To get access to the library you need to set an appropriate
|
||||
`PYTHONPATH`. With the default installation, the python library can be found
|
||||
under `/usr/local/share/nominatim/lib-python`. If you have installed
|
||||
Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
|
||||
You can also point the `PYTHONPATH` to the Nominatim source code.
|
||||
|
||||
### A simple search example
|
||||
|
||||
To query the Nominatim database you need to first set up a connection. This
|
||||
is done by creating an Nominatim API object. This object exposes all the
|
||||
search functions of Nominatim that are also known from its web API.
|
||||
|
||||
This code snippet implements a simple search for the town of 'Brugge':
|
||||
|
||||
!!! example
|
||||
=== "NominatimAPIAsync"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
async def search(query):
|
||||
api = napi.NominatimAPIAsync(Path('.'))
|
||||
|
||||
return await api.search(query)
|
||||
|
||||
results = asyncio.run(search('Brugge'))
|
||||
if not results:
|
||||
print('Cannot find Brugge')
|
||||
else:
|
||||
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
|
||||
```
|
||||
|
||||
=== "NominatimAPI"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
api = napi.NominatimAPI(Path('.'))
|
||||
|
||||
results = api.search('Brugge')
|
||||
|
||||
if not results:
|
||||
print('Cannot find Brugge')
|
||||
else:
|
||||
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
|
||||
```
|
||||
|
||||
The Nominatim library is designed around
|
||||
[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
|
||||
provides you with an interface of coroutines.
|
||||
If you have many requests to make, coroutines can speed up your applications
|
||||
significantly.
|
||||
|
||||
For smaller scripts there is also a synchronous wrapper around the API. By
|
||||
using `NominatimAPI`, you get exactly the same interface using classic functions.
|
||||
|
||||
The examples in this chapter will always show-case both
|
||||
implementations. The documentation itself will usually refer only to
|
||||
'Nominatim API class' when both flavours are meant. If a functionality is
|
||||
available only for the synchronous or asynchronous version, this will be
|
||||
explicitly mentioned.
|
||||
|
||||
### Defining which database to use
|
||||
|
||||
The [Configuration](../admin/Import.md#configuration-setup-in-env)
|
||||
section explains how Nominatim is configured using the
|
||||
[dotenv](https://github.com/theskumar/python-dotenv) library.
|
||||
The same configuration mechanism is used with the
|
||||
Nominatim API library. You should therefore be sure you are familiar with
|
||||
the section.
|
||||
|
||||
The constructor of the 'Nominatim API class' takes one mandatory parameter:
|
||||
the path to the [project directory](../admin/Import.md#creating-the-project-directory).
|
||||
You should have set up this directory as part of the Nominatim import.
|
||||
Any configuration found in the `.env` file in this directory will automatically
|
||||
used.
|
||||
|
||||
Yo may also configure Nominatim be setting environment variables.
|
||||
Normally, Nominatim will check the operating system environment. This can be
|
||||
overwritten by giving the constructor a dictionary of configuration parameters.
|
||||
|
||||
Let us look up 'Brugge' in the special database named 'belgium' instead of the
|
||||
standard 'nominatim' database:
|
||||
|
||||
!!! example
|
||||
=== "NominatimAPIAsync"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
config_params = {
|
||||
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
|
||||
}
|
||||
|
||||
async def search(query):
|
||||
api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
|
||||
|
||||
return await api.search(query)
|
||||
|
||||
results = asyncio.run(search('Brugge'))
|
||||
```
|
||||
|
||||
=== "NominatimAPI"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
config_params = {
|
||||
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
|
||||
}
|
||||
|
||||
api = napi.NominatimAPI(Path('.'), environ=config_params)
|
||||
|
||||
results = api.search('Brugge')
|
||||
```
|
||||
|
||||
### Presenting results to humans
|
||||
|
||||
All search functions return the raw results from the database. There is no
|
||||
full human-readable label. To create such a label, you need two things:
|
||||
|
||||
* the address details of the place
|
||||
* adapt the result to the language you wish to use for display
|
||||
|
||||
Again searching for 'Brugge', this time with a nicely formatted result:
|
||||
|
||||
!!! example
|
||||
=== "NominatimAPIAsync"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
import asyncio
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
async def search(query):
|
||||
api = napi.NominatimAPIAsync(Path('.'))
|
||||
|
||||
return await api.search(query, address_details=True)
|
||||
|
||||
results = asyncio.run(search('Brugge'))
|
||||
|
||||
locale = napi.Locales(['fr', 'en'])
|
||||
for i, result in enumerate(results):
|
||||
address_parts = result.address_rows.localize(locale)
|
||||
print(f"{i + 1}. {', '.join(address_parts)}")
|
||||
```
|
||||
|
||||
=== "NominatimAPI"
|
||||
``` python
|
||||
from pathlib import Path
|
||||
|
||||
import nominatim.api as napi
|
||||
|
||||
api = napi.NominatimAPI(Path('.'))
|
||||
|
||||
results = api.search('Brugge', address_details=True)
|
||||
|
||||
locale = napi.Locales(['fr', 'en'])
|
||||
for i, result in enumerate(results):
|
||||
address_parts = result.address_rows.localize(locale)
|
||||
print(f"{i + 1}. {', '.join(address_parts)}")
|
||||
```
|
||||
|
||||
To request information about the address of a result, add the optional
|
||||
parameter 'address_details' to your search:
|
||||
|
||||
``` python
|
||||
>>> results = api.search('Brugge', address_details=True)
|
||||
```
|
||||
|
||||
An additional field `address_rows` will set in results that are returned.
|
||||
It contains a list of all places that make up the address of the place. For
|
||||
simplicity, this includes name and house number of the place itself. With
|
||||
the names in this list it is possible to create a human-readable description
|
||||
of the result. To do that, you first need to decide in which language the
|
||||
results should be presented. As with the names in the result itself, the
|
||||
places in `address_rows` contain all possible name translation for each row.
|
||||
|
||||
The library has a helper class `Locale` which helps extracting a name of a
|
||||
place in the preferred language. It takes a single parameter with a list
|
||||
of language codes in the order of preference. So
|
||||
|
||||
``` python
|
||||
locale = napi.Locale(['fr', 'en'])
|
||||
```
|
||||
|
||||
creates a helper class that returns the name preferably in French. If that is
|
||||
not possible, it tries English and eventually falls back to the default `name`
|
||||
or `ref`.
|
||||
|
||||
The `Locale` object can be applied to a name dictionary to return the best-matching
|
||||
name out of it:
|
||||
|
||||
``` python
|
||||
>>> print(locale.display_name(results[0].names))
|
||||
'Brugges'
|
||||
```
|
||||
|
||||
The `address_row` field has a helper function to apply the function to all
|
||||
its members and save the result in the `local_name` field. It also returns
|
||||
all the localized names as a convenient simple list. This list can be used
|
||||
to create a human-readable output:
|
||||
|
||||
``` python
|
||||
>>> address_parts = results[0].address_rows.localize(locale)
|
||||
>>> print(', '.join(address_parts))
|
||||
Bruges, Flandre-Occidentale, Flandre, Belgique
|
||||
```
|
||||
|
||||
This is a fairly simple way to create a human-readable description. The
|
||||
place information in `address_rows` contains further information about each
|
||||
place. For example, which OSM `adlin_level` was used, what category the place
|
||||
belongs to or what rank Nominatim has assigned. Use this to adapt the output
|
||||
to local address formats.
|
||||
|
||||
For more information on address rows, see
|
||||
[detailed address description](Result-Handling.md#detailed-address-description).
|
||||
62
docs/library/Input-Parameter-Types.md
Normal file
62
docs/library/Input-Parameter-Types.md
Normal file
@@ -0,0 +1,62 @@
|
||||
# Input Parameter Types
|
||||
|
||||
This page describes in more detail some of the input parameter types used
|
||||
in the query functions of the API object.
|
||||
|
||||
## Place identification
|
||||
|
||||
The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
|
||||
[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
|
||||
require references to places in the database. Below the possible
|
||||
types for place identification are listed. All types are dataclasses.
|
||||
|
||||
### PlaceID
|
||||
|
||||
::: nominatim.api.PlaceID
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
### OsmID
|
||||
|
||||
::: nominatim.api.OsmID
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
## Geometry types
|
||||
|
||||
::: nominatim.api.GeometryFormat
|
||||
options:
|
||||
heading_level: 6
|
||||
members_order: source
|
||||
|
||||
## Geometry input
|
||||
|
||||
### Point
|
||||
|
||||
::: nominatim.api.Point
|
||||
options:
|
||||
heading_level: 6
|
||||
show_signature_annotations: True
|
||||
|
||||
### Bbox
|
||||
|
||||
::: nominatim.api.Bbox
|
||||
options:
|
||||
heading_level: 6
|
||||
show_signature_annotations: True
|
||||
members_order: source
|
||||
group_by_category: False
|
||||
|
||||
## Layers
|
||||
|
||||
Layers allow to restrict the search result to thematic groups. This is
|
||||
orthogonal to restriction by address ranks, which groups places by their
|
||||
geographic extent.
|
||||
|
||||
|
||||
::: nominatim.api.DataLayer
|
||||
options:
|
||||
heading_level: 6
|
||||
members_order: source
|
||||
|
||||
|
||||
56
docs/library/Low-Level-DB-Access.md
Normal file
56
docs/library/Low-Level-DB-Access.md
Normal file
@@ -0,0 +1,56 @@
|
||||
# Low-level connections
|
||||
|
||||
The `NominatimAPIAsync` class allows to directly access the underlying
|
||||
database connection to explore the raw data. Nominatim uses
|
||||
[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
|
||||
refer to the documentation of the library to understand how to write SQL.
|
||||
|
||||
To get access to a search connection, use the `begin()` function of your
|
||||
API object. This returns a `SearchConnection` object described below
|
||||
wrapped in a context manager. Its
|
||||
`t` property has definitions for all Nominatim search tables. For an
|
||||
overview of available tables, refer to the
|
||||
[Development Layout](../develop/Database-Layout.md) in in the development
|
||||
chapter. Note that only tables that are needed for search are accessible
|
||||
as SQLAlchemy tables.
|
||||
|
||||
!!! warning
|
||||
The database layout is not part of the API definition and may change
|
||||
without notice. If you play with the low-level access functions, you
|
||||
need to be prepared for such changes.
|
||||
|
||||
Here is a simple example, which prints how many places are available in
|
||||
the placex table:
|
||||
|
||||
```
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
import sqlalchemy as sa
|
||||
from nominatim.api import NominatimAPIAsync
|
||||
|
||||
async def print_table_size():
|
||||
api = NominatimAPIAsync(Path('.'))
|
||||
|
||||
async with api.begin() as conn:
|
||||
cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
|
||||
print(f'placex table has {cnt} rows.')
|
||||
|
||||
asyncio.run(print_table_size())
|
||||
```
|
||||
|
||||
!!! warning
|
||||
Low-level connections may only be used to read data from the database.
|
||||
Do not use it to add or modify data or you might break Nominatim's
|
||||
normal functions.
|
||||
|
||||
## SearchConnection class
|
||||
|
||||
::: nominatim.api.SearchConnection
|
||||
options:
|
||||
members:
|
||||
- scalar
|
||||
- execute
|
||||
- get_class_table
|
||||
- get_db_property
|
||||
- get_property
|
||||
heading_level: 6
|
||||
36
docs/library/NominatimAPI.md
Normal file
36
docs/library/NominatimAPI.md
Normal file
@@ -0,0 +1,36 @@
|
||||
# The Nominatim API classes
|
||||
|
||||
The API classes are the core object of the search library. Always instantiate
|
||||
one of these classes first. The API classes are **not threadsafe**. You need
|
||||
to instantiate a separate instance for each thread.
|
||||
|
||||
### NominatimAPI
|
||||
|
||||
::: nominatim.api.NominatimAPI
|
||||
options:
|
||||
members:
|
||||
- __init__
|
||||
- config
|
||||
- close
|
||||
- status
|
||||
- details
|
||||
- lookup
|
||||
- reverse
|
||||
- search
|
||||
- search_address
|
||||
- search_category
|
||||
heading_level: 6
|
||||
group_by_category: False
|
||||
|
||||
|
||||
### NominatimAPIAsync
|
||||
|
||||
::: nominatim.api.NominatimAPIAsync
|
||||
options:
|
||||
members:
|
||||
- __init__
|
||||
- setup_database
|
||||
- close
|
||||
- begin
|
||||
heading_level: 6
|
||||
group_by_category: False
|
||||
58
docs/library/Result-Handling.md
Normal file
58
docs/library/Result-Handling.md
Normal file
@@ -0,0 +1,58 @@
|
||||
# Result handling
|
||||
|
||||
The search functions of the Nominatim API always return a result object
|
||||
with the raw information about the place that is available in the
|
||||
database. This section discusses data types used in the results and utility
|
||||
functions that allow further processing of the results.
|
||||
|
||||
## Result fields
|
||||
|
||||
### Sources
|
||||
|
||||
Nominatim takes the result data from multiple sources. The `source_table` field
|
||||
in the result describes, from which source the result was retrieved.
|
||||
|
||||
::: nominatim.api.SourceTable
|
||||
options:
|
||||
heading_level: 6
|
||||
members_order: source
|
||||
|
||||
### Detailed address description
|
||||
|
||||
When the `address_details` parameter is set, then functions return not
|
||||
only information about the result place but also about the place that
|
||||
make up the address. This information is almost always required when you
|
||||
want to present the user with a human-readable description of the result.
|
||||
See also [Localization](#localization) below.
|
||||
|
||||
The address details are available in the `address_rows` field as a ordered
|
||||
list of `AddressLine` objects with the country information last. The list also
|
||||
contains the result place itself and some artificial entries, for example,
|
||||
for the house number or the country code. This makes processing and creating
|
||||
a full address easier.
|
||||
|
||||
::: nominatim.api.AddressLine
|
||||
options:
|
||||
heading_level: 6
|
||||
members_order: source
|
||||
|
||||
### Detailed search terms
|
||||
|
||||
The `details` function can return detailed information about which search terms
|
||||
may be used to find a place, when the `keywords` parameter is set. Search
|
||||
terms are split into terms for the name of the place and search terms for
|
||||
its address.
|
||||
|
||||
::: nominatim.api.WordInfo
|
||||
options:
|
||||
heading_level: 6
|
||||
|
||||
## Localization
|
||||
|
||||
Results are always returned with the full list of available names.
|
||||
|
||||
### Locale
|
||||
|
||||
::: nominatim.api.Locales
|
||||
options:
|
||||
heading_level: 6
|
||||
@@ -1,5 +1,9 @@
|
||||
site_name: Nominatim Documentation
|
||||
theme: readthedocs
|
||||
site_name: Nominatim Manual
|
||||
theme:
|
||||
name: material
|
||||
features:
|
||||
- navigation.tabs
|
||||
copyright: Copyright © Nominatim developer community
|
||||
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
|
||||
site_url: https://nominatim.org
|
||||
repo_url: https://github.com/openstreetmap/Nominatim
|
||||
@@ -18,7 +22,8 @@ nav:
|
||||
- 'Basic Installation': 'admin/Installation.md'
|
||||
- 'Import' : 'admin/Import.md'
|
||||
- 'Update' : 'admin/Update.md'
|
||||
- 'Deploy' : 'admin/Deployment.md'
|
||||
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
|
||||
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
|
||||
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
|
||||
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
|
||||
- 'Maintenance' : 'admin/Maintenance.md'
|
||||
@@ -35,6 +40,14 @@ nav:
|
||||
- 'Special Phrases': 'customize/Special-Phrases.md'
|
||||
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
|
||||
- 'External data: Postcodes': 'customize/Postcodes.md'
|
||||
- 'Conversion to SQLite': 'customize/SQLite.md'
|
||||
- 'Library Guide':
|
||||
- 'Getting Started': 'library/Getting-Started.md'
|
||||
- 'Nominatim API class': 'library/NominatimAPI.md'
|
||||
- 'Configuration': 'library/Configuration.md'
|
||||
- 'Input Parameter Types': 'library/Input-Parameter-Types.md'
|
||||
- 'Result Handling': 'library/Result-Handling.md'
|
||||
- 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
|
||||
- 'Developers Guide':
|
||||
- 'Architecture Overview' : 'develop/overview.md'
|
||||
- 'Database Layout' : 'develop/Database-Layout.md'
|
||||
@@ -45,12 +58,14 @@ nav:
|
||||
- 'Testing' : 'develop/Testing.md'
|
||||
- 'External Data Sources': 'develop/data-sources.md'
|
||||
- 'Appendix':
|
||||
- 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
|
||||
- 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
|
||||
- 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
|
||||
markdown_extensions:
|
||||
- codehilite
|
||||
- admonition
|
||||
- pymdownx.superfences
|
||||
- pymdownx.tabbed:
|
||||
alternate_style: true
|
||||
- def_list
|
||||
- toc:
|
||||
permalink:
|
||||
@@ -59,7 +74,8 @@ plugins:
|
||||
- search
|
||||
- mkdocstrings:
|
||||
handlers:
|
||||
python-legacy:
|
||||
rendering:
|
||||
show_source: false
|
||||
show_signature_annotations: false
|
||||
python:
|
||||
paths: ["${PROJECT_SOURCE_DIR}"]
|
||||
options:
|
||||
show_source: False
|
||||
show_bases: False
|
||||
|
||||
@@ -38,23 +38,25 @@ class DB
|
||||
|
||||
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
|
||||
try {
|
||||
$conn = new \PDO($this->sDSN, null, null, $aConnOptions);
|
||||
$this->connection = new \PDO($this->sDSN, null, null, $aConnOptions);
|
||||
} catch (\PDOException $e) {
|
||||
$sMsg = 'Failed to establish database connection:' . $e->getMessage();
|
||||
throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
|
||||
}
|
||||
|
||||
$conn->exec("SET DateStyle TO 'sql,european'");
|
||||
$conn->exec("SET client_encoding TO 'utf-8'");
|
||||
$this->connection->exec("SET DateStyle TO 'sql,european'");
|
||||
$this->connection->exec("SET client_encoding TO 'utf-8'");
|
||||
// Disable JIT and parallel workers. They interfere badly with search SQL.
|
||||
$conn->exec("UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost'");
|
||||
$conn->exec("UPDATE pg_settings SET setting = 0 WHERE name = 'max_parallel_workers_per_gather'");
|
||||
$this->connection->exec('SET max_parallel_workers_per_gather TO 0');
|
||||
if ($this->getPostgresVersion() >= 11) {
|
||||
$this->connection->exec('SET jit_above_cost TO -1');
|
||||
}
|
||||
|
||||
$iMaxExecution = ini_get('max_execution_time');
|
||||
if ($iMaxExecution > 0) {
|
||||
$conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
|
||||
$this->connection->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
|
||||
}
|
||||
|
||||
$this->connection = $conn;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
@@ -135,7 +135,7 @@ class Debug
|
||||
|
||||
public static function printSQL($sSQL)
|
||||
{
|
||||
echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
|
||||
echo '<p><tt><b>'.date('c').'</b> <font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
|
||||
}
|
||||
|
||||
private static function outputVar($mVar, $sPreNL)
|
||||
@@ -178,11 +178,12 @@ class Debug
|
||||
}
|
||||
|
||||
if (is_string($mVar)) {
|
||||
echo "'$mVar'";
|
||||
return strlen($mVar) + 2;
|
||||
$sOut = "'$mVar'";
|
||||
} else {
|
||||
$sOut = (string)$mVar;
|
||||
}
|
||||
|
||||
echo (string)$mVar;
|
||||
return strlen((string)$mVar);
|
||||
echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
|
||||
return strlen($sOut);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -874,7 +874,7 @@ class Geocode
|
||||
$iCountWords = 0;
|
||||
$sAddress = $aResult['langaddress'];
|
||||
foreach ($aRecheckWords as $i => $sWord) {
|
||||
if (stripos($sAddress, $sWord)!==false) {
|
||||
if (grapheme_stripos($sAddress, $sWord)!==false) {
|
||||
$iCountWords++;
|
||||
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
|
||||
$iCountWords += 0.1;
|
||||
|
||||
@@ -86,7 +86,7 @@ class PlaceLookup
|
||||
($this->bIncludePolygonAsSVG ? 1 : 0);
|
||||
if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
|
||||
if (CONST_PolygonOutput_MaximumTypes) {
|
||||
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
|
||||
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
|
||||
} else {
|
||||
userError('Polygon output is disabled');
|
||||
}
|
||||
@@ -524,12 +524,7 @@ class PlaceLookup
|
||||
|
||||
// Get the bounding box and outline polygon
|
||||
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
|
||||
if ($fLonReverse != null && $fLatReverse != null) {
|
||||
$sSQL .= ' ST_Y(closest_point) as centrelat,';
|
||||
$sSQL .= ' ST_X(closest_point) as centrelon,';
|
||||
} else {
|
||||
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
|
||||
}
|
||||
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
|
||||
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
|
||||
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
|
||||
if ($this->bIncludePolygonAsGeoJSON) {
|
||||
@@ -544,19 +539,21 @@ class PlaceLookup
|
||||
if ($this->bIncludePolygonAsText) {
|
||||
$sSQL .= ',ST_AsText(geometry) as astext';
|
||||
}
|
||||
|
||||
$sSQL .= ' FROM (SELECT place_id';
|
||||
if ($fLonReverse != null && $fLatReverse != null) {
|
||||
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
|
||||
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
|
||||
$sFrom .=' ELSE centroid END AS closest_point';
|
||||
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
|
||||
$sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
|
||||
$sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
|
||||
$sSQL .=' ELSE centroid END AS centroid';
|
||||
} else {
|
||||
$sFrom = ' from placex where place_id = '.$iPlaceID;
|
||||
$sSQL .= ',centroid';
|
||||
}
|
||||
if ($this->fPolygonSimplificationThreshold > 0) {
|
||||
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
|
||||
$sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry';
|
||||
} else {
|
||||
$sSQL .= $sFrom;
|
||||
$sSQL .= ',geometry';
|
||||
}
|
||||
$sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx';
|
||||
|
||||
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');
|
||||
|
||||
|
||||
@@ -40,10 +40,10 @@ class ReverseGeocode
|
||||
9 => 12,
|
||||
10 => 17, // City
|
||||
11 => 17,
|
||||
12 => 18, // Town / Village
|
||||
13 => 18,
|
||||
14 => 22, // Suburb
|
||||
15 => 22,
|
||||
12 => 18, // Town
|
||||
13 => 19, // Village
|
||||
14 => 22, // Neighbourhood
|
||||
15 => 25, // Locality
|
||||
16 => 26, // major street
|
||||
17 => 27, // minor street
|
||||
18 => 30, // or >, Building
|
||||
@@ -85,6 +85,11 @@ class ReverseGeocode
|
||||
|
||||
protected function lookupLargeArea($sPointSQL, $iMaxRank)
|
||||
{
|
||||
$sCountryCode = $this->getCountryCode($sPointSQL);
|
||||
if (CONST_Search_WithinCountries and $sCountryCode == null) {
|
||||
return null;
|
||||
}
|
||||
|
||||
if ($iMaxRank > 4) {
|
||||
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
|
||||
if ($aPlace) {
|
||||
@@ -94,12 +99,12 @@ class ReverseGeocode
|
||||
|
||||
// If no polygon which contains the searchpoint is found,
|
||||
// searches in the country_osm_grid table for a polygon.
|
||||
return $this->lookupInCountry($sPointSQL, $iMaxRank);
|
||||
return $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
|
||||
}
|
||||
|
||||
protected function lookupInCountry($sPointSQL, $iMaxRank)
|
||||
protected function getCountryCode($sPointSQL)
|
||||
{
|
||||
Debug::newFunction('lookupInCountry');
|
||||
Debug::newFunction('getCountryCode');
|
||||
// searches for polygon in table country_osm_grid which contains the searchpoint
|
||||
// and searches for the nearest place node to the searchpoint in this polygon
|
||||
$sSQL = 'SELECT country_code FROM country_osm_grid';
|
||||
@@ -111,8 +116,12 @@ class ReverseGeocode
|
||||
null,
|
||||
'Could not determine country polygon containing the point.'
|
||||
);
|
||||
Debug::printVar('Country code', $sCountryCode);
|
||||
return $sCountryCode;
|
||||
}
|
||||
|
||||
protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
|
||||
{
|
||||
Debug::newFunction('lookupInCountry');
|
||||
if ($sCountryCode) {
|
||||
if ($iMaxRank > 4) {
|
||||
// look for place nodes with the given country code
|
||||
@@ -122,12 +131,13 @@ class ReverseGeocode
|
||||
$sSQL .= ' FROM placex';
|
||||
$sSQL .= ' WHERE osm_type = \'N\'';
|
||||
$sSQL .= ' AND country_code = \''.$sCountryCode.'\'';
|
||||
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
|
||||
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
|
||||
$sSQL .= ' AND rank_search between 5 and ' .min(25, $iMaxRank);
|
||||
$sSQL .= ' AND class = \'place\' AND type != \'postcode\'';
|
||||
$sSQL .= ' AND type != \'postcode\'';
|
||||
$sSQL .= ' AND name IS NOT NULL ';
|
||||
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
|
||||
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, 1.8)) p ';
|
||||
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
|
||||
$sSQL .= ') as a ';
|
||||
$sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
|
||||
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
|
||||
$sSQL .= ' LIMIT 1';
|
||||
@@ -216,23 +226,18 @@ class ReverseGeocode
|
||||
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
|
||||
$sSQL .= ' FROM placex';
|
||||
$sSQL .= ' WHERE osm_type = \'N\'';
|
||||
// using rank_search because of a better differentiation
|
||||
// for place nodes at rank_address 16
|
||||
$sSQL .= ' AND rank_search > '.$iRankSearch;
|
||||
$sSQL .= ' AND rank_search <= '.$iMaxRank;
|
||||
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
|
||||
$sSQL .= ' AND rank_address > 0';
|
||||
$sSQL .= ' AND class = \'place\'';
|
||||
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
|
||||
$sSQL .= ' AND type != \'postcode\'';
|
||||
$sSQL .= ' AND name IS NOT NULL ';
|
||||
$sSQL .= ' AND indexed_status = 0 AND linked_place_id is null';
|
||||
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, reverse_place_diameter('.$iRankSearch.'::smallint))';
|
||||
$sSQL .= ' ORDER BY distance ASC,';
|
||||
$sSQL .= ' rank_address DESC';
|
||||
$sSQL .= ' limit 500) as a';
|
||||
$sSQL .= ' WHERE ST_CONTAINS((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
|
||||
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
|
||||
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
|
||||
$sSQL .= ' limit 100) as a';
|
||||
$sSQL .= ' WHERE ST_Contains((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
|
||||
$sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
|
||||
$sSQL .= ' ORDER BY distance ASC, rank_search DESC';
|
||||
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
|
||||
$sSQL .= ' LIMIT 1';
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
|
||||
@@ -1,190 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* SPDX-License-Identifier: GPL-2.0-only
|
||||
*
|
||||
* This file is part of Nominatim. (https://nominatim.org)
|
||||
*
|
||||
* Copyright (C) 2022 by the Nominatim developer community.
|
||||
* For a full list of authors see the git log.
|
||||
*/
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
// Script to extract structured city and street data
|
||||
// from a running nominatim instance as CSV data
|
||||
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/ParameterParser.php');
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
$aCMDOptions = array(
|
||||
'Export addresses as CSV file from a Nominatim database',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
|
||||
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
|
||||
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
|
||||
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
|
||||
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
|
||||
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
|
||||
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
|
||||
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
|
||||
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
|
||||
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
|
||||
'Additional output types: postcode, placeid (placeid for each object)',
|
||||
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
|
||||
'can be merged into one column by simply using a comma-separated list.',
|
||||
"\nDefault output-type: street",
|
||||
'Default output format: street;suburb;city;county;state;country'
|
||||
);
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
|
||||
|
||||
loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
|
||||
$aRankmap = array(
|
||||
'continent' => 1,
|
||||
'country' => 4,
|
||||
'state' => 8,
|
||||
'county' => 12,
|
||||
'city' => 16,
|
||||
'suburb' => 20,
|
||||
'street' => 26,
|
||||
'path' => 27
|
||||
);
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
|
||||
if (isset($aCMDResult['output-type'])) {
|
||||
if (!isset($aRankmap[$aCMDResult['output-type']])) {
|
||||
fail('unknown output-type: '.$aCMDResult['output-type']);
|
||||
}
|
||||
$iOutputRank = $aRankmap[$aCMDResult['output-type']];
|
||||
} else {
|
||||
$iOutputRank = $aRankmap['street'];
|
||||
}
|
||||
|
||||
|
||||
// Preferred language
|
||||
$oParams = new Nominatim\ParameterParser();
|
||||
if (!isset($aCMDResult['language'])) {
|
||||
$aCMDResult['language'] = 'xx';
|
||||
}
|
||||
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
|
||||
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
|
||||
|
||||
// output formatting: build up a lookup table that maps address ranks to columns
|
||||
$aColumnMapping = array();
|
||||
$iNumCol = 0;
|
||||
if (!isset($aCMDResult['output-format'])) {
|
||||
$aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
|
||||
}
|
||||
foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
|
||||
$bHasData = false;
|
||||
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
|
||||
if ($sRank == 'postcode' || $sRank == 'placeid') {
|
||||
$aColumnMapping[$sRank] = $iNumCol;
|
||||
$bHasData = true;
|
||||
} elseif (isset($aRankmap[$sRank])) {
|
||||
$iRank = $aRankmap[$sRank];
|
||||
if ($iRank <= $iOutputRank) {
|
||||
$aColumnMapping[(string)$iRank] = $iNumCol;
|
||||
$bHasData = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($bHasData) {
|
||||
$iNumCol++;
|
||||
}
|
||||
}
|
||||
|
||||
// build the query for objects
|
||||
$sPlacexSQL = 'select min(place_id) as place_id, ';
|
||||
$sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
|
||||
$sPlacexSQL .= 'country_code as cc, ';
|
||||
$sPlacexSQL .= 'postcode, ';
|
||||
// get the address places excluding postcodes
|
||||
$sPlacexSQL .= 'array(select address_place_id from place_addressline a';
|
||||
$sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
|
||||
$sPlacexSQL .= ' and address_place_id != placex.place_id';
|
||||
$sPlacexSQL .= ' and not cached_rank_address in (5,11)';
|
||||
$sPlacexSQL .= ' and cached_rank_address > 2 order by cached_rank_address)';
|
||||
$sPlacexSQL .= ' as address';
|
||||
$sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
|
||||
|
||||
$sPlacexSQL .= ' and rank_address = '.$iOutputRank;
|
||||
|
||||
if (isset($aCMDResult['restrict-to-country'])) {
|
||||
$sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
|
||||
}
|
||||
|
||||
// restriction to parent place id
|
||||
$sParentId = false;
|
||||
$sOsmType = false;
|
||||
|
||||
if (isset($aCMDResult['restrict-to-osm-node'])) {
|
||||
$sOsmType = 'N';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-node'];
|
||||
}
|
||||
if (isset($aCMDResult['restrict-to-osm-way'])) {
|
||||
$sOsmType = 'W';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-way'];
|
||||
}
|
||||
if (isset($aCMDResult['restrict-to-osm-relation'])) {
|
||||
$sOsmType = 'R';
|
||||
$sOsmId = $aCMDResult['restrict-to-osm-relation'];
|
||||
}
|
||||
if ($sOsmType) {
|
||||
$sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
|
||||
$sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
|
||||
if (!$sParentId) {
|
||||
fail('Could not find place '.$sOsmType.' '.$sOsmId);
|
||||
}
|
||||
}
|
||||
if ($sParentId) {
|
||||
$sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
|
||||
}
|
||||
|
||||
$sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
|
||||
|
||||
// Iterate over placeids
|
||||
// to get further hierarchical information
|
||||
//var_dump($sPlacexSQL);
|
||||
$oResults = $oDB->getQueryStatement($sPlacexSQL);
|
||||
$fOutstream = fopen('php://output', 'w');
|
||||
while ($aRow = $oResults->fetch()) {
|
||||
$iPlaceID = $aRow['place_id'];
|
||||
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
|
||||
$sSQL .= ' WHERE isaddress';
|
||||
$sSQL .= ' order by rank_address desc,isaddress desc';
|
||||
$aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
|
||||
|
||||
$aOutput = array_fill(0, $iNumCol, '');
|
||||
// output address parts
|
||||
foreach ($aAddressLines as $aAddress) {
|
||||
if (isset($aColumnMapping[$aAddress['rank_address']])) {
|
||||
$aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
|
||||
}
|
||||
}
|
||||
// output postcode
|
||||
if (isset($aColumnMapping['postcode'])) {
|
||||
if ($aCMDResult['output-all-postcodes']) {
|
||||
$sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
|
||||
$sSQL .= 'on px.place_id = pa.address_place_id ';
|
||||
$sSQL .= 'where pa.cached_rank_address in (5,11) ';
|
||||
$sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
|
||||
$sSQL .= 'group by postcode order by count(*) desc limit 1';
|
||||
$sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
|
||||
|
||||
$aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
|
||||
} else {
|
||||
$aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
|
||||
}
|
||||
}
|
||||
if (isset($aColumnMapping['placeid'])) {
|
||||
$aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
|
||||
}
|
||||
fputcsv($fOutstream, $aOutput);
|
||||
}
|
||||
fclose($fOutstream);
|
||||
@@ -1,114 +0,0 @@
|
||||
<?php
|
||||
/**
|
||||
* SPDX-License-Identifier: GPL-2.0-only
|
||||
*
|
||||
* This file is part of Nominatim. (https://nominatim.org)
|
||||
*
|
||||
* Copyright (C) 2022 by the Nominatim developer community.
|
||||
* For a full list of authors see the git log.
|
||||
*/
|
||||
@define('CONST_LibDir', dirname(dirname(__FILE__)));
|
||||
|
||||
require_once(CONST_LibDir.'/init-cmd.php');
|
||||
require_once(CONST_LibDir.'/log.php');
|
||||
require_once(CONST_LibDir.'/PlaceLookup.php');
|
||||
require_once(CONST_LibDir.'/ReverseGeocode.php');
|
||||
|
||||
ini_set('memory_limit', '800M');
|
||||
|
||||
$aCMDOptions = array(
|
||||
'Tools to warm nominatim db',
|
||||
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
|
||||
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
|
||||
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
|
||||
array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
|
||||
array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
|
||||
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
|
||||
);
|
||||
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
|
||||
|
||||
loadSettings($aCMDResult['project-dir'] ?? getcwd());
|
||||
|
||||
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
|
||||
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
|
||||
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
|
||||
@define('CONST_Log_File', getSetting('LOG_FILE', false));
|
||||
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
|
||||
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
|
||||
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
|
||||
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
|
||||
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
|
||||
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
|
||||
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
|
||||
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
|
||||
|
||||
require_once(CONST_LibDir.'/Geocode.php');
|
||||
|
||||
$oDB = new Nominatim\DB();
|
||||
$oDB->connect();
|
||||
|
||||
$bVerbose = $aResult['verbose'];
|
||||
|
||||
function print_results($aResults, $bVerbose)
|
||||
{
|
||||
if ($bVerbose) {
|
||||
if ($aResults && count($aResults)) {
|
||||
echo $aResults[0]['langaddress']."\n";
|
||||
} else {
|
||||
echo "<not found>\n";
|
||||
}
|
||||
} else {
|
||||
echo '.';
|
||||
}
|
||||
}
|
||||
|
||||
if (!$aResult['search-only']) {
|
||||
$oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
|
||||
$oReverseGeocode->setZoom(20);
|
||||
$oPlaceLookup = new Nominatim\PlaceLookup($oDB);
|
||||
$oPlaceLookup->setIncludeAddressDetails(true);
|
||||
$oPlaceLookup->setLanguagePreference(array('en'));
|
||||
|
||||
echo 'Warm reverse: ';
|
||||
if ($bVerbose) {
|
||||
echo "\n";
|
||||
}
|
||||
for ($i = 0; $i < 1000; $i++) {
|
||||
$fLat = rand(-9000, 9000) / 100;
|
||||
$fLon = rand(-18000, 18000) / 100;
|
||||
if ($bVerbose) {
|
||||
echo "$fLat, $fLon = ";
|
||||
}
|
||||
|
||||
$oLookup = $oReverseGeocode->lookup($fLat, $fLon);
|
||||
$aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
|
||||
print_results($aSearchResults, $bVerbose);
|
||||
}
|
||||
echo "\n";
|
||||
}
|
||||
|
||||
if (!$aResult['reverse-only']) {
|
||||
$oGeocode = new Nominatim\Geocode($oDB);
|
||||
|
||||
echo 'Warm search: ';
|
||||
if ($bVerbose) {
|
||||
echo "\n";
|
||||
}
|
||||
|
||||
$oTokenizer = new \Nominatim\Tokenizer($oDB);
|
||||
|
||||
$aWords = $oTokenizer->mostFrequentWords(1000);
|
||||
|
||||
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
|
||||
foreach ($aWords as $sWord) {
|
||||
if ($bVerbose) {
|
||||
echo "$sWord = ";
|
||||
}
|
||||
|
||||
$oGeocode->setLanguagePreference(array('en'));
|
||||
$oGeocode->setQuery($sWord);
|
||||
$aSearchResults = $oGeocode->lookup();
|
||||
print_results($aSearchResults, $bVerbose);
|
||||
}
|
||||
echo "\n";
|
||||
}
|
||||
@@ -36,6 +36,9 @@ if (empty($aPlace)) {
|
||||
$aFilteredPlaces['properties']['geocoding']['osm_id'] = $aPlace['osm_id'];
|
||||
}
|
||||
|
||||
$aFilteredPlaces['properties']['geocoding']['osm_key'] = $aPlace['class'];
|
||||
$aFilteredPlaces['properties']['geocoding']['osm_value'] = $aPlace['type'];
|
||||
|
||||
$aFilteredPlaces['properties']['geocoding']['type'] = addressRankToGeocodeJsonType($aPlace['rank_address']);
|
||||
|
||||
$aFilteredPlaces['properties']['geocoding']['accuracy'] = (int) $fDistance;
|
||||
|
||||
@@ -23,7 +23,7 @@ $aLangPrefOrder = $oParams->getPreferredLanguages();
|
||||
|
||||
$sPlaceId = $oParams->getString('place_id');
|
||||
$sOsmType = $oParams->getSet('osmtype', array('N', 'W', 'R'));
|
||||
$iOsmId = $oParams->getInt('osmid', -1);
|
||||
$iOsmId = $oParams->getInt('osmid', 0);
|
||||
$sClass = $oParams->getString('class');
|
||||
|
||||
$bIncludeKeywords = $oParams->getBool('keywords', false);
|
||||
@@ -38,7 +38,7 @@ $oDB->connect();
|
||||
|
||||
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
|
||||
|
||||
if ($sOsmType && $iOsmId > 0) {
|
||||
if ($sOsmType && $iOsmId !== 0) {
|
||||
$sSQL = 'SELECT place_id FROM placex WHERE osm_type = :type AND osm_id = :id';
|
||||
$aSQLParams = array(':type' => $sOsmType, ':id' => $iOsmId);
|
||||
// osm_type and osm_id are not unique enough
|
||||
|
||||
@@ -187,6 +187,7 @@ BEGIN
|
||||
|
||||
-- --- Return the record for the base entry.
|
||||
|
||||
current_rank_address := 1000;
|
||||
FOR location IN
|
||||
SELECT placex.place_id, osm_type, osm_id, name,
|
||||
coalesce(extratags->'linked_place', extratags->'place') as place_type,
|
||||
@@ -261,7 +262,7 @@ BEGIN
|
||||
-- If the place had a postcode assigned, take this one only
|
||||
-- into consideration when it is an area and the place does not have
|
||||
-- a postcode itself.
|
||||
IF location.fromarea AND location.isaddress
|
||||
IF location.fromarea AND location_isaddress
|
||||
AND (place.address is null or not place.address ? 'postcode')
|
||||
THEN
|
||||
place.postcode := null; -- remove the less exact postcode
|
||||
|
||||
@@ -62,10 +62,6 @@ BEGIN
|
||||
WHILE langs[i] IS NOT NULL LOOP
|
||||
wiki_article := extratags->(case when langs[i] in ('english','country') THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
|
||||
IF wiki_article is not null THEN
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/wiki/',E'\\2:');
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/w/index.php\\?title=',E'\\2:');
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)/([a-z]{2,3})/wiki/',E'\\2:');
|
||||
--wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3})[=:]',E'\\2:');
|
||||
wiki_article := replace(wiki_article,' ','_');
|
||||
IF strpos(wiki_article, ':') IN (3,4) THEN
|
||||
wiki_article_language := lower(trim(split_part(wiki_article, ':', 1)));
|
||||
@@ -134,7 +130,7 @@ BEGIN
|
||||
|
||||
-- Still nothing? Fall back to a default.
|
||||
IF result.importance is null THEN
|
||||
result.importance := 0.75001 - (rank_search::float / 40);
|
||||
result.importance := 0.40001 - (rank_search::float / 75);
|
||||
END IF;
|
||||
|
||||
{% if 'secondary_importance' in db.tables %}
|
||||
|
||||
@@ -164,7 +164,7 @@ DECLARE
|
||||
newend INTEGER;
|
||||
moddiff SMALLINT;
|
||||
linegeo GEOMETRY;
|
||||
splitline GEOMETRY;
|
||||
splitpoint FLOAT;
|
||||
sectiongeo GEOMETRY;
|
||||
postcode TEXT;
|
||||
stepmod SMALLINT;
|
||||
@@ -223,15 +223,27 @@ BEGIN
|
||||
FROM placex, generate_series(1, array_upper(waynodes, 1)) nodeidpos
|
||||
WHERE osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT
|
||||
and address is not NULL and address ? 'housenumber'
|
||||
and ST_Distance(NEW.linegeo, geometry) < 0.0005
|
||||
ORDER BY nodeidpos
|
||||
LOOP
|
||||
{% if debug %}RAISE WARNING 'processing point % (%)', nextnode.hnr, ST_AsText(nextnode.geometry);{% endif %}
|
||||
IF linegeo is null THEN
|
||||
linegeo := NEW.linegeo;
|
||||
ELSE
|
||||
splitline := ST_Split(ST_Snap(linegeo, nextnode.geometry, 0.0005), nextnode.geometry);
|
||||
sectiongeo := ST_GeometryN(splitline, 1);
|
||||
linegeo := ST_GeometryN(splitline, 2);
|
||||
splitpoint := ST_LineLocatePoint(linegeo, nextnode.geometry);
|
||||
IF splitpoint = 0 THEN
|
||||
-- Corner case where the splitpoint falls on the first point
|
||||
-- and thus would not return a geometry. Skip that section.
|
||||
sectiongeo := NULL;
|
||||
ELSEIF splitpoint = 1 THEN
|
||||
-- Point is at the end of the line.
|
||||
sectiongeo := linegeo;
|
||||
linegeo := NULL;
|
||||
ELSE
|
||||
-- Split the line.
|
||||
sectiongeo := ST_LineSubstring(linegeo, 0, splitpoint);
|
||||
linegeo := ST_LineSubstring(linegeo, splitpoint, 1);
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
IF prevnode.hnr is not null
|
||||
@@ -239,6 +251,9 @@ BEGIN
|
||||
-- regularly mapped housenumbers.
|
||||
-- (Conveniently also fails if one of the house numbers is not a number.)
|
||||
and abs(prevnode.hnr - nextnode.hnr) > NEW.step
|
||||
-- If the interpolation geometry is broken or two nodes are at the
|
||||
-- same place, then splitting might produce a point. Ignore that.
|
||||
and ST_GeometryType(sectiongeo) = 'ST_LineString'
|
||||
THEN
|
||||
IF prevnode.hnr < nextnode.hnr THEN
|
||||
startnumber := prevnode.hnr;
|
||||
@@ -300,12 +315,12 @@ BEGIN
|
||||
NEW.address, postcode,
|
||||
NEW.country_code, NEW.geometry_sector, 0);
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
-- early break if we are out of line string,
|
||||
-- might happen when a line string loops back on itself
|
||||
IF ST_GeometryType(linegeo) != 'ST_LineString' THEN
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
-- early break if we are out of line string,
|
||||
-- might happen when a line string loops back on itself
|
||||
IF linegeo is null or ST_GeometryType(linegeo) != 'ST_LineString' THEN
|
||||
RETURN NEW;
|
||||
END IF;
|
||||
|
||||
prevnode := nextnode;
|
||||
|
||||
@@ -37,7 +37,7 @@ BEGIN
|
||||
-- Remove the place from the list of places to be deleted
|
||||
DELETE FROM place_to_be_deleted pdel
|
||||
WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id
|
||||
and pdel.class = NEW.class;
|
||||
and pdel.class = NEW.class and pdel.type = NEW.type;
|
||||
|
||||
-- Have we already done this place?
|
||||
SELECT * INTO existing
|
||||
@@ -296,7 +296,9 @@ BEGIN
|
||||
extratags = NEW.extratags,
|
||||
admin_level = NEW.admin_level,
|
||||
indexed_status = 2,
|
||||
geometry = NEW.geometry
|
||||
geometry = CASE WHEN existingplacex.rank_address = 0
|
||||
THEN simplify_large_polygons(NEW.geometry)
|
||||
ELSE NEW.geometry END
|
||||
WHERE place_id = existingplacex.place_id;
|
||||
|
||||
-- Invalidate linked places: they potentially get a new name and addresses.
|
||||
@@ -363,45 +365,3 @@ BEGIN
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION flush_deleted_places()
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
BEGIN
|
||||
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
|
||||
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
|
||||
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
|
||||
|
||||
-- delete from place table
|
||||
ALTER TABLE place DISABLE TRIGGER place_before_delete;
|
||||
DELETE FROM place USING place_to_be_deleted
|
||||
WHERE place.osm_type = place_to_be_deleted.osm_type
|
||||
and place.osm_id = place_to_be_deleted.osm_id
|
||||
and place.class = place_to_be_deleted.class
|
||||
and place.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
ALTER TABLE place ENABLE TRIGGER place_before_delete;
|
||||
|
||||
-- Mark for delete in the placex table
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = place_to_be_deleted.osm_type
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
|
||||
-- Mark for delete in interpolations
|
||||
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE place_to_be_deleted.osm_type = 'W'
|
||||
and place_to_be_deleted.class = 'place'
|
||||
and place_to_be_deleted.type = 'houses'
|
||||
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
|
||||
and not deferred;
|
||||
|
||||
-- Clear todo list.
|
||||
TRUNCATE TABLE place_to_be_deleted;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
--
|
||||
-- This file is part of Nominatim. (https://nominatim.org)
|
||||
--
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- Copyright (C) 2024 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
-- Trigger functions for the placex table.
|
||||
@@ -88,12 +88,18 @@ BEGIN
|
||||
-- Add all names from the place nodes that deviate from the name
|
||||
-- in the relation with the prefix '_place_'. Deviation means that
|
||||
-- either the value is different or a given key is missing completely
|
||||
SELECT hstore(array_agg('_place_' || key), array_agg(value)) INTO extra_names
|
||||
FROM each(location.name - result.name);
|
||||
{% if debug %}RAISE WARNING 'Extra names: %', extra_names;{% endif %}
|
||||
IF result.name is null THEN
|
||||
SELECT hstore(array_agg('_place_' || key), array_agg(value))
|
||||
INTO result.name
|
||||
FROM each(location.name);
|
||||
ELSE
|
||||
SELECT hstore(array_agg('_place_' || key), array_agg(value)) INTO extra_names
|
||||
FROM each(location.name - result.name);
|
||||
{% if debug %}RAISE WARNING 'Extra names: %', extra_names;{% endif %}
|
||||
|
||||
IF extra_names is not null THEN
|
||||
result.name := result.name || extra_names;
|
||||
IF extra_names is not null THEN
|
||||
result.name := result.name || extra_names;
|
||||
END IF;
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'Final names: %', result.name;{% endif %}
|
||||
@@ -113,12 +119,14 @@ CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
|
||||
AS $$
|
||||
DECLARE
|
||||
location RECORD;
|
||||
member JSONB;
|
||||
parent RECORD;
|
||||
result BIGINT;
|
||||
distance FLOAT;
|
||||
new_distance FLOAT;
|
||||
waygeom GEOMETRY;
|
||||
BEGIN
|
||||
{% if db.middle_db_format == '1' %}
|
||||
FOR location IN
|
||||
SELECT members FROM planet_osm_rels
|
||||
WHERE parts @> ARRAY[poi_osm_id]
|
||||
@@ -155,6 +163,40 @@ BEGIN
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
|
||||
{% else %}
|
||||
FOR member IN
|
||||
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(members)
|
||||
WHERE planet_osm_member_ids(members, poi_osm_type::char(1)) && ARRAY[poi_osm_id]
|
||||
and tags->>'type' = 'associatedStreet'
|
||||
and value->>'role' = 'street'
|
||||
LOOP
|
||||
FOR parent IN
|
||||
SELECT place_id, geometry
|
||||
FROM placex
|
||||
WHERE osm_type = (member->>'type')::char(1)
|
||||
and osm_id = (member->>'ref')::bigint
|
||||
and name is not null
|
||||
and rank_search between 26 and 27
|
||||
LOOP
|
||||
-- Find the closest 'street' member.
|
||||
-- Avoid distance computation for the frequent case where there is
|
||||
-- only one street member.
|
||||
IF waygeom is null THEN
|
||||
result := parent.place_id;
|
||||
waygeom := parent.geometry;
|
||||
ELSE
|
||||
distance := coalesce(distance, ST_Distance(waygeom, bbox));
|
||||
new_distance := ST_Distance(parent.geometry, bbox);
|
||||
IF new_distance < distance THEN
|
||||
distance := new_distance;
|
||||
result := parent.place_id;
|
||||
waygeom := parent.geometry;
|
||||
END IF;
|
||||
END IF;
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
{% endif %}
|
||||
|
||||
RETURN result;
|
||||
END;
|
||||
$$
|
||||
@@ -251,7 +293,11 @@ CREATE OR REPLACE FUNCTION find_linked_place(bnd placex)
|
||||
RETURNS placex
|
||||
AS $$
|
||||
DECLARE
|
||||
{% if db.middle_db_format == '1' %}
|
||||
relation_members TEXT[];
|
||||
{% else %}
|
||||
relation_members JSONB;
|
||||
{% endif %}
|
||||
rel_member RECORD;
|
||||
linked_placex placex%ROWTYPE;
|
||||
bnd_name TEXT;
|
||||
@@ -672,6 +718,12 @@ BEGIN
|
||||
NEW.country_code := NULL;
|
||||
END IF;
|
||||
|
||||
-- Simplify polygons with a very large memory footprint when they
|
||||
-- do not take part in address computation.
|
||||
IF NEW.rank_address = 0 THEN
|
||||
NEW.geometry := simplify_large_polygons(NEW.geometry);
|
||||
END IF;
|
||||
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}
|
||||
@@ -743,7 +795,11 @@ CREATE OR REPLACE FUNCTION placex_update()
|
||||
DECLARE
|
||||
i INTEGER;
|
||||
location RECORD;
|
||||
{% if db.middle_db_format == '1' %}
|
||||
relation_members TEXT[];
|
||||
{% else %}
|
||||
relation_member JSONB;
|
||||
{% endif %}
|
||||
|
||||
geom GEOMETRY;
|
||||
parent_address_level SMALLINT;
|
||||
@@ -788,6 +844,9 @@ BEGIN
|
||||
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search);
|
||||
|
||||
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
|
||||
IF NEW.extratags = ''::hstore THEN
|
||||
NEW.extratags := NULL;
|
||||
END IF;
|
||||
|
||||
-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
|
||||
-- the previous link status.
|
||||
@@ -962,6 +1021,7 @@ BEGIN
|
||||
|
||||
-- waterway ways are linked when they are part of a relation and have the same class/type
|
||||
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
|
||||
{% if db.middle_db_format == '1' %}
|
||||
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
|
||||
LOOP
|
||||
FOR i IN 1..array_upper(relation_members, 1) BY 2 LOOP
|
||||
@@ -980,6 +1040,29 @@ BEGIN
|
||||
END IF;
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
{% else %}
|
||||
FOR relation_member IN
|
||||
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(r.members)
|
||||
WHERE r.id = NEW.osm_id
|
||||
LOOP
|
||||
IF relation_member->>'role' IN ('', 'main_stream', 'side_stream')
|
||||
and relation_member->>'type' = 'W'
|
||||
THEN
|
||||
{% if debug %}RAISE WARNING 'waterway parent %, child %', NEW.osm_id, relation_member;{% endif %}
|
||||
FOR linked_node_id IN
|
||||
SELECT place_id FROM placex
|
||||
WHERE osm_type = 'W' and osm_id = (relation_member->>'ref')::bigint
|
||||
and class = NEW.class and type in ('river', 'stream', 'canal', 'drain', 'ditch')
|
||||
and (relation_member->>'role' != 'side_stream' or NEW.name->'name' = name->'name')
|
||||
LOOP
|
||||
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
|
||||
{% if 'search_name' in db.tables %}
|
||||
DELETE FROM search_name WHERE place_id = linked_node_id;
|
||||
{% endif %}
|
||||
END LOOP;
|
||||
END IF;
|
||||
END LOOP;
|
||||
{% endif %}
|
||||
{% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
|
||||
END IF;
|
||||
|
||||
@@ -996,7 +1079,7 @@ BEGIN
|
||||
|
||||
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
NEW.parent_place_id := null;
|
||||
is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
|
||||
is_place_address := not token_is_street_address(NEW.token_info);
|
||||
|
||||
-- We have to find our parent road.
|
||||
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
|
||||
@@ -1013,7 +1096,7 @@ BEGIN
|
||||
SELECT p.country_code, p.postcode, p.name FROM placex p
|
||||
WHERE p.place_id = NEW.parent_place_id INTO location;
|
||||
|
||||
IF is_place_address THEN
|
||||
IF is_place_address and NEW.address ? 'place' THEN
|
||||
-- Check if the addr:place tag is part of the parent name
|
||||
SELECT count(*) INTO i
|
||||
FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
|
||||
@@ -1120,7 +1203,7 @@ BEGIN
|
||||
ELSE
|
||||
-- No linked place? As a last resort check if the boundary is tagged with
|
||||
-- a place type and adapt the rank address.
|
||||
IF NEW.rank_address > 0 and NEW.extratags ? 'place' THEN
|
||||
IF NEW.rank_address between 4 and 25 and NEW.extratags ? 'place' THEN
|
||||
SELECT address_rank INTO place_address_level
|
||||
FROM compute_place_rank(NEW.country_code, 'A', 'place',
|
||||
NEW.extratags->'place', 0::SMALLINT, False, null);
|
||||
@@ -1182,6 +1265,8 @@ BEGIN
|
||||
END IF;
|
||||
ELSEIF NEW.rank_address > 25 THEN
|
||||
max_rank := 25;
|
||||
ELSEIF NEW.class in ('place','boundary') and NEW.type in ('postcode','postal_code') THEN
|
||||
max_rank := NEW.rank_search;
|
||||
ELSE
|
||||
max_rank := NEW.rank_address;
|
||||
END IF;
|
||||
@@ -1230,8 +1315,14 @@ BEGIN
|
||||
{% endif %}
|
||||
END IF;
|
||||
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
|
||||
IF NEW.postcode is null AND NEW.rank_search > 8
|
||||
AND (NEW.rank_address > 0
|
||||
OR ST_GeometryType(NEW.geometry) not in ('ST_LineString','ST_MultiLineString')
|
||||
OR ST_Length(NEW.geometry) < 0.02)
|
||||
THEN
|
||||
NEW.postcode := get_nearest_postcode(NEW.country_code,
|
||||
CASE WHEN NEW.rank_address > 25
|
||||
THEN NEW.centroid ELSE NEW.geometry END);
|
||||
END IF;
|
||||
|
||||
{% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %}
|
||||
|
||||
@@ -284,3 +284,24 @@ BEGIN
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
|
||||
rankings TEXT,
|
||||
def_weight FLOAT)
|
||||
RETURNS FLOAT
|
||||
AS $$
|
||||
DECLARE
|
||||
rank JSON;
|
||||
BEGIN
|
||||
FOR rank IN
|
||||
SELECT * FROM json_array_elements(rankings::JSON)
|
||||
LOOP
|
||||
IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
|
||||
RETURN (rank->>0)::float;
|
||||
END IF;
|
||||
END LOOP;
|
||||
RETURN def_weight;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
@@ -73,6 +73,26 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION get_rel_node_members(members JSONB, memberLabels TEXT[])
|
||||
RETURNS SETOF BIGINT
|
||||
AS $$
|
||||
DECLARE
|
||||
member JSONB;
|
||||
BEGIN
|
||||
FOR member IN SELECT * FROM jsonb_array_elements(members)
|
||||
LOOP
|
||||
IF member->>'type' = 'N' and member->>'role' = ANY(memberLabels) THEN
|
||||
RETURN NEXT (member->>'ref')::bigint;
|
||||
END IF;
|
||||
END LOOP;
|
||||
|
||||
RETURN;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
-- Copy 'name' to or from the default language.
|
||||
--
|
||||
-- \param country_code Country code of the object being named.
|
||||
@@ -273,8 +293,8 @@ BEGIN
|
||||
END IF;
|
||||
|
||||
RETURN ST_Envelope(ST_Collect(
|
||||
ST_Project(geom, radius, 0.785398)::geometry,
|
||||
ST_Project(geom, radius, 3.9269908)::geometry));
|
||||
ST_Project(geom::geography, radius, 0.785398)::geometry,
|
||||
ST_Project(geom::geography, radius, 3.9269908)::geometry));
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
@@ -416,6 +436,20 @@ END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
CREATE OR REPLACE FUNCTION simplify_large_polygons(geometry GEOMETRY)
|
||||
RETURNS GEOMETRY
|
||||
AS $$
|
||||
BEGIN
|
||||
IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
|
||||
and ST_MemSize(geometry) > 3000000
|
||||
THEN
|
||||
geometry := ST_SimplifyPreserveTopology(geometry, 0.0001);
|
||||
END IF;
|
||||
RETURN geometry;
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION place_force_delete(placeid BIGINT)
|
||||
RETURNS BOOLEAN
|
||||
@@ -429,9 +463,10 @@ BEGIN
|
||||
SELECT osm_type, osm_id, class, type FROM placex WHERE place_id = placeid INTO osmtype, osmid, pclass, ptype;
|
||||
DELETE FROM import_polygon_delete where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
|
||||
DELETE FROM import_polygon_error where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
|
||||
-- force delete from place/placex by making it a very small geometry
|
||||
UPDATE place set geometry = ST_SetSRID(ST_Point(0,0), 4326) where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
|
||||
DELETE FROM place where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
|
||||
-- force delete by directly entering it into the to-be-deleted table
|
||||
INSERT INTO place_to_be_deleted (osm_type, osm_id, class, type, deferred)
|
||||
VALUES(osmtype, osmid, pclass, ptype, false);
|
||||
PERFORM flush_deleted_places();
|
||||
|
||||
RETURN TRUE;
|
||||
END;
|
||||
@@ -486,3 +521,56 @@ BEGIN
|
||||
END;
|
||||
$$
|
||||
LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION flush_deleted_places()
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
BEGIN
|
||||
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
|
||||
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
|
||||
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
|
||||
|
||||
-- delete from place table
|
||||
ALTER TABLE place DISABLE TRIGGER place_before_delete;
|
||||
DELETE FROM place USING place_to_be_deleted
|
||||
WHERE place.osm_type = place_to_be_deleted.osm_type
|
||||
and place.osm_id = place_to_be_deleted.osm_id
|
||||
and place.class = place_to_be_deleted.class
|
||||
and place.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
ALTER TABLE place ENABLE TRIGGER place_before_delete;
|
||||
|
||||
-- Mark for delete in the placex table
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
|
||||
-- Mark for delete in interpolations
|
||||
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE place_to_be_deleted.osm_type = 'W'
|
||||
and place_to_be_deleted.class = 'place'
|
||||
and place_to_be_deleted.type = 'houses'
|
||||
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
|
||||
and not deferred;
|
||||
|
||||
-- Clear todo list.
|
||||
TRUNCATE TABLE place_to_be_deleted;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
@@ -23,6 +23,10 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
|
||||
USING GIST (geometry) {{db.tablespace.search_index}};
|
||||
-- Index is needed during import but can be dropped as soon as a full
|
||||
-- geometry index is in place. The partial index is almost as big as the full
|
||||
-- index.
|
||||
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
|
||||
ON placex USING gist (geometry) {{db.tablespace.search_index}}
|
||||
@@ -30,6 +34,13 @@ CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
|
||||
AND rank_address between 4 and 25 AND type != 'postcode'
|
||||
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
|
||||
---
|
||||
-- used in reverse large area lookup
|
||||
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
|
||||
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
|
||||
{{db.tablespace.search_index}}
|
||||
WHERE rank_address between 4 and 25 AND type != 'postcode'
|
||||
AND name is not null AND linked_place_id is null AND osm_type = 'N';
|
||||
---
|
||||
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
|
||||
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
|
||||
WHERE parent_place_id is not null;
|
||||
|
||||
@@ -190,7 +190,6 @@ CREATE INDEX idx_placex_geometry_buildings ON placex
|
||||
|
||||
-- Usage: - linking of similar named places to boundaries
|
||||
-- - linking of place nodes with same type to boundaries
|
||||
-- - lookupPolygon()
|
||||
CREATE INDEX idx_placex_geometry_placenode ON placex
|
||||
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
|
||||
WHERE osm_type = 'N' and rank_search < 26
|
||||
@@ -299,7 +298,15 @@ CREATE TABLE IF NOT EXISTS wikipedia_redirect (
|
||||
|
||||
-- osm2pgsql does not create indexes on the middle tables for Nominatim
|
||||
-- Add one for lookup of associated street relations.
|
||||
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet'];
|
||||
{% if db.middle_db_format == '1' %}
|
||||
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts)
|
||||
{{db.tablespace.address_index}}
|
||||
WHERE tags @> ARRAY['associatedStreet'];
|
||||
{% else %}
|
||||
CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(planet_osm_member_ids(members, 'R'::character(1)))
|
||||
WITH (fastupdate=off)
|
||||
{{db.tablespace.address_index}};
|
||||
{% endif %}
|
||||
|
||||
-- Needed for lookups if a node is part of an interpolation.
|
||||
CREATE INDEX IF NOT EXISTS idx_place_interpolations
|
||||
|
||||
@@ -41,10 +41,17 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null or info->>'place' is null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null;
|
||||
SELECT info->>'street' is not null and info->>'street' != '{}';
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
|
||||
@@ -1,40 +0,0 @@
|
||||
-- SPDX-License-Identifier: GPL-2.0-only
|
||||
--
|
||||
-- This file is part of Nominatim. (https://nominatim.org)
|
||||
--
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
DROP TABLE IF EXISTS word;
|
||||
CREATE TABLE word (
|
||||
word_id INTEGER,
|
||||
word_token text NOT NULL,
|
||||
type text NOT NULL,
|
||||
word text,
|
||||
info jsonb
|
||||
) {{db.tablespace.search_data}};
|
||||
|
||||
CREATE INDEX idx_word_word_token ON word
|
||||
USING BTREE (word_token) {{db.tablespace.search_index}};
|
||||
-- Used when updating country names from the boundary relation.
|
||||
CREATE INDEX idx_word_country_names ON word
|
||||
USING btree(word) {{db.tablespace.address_index}}
|
||||
WHERE type = 'C';
|
||||
-- Used when inserting new postcodes on updates.
|
||||
CREATE INDEX idx_word_postcodes ON word
|
||||
USING btree(word) {{db.tablespace.address_index}}
|
||||
WHERE type = 'P';
|
||||
-- Used when inserting full words.
|
||||
CREATE INDEX idx_word_full_word ON word
|
||||
USING btree(word) {{db.tablespace.address_index}}
|
||||
WHERE type = 'W';
|
||||
-- Used when inserting analyzed housenumbers (exclude old-style entries).
|
||||
CREATE INDEX idx_word_housenumbers ON word
|
||||
USING btree(word) {{db.tablespace.address_index}}
|
||||
WHERE type = 'H' and word is not null;
|
||||
|
||||
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
|
||||
|
||||
DROP SEQUENCE IF EXISTS seq_word;
|
||||
CREATE SEQUENCE seq_word start 1;
|
||||
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
|
||||
@@ -41,10 +41,17 @@ AS $$
|
||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null or info->>'place_search' is null;
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
|
||||
RETURNS BOOLEAN
|
||||
AS $$
|
||||
SELECT info->>'street' is not null;
|
||||
SELECT info->>'street' is not null and info->>'street' != '{}';
|
||||
$$ LANGUAGE SQL IMMUTABLE;
|
||||
|
||||
|
||||
@@ -340,7 +347,7 @@ BEGIN
|
||||
END LOOP;
|
||||
END IF;
|
||||
|
||||
-- consider parts before an opening braket a full word as well
|
||||
-- consider parts before an opening bracket a full word as well
|
||||
words := regexp_split_to_array(value, E'[(]');
|
||||
IF array_upper(words, 1) > 1 THEN
|
||||
s := make_standard_name(words[1]);
|
||||
|
||||
@@ -7,6 +7,6 @@ sys.path.append('@PROJECT_SOURCE_DIR@')
|
||||
from nominatim.cli import get_set_parser
|
||||
|
||||
def get_parser():
|
||||
parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@')
|
||||
parser = get_set_parser()
|
||||
|
||||
return parser.parser
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# just use the pgxs makefile
|
||||
|
||||
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
|
||||
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
|
||||
list(APPEND PG_CONFIG_HINTS
|
||||
"/usr/pgsql-${suffix}/bin")
|
||||
endforeach()
|
||||
|
||||
@@ -11,10 +11,12 @@
|
||||
#include "mb/pg_wchar.h"
|
||||
#include <utfasciitable.h>
|
||||
|
||||
#ifdef PG_MODULE_MAGIC
|
||||
PG_MODULE_MAGIC;
|
||||
#if PG_MAJORVERSION_NUM > 15
|
||||
#include "varatt.h"
|
||||
#endif
|
||||
|
||||
PG_MODULE_MAGIC;
|
||||
|
||||
Datum transliteration( PG_FUNCTION_ARGS );
|
||||
Datum gettokenstring( PG_FUNCTION_ARGS );
|
||||
void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int);
|
||||
|
||||
38
nominatim/api/__init__.py
Normal file
38
nominatim/api/__init__.py
Normal file
@@ -0,0 +1,38 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
The public interface of the Nominatim library.
|
||||
|
||||
Classes and functions defined in this file are considered stable. Always
|
||||
import from this file, not from the source files directly.
|
||||
"""
|
||||
|
||||
# See also https://github.com/PyCQA/pylint/issues/6006
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .core import (NominatimAPI as NominatimAPI,
|
||||
NominatimAPIAsync as NominatimAPIAsync)
|
||||
from .connection import (SearchConnection as SearchConnection)
|
||||
from .status import (StatusResult as StatusResult)
|
||||
from .types import (PlaceID as PlaceID,
|
||||
OsmID as OsmID,
|
||||
PlaceRef as PlaceRef,
|
||||
Point as Point,
|
||||
Bbox as Bbox,
|
||||
GeometryFormat as GeometryFormat,
|
||||
DataLayer as DataLayer)
|
||||
from .results import (SourceTable as SourceTable,
|
||||
AddressLine as AddressLine,
|
||||
AddressLines as AddressLines,
|
||||
WordInfo as WordInfo,
|
||||
WordInfos as WordInfos,
|
||||
DetailedResult as DetailedResult,
|
||||
ReverseResult as ReverseResult,
|
||||
ReverseResults as ReverseResults,
|
||||
SearchResult as SearchResult,
|
||||
SearchResults as SearchResults)
|
||||
from .localization import (Locales as Locales)
|
||||
149
nominatim/api/connection.py
Normal file
149
nominatim/api/connection.py
Normal file
@@ -0,0 +1,149 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Extended SQLAlchemy connection class that also includes access to the schema.
|
||||
"""
|
||||
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
|
||||
Awaitable, Callable, TypeVar
|
||||
import asyncio
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.asyncio import AsyncConnection
|
||||
|
||||
from nominatim.typing import SaFromClause
|
||||
from nominatim.db.sqlalchemy_schema import SearchTables
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
from nominatim.api.logging import log
|
||||
|
||||
T = TypeVar('T')
|
||||
|
||||
class SearchConnection:
|
||||
""" An extended SQLAlchemy connection class, that also contains
|
||||
then table definitions. The underlying asynchronous SQLAlchemy
|
||||
connection can be accessed with the 'connection' property.
|
||||
The 't' property is the collection of Nominatim tables.
|
||||
"""
|
||||
|
||||
def __init__(self, conn: AsyncConnection,
|
||||
tables: SearchTables,
|
||||
properties: Dict[str, Any]) -> None:
|
||||
self.connection = conn
|
||||
self.t = tables # pylint: disable=invalid-name
|
||||
self._property_cache = properties
|
||||
self._classtables: Optional[Set[str]] = None
|
||||
self.query_timeout: Optional[int] = None
|
||||
|
||||
|
||||
def set_query_timeout(self, timeout: Optional[int]) -> None:
|
||||
""" Set the timeout after which a query over this connection
|
||||
is cancelled.
|
||||
"""
|
||||
self.query_timeout = timeout
|
||||
|
||||
|
||||
async def scalar(self, sql: sa.sql.base.Executable,
|
||||
params: Union[Mapping[str, Any], None] = None
|
||||
) -> Any:
|
||||
""" Execute a 'scalar()' query on the connection.
|
||||
"""
|
||||
log().sql(self.connection, sql, params)
|
||||
return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)
|
||||
|
||||
|
||||
async def execute(self, sql: 'sa.Executable',
|
||||
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None
|
||||
) -> 'sa.Result[Any]':
|
||||
""" Execute a 'execute()' query on the connection.
|
||||
"""
|
||||
log().sql(self.connection, sql, params)
|
||||
return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)
|
||||
|
||||
|
||||
async def get_property(self, name: str, cached: bool = True) -> str:
|
||||
""" Get a property from Nominatim's property table.
|
||||
|
||||
Property values are normally cached so that they are only
|
||||
retrieved from the database when they are queried for the
|
||||
first time with this function. Set 'cached' to False to force
|
||||
reading the property from the database.
|
||||
|
||||
Raises a ValueError if the property does not exist.
|
||||
"""
|
||||
lookup_name = f'DBPROP:{name}'
|
||||
|
||||
if cached and lookup_name in self._property_cache:
|
||||
return cast(str, self._property_cache[lookup_name])
|
||||
|
||||
sql = sa.select(self.t.properties.c.value)\
|
||||
.where(self.t.properties.c.property == name)
|
||||
value = await self.connection.scalar(sql)
|
||||
|
||||
if value is None:
|
||||
raise ValueError(f"Property '{name}' not found in database.")
|
||||
|
||||
self._property_cache[lookup_name] = cast(str, value)
|
||||
|
||||
return cast(str, value)
|
||||
|
||||
|
||||
async def get_db_property(self, name: str) -> Any:
|
||||
""" Get a setting from the database. At the moment, only
|
||||
'server_version', the version of the database software, can
|
||||
be retrieved with this function.
|
||||
|
||||
Raises a ValueError if the property does not exist.
|
||||
"""
|
||||
if name != 'server_version':
|
||||
raise ValueError(f"DB setting '{name}' not found in database.")
|
||||
|
||||
return self._property_cache['DB:server_version']
|
||||
|
||||
|
||||
async def get_cached_value(self, group: str, name: str,
|
||||
factory: Callable[[], Awaitable[T]]) -> T:
|
||||
""" Access the cache for this Nominatim instance.
|
||||
Each cache value needs to belong to a group and have a name.
|
||||
This function is for internal API use only.
|
||||
|
||||
`factory` is an async callback function that produces
|
||||
the value if it is not already cached.
|
||||
|
||||
Returns the cached value or the result of factory (also caching
|
||||
the result).
|
||||
"""
|
||||
full_name = f'{group}:{name}'
|
||||
|
||||
if full_name in self._property_cache:
|
||||
return cast(T, self._property_cache[full_name])
|
||||
|
||||
value = await factory()
|
||||
self._property_cache[full_name] = value
|
||||
|
||||
return value
|
||||
|
||||
|
||||
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
|
||||
""" Lookup up if there is a classtype table for the given category
|
||||
and return a SQLAlchemy table for it, if it exists.
|
||||
"""
|
||||
if self._classtables is None:
|
||||
res = await self.execute(sa.text("""SELECT tablename FROM pg_tables
|
||||
WHERE tablename LIKE 'place_classtype_%'
|
||||
"""))
|
||||
self._classtables = {r[0] for r in res}
|
||||
|
||||
tablename = f"place_classtype_{cls}_{typ}"
|
||||
|
||||
if tablename not in self._classtables:
|
||||
return None
|
||||
|
||||
if tablename in self.t.meta.tables:
|
||||
return self.t.meta.tables[tablename]
|
||||
|
||||
return sa.Table(tablename, self.t.meta,
|
||||
sa.Column('place_id', sa.BigInteger),
|
||||
sa.Column('centroid', Geometry))
|
||||
974
nominatim/api/core.py
Normal file
974
nominatim/api/core.py
Normal file
@@ -0,0 +1,974 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of classes for API access via libraries.
|
||||
"""
|
||||
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
|
||||
import asyncio
|
||||
import sys
|
||||
import contextlib
|
||||
from pathlib import Path
|
||||
|
||||
import sqlalchemy as sa
|
||||
import sqlalchemy.ext.asyncio as sa_asyncio
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.db.sqlalchemy_schema import SearchTables
|
||||
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
|
||||
import nominatim.db.sqlite_functions
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.status import get_status, StatusResult
|
||||
from nominatim.api.lookup import get_detailed_place, get_simple_place
|
||||
from nominatim.api.reverse import ReverseGeocoder
|
||||
from nominatim.api.search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
|
||||
import nominatim.api.types as ntyp
|
||||
from nominatim.api.results import DetailedResult, ReverseResult, SearchResults
|
||||
|
||||
|
||||
class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
|
||||
""" The main frontend to the Nominatim database implements the
|
||||
functions for lookup, forward and reverse geocoding using
|
||||
asynchronous functions.
|
||||
|
||||
This class shares most of the functions with its synchronous
|
||||
version. There are some additional functions or parameters,
|
||||
which are documented below.
|
||||
"""
|
||||
def __init__(self, project_dir: Path,
|
||||
environ: Optional[Mapping[str, str]] = None,
|
||||
loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
|
||||
""" Initiate a new frontend object with synchronous API functions.
|
||||
|
||||
Parameters:
|
||||
project_dir: Path to the
|
||||
[project directory](../admin/Import.md#creating-the-project-directory)
|
||||
of the local Nominatim installation.
|
||||
environ: Mapping of [configuration parameters](../customize/Settings.md).
|
||||
When set, replaces any configuration via environment variables.
|
||||
Settings in this mapping also have precedence over any
|
||||
parameters found in the `.env` file of the project directory.
|
||||
loop: The asyncio event loop that will be used when calling
|
||||
functions. Only needed, when a custom event loop is used
|
||||
and the Python version is 3.9 or earlier.
|
||||
"""
|
||||
self.config = Configuration(project_dir, environ)
|
||||
self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
|
||||
if self.config.QUERY_TIMEOUT else None
|
||||
self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
|
||||
self.server_version = 0
|
||||
|
||||
if sys.version_info >= (3, 10):
|
||||
self._engine_lock = asyncio.Lock()
|
||||
else:
|
||||
self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
|
||||
self._engine: Optional[sa_asyncio.AsyncEngine] = None
|
||||
self._tables: Optional[SearchTables] = None
|
||||
self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
|
||||
|
||||
|
||||
async def setup_database(self) -> None:
|
||||
""" Set up the SQL engine and connections.
|
||||
|
||||
This function will be implicitly called when the database is
|
||||
accessed for the first time. You may also call it explicitly to
|
||||
avoid that the first call is delayed by the setup.
|
||||
"""
|
||||
async with self._engine_lock:
|
||||
if self._engine:
|
||||
return
|
||||
|
||||
extra_args: Dict[str, Any] = {'future': True,
|
||||
'echo': self.config.get_bool('DEBUG_SQL')}
|
||||
|
||||
if self.config.get_int('API_POOL_SIZE') == 0:
|
||||
extra_args['poolclass'] = sa.pool.NullPool
|
||||
else:
|
||||
extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
|
||||
extra_args['max_overflow'] = 0
|
||||
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
|
||||
|
||||
|
||||
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
|
||||
|
||||
if is_sqlite:
|
||||
params = dict((p.split('=', 1)
|
||||
for p in self.config.DATABASE_DSN[7:].split(';')))
|
||||
dburl = sa.engine.URL.create('sqlite+aiosqlite',
|
||||
database=params.get('dbname'))
|
||||
|
||||
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
|
||||
and self.config.get_bool('DATABASE_RW')) \
|
||||
and not Path(params.get('dbname', '')).is_file():
|
||||
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
|
||||
else:
|
||||
dsn = self.config.get_database_params()
|
||||
query = {k: v for k, v in dsn.items()
|
||||
if k not in ('user', 'password', 'dbname', 'host', 'port')}
|
||||
|
||||
dburl = sa.engine.URL.create(
|
||||
f'postgresql+{PGCORE_LIB}',
|
||||
database=dsn.get('dbname'),
|
||||
username=dsn.get('user'),
|
||||
password=dsn.get('password'),
|
||||
host=dsn.get('host'),
|
||||
port=int(dsn['port']) if 'port' in dsn else None,
|
||||
query=query)
|
||||
|
||||
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
|
||||
|
||||
if is_sqlite:
|
||||
server_version = 0
|
||||
|
||||
@sa.event.listens_for(engine.sync_engine, "connect")
|
||||
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
|
||||
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
|
||||
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
|
||||
cursor = dbapi_con.cursor()
|
||||
cursor.execute("SELECT load_extension('mod_spatialite')")
|
||||
cursor.execute('SELECT SetDecimalPrecision(7)')
|
||||
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
|
||||
else:
|
||||
try:
|
||||
async with engine.begin() as conn:
|
||||
result = await conn.scalar(sa.text('SHOW server_version_num'))
|
||||
server_version = int(result)
|
||||
if server_version >= 110000:
|
||||
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
|
||||
await conn.execute(sa.text(
|
||||
"SET max_parallel_workers_per_gather TO '0'"))
|
||||
except (PGCORE_ERROR, sa.exc.OperationalError):
|
||||
server_version = 0
|
||||
|
||||
if server_version >= 110000:
|
||||
@sa.event.listens_for(engine.sync_engine, "connect")
|
||||
def _on_connect(dbapi_con: Any, _: Any) -> None:
|
||||
cursor = dbapi_con.cursor()
|
||||
cursor.execute("SET jit_above_cost TO '-1'")
|
||||
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
|
||||
|
||||
self._property_cache['DB:server_version'] = server_version
|
||||
|
||||
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
|
||||
self._engine = engine
|
||||
|
||||
|
||||
async def close(self) -> None:
|
||||
""" Close all active connections to the database. The NominatimAPIAsync
|
||||
object remains usable after closing. If a new API functions is
|
||||
called, new connections are created.
|
||||
"""
|
||||
if self._engine is not None:
|
||||
await self._engine.dispose()
|
||||
|
||||
|
||||
@contextlib.asynccontextmanager
|
||||
async def begin(self) -> AsyncIterator[SearchConnection]:
|
||||
""" Create a new connection with automatic transaction handling.
|
||||
|
||||
This function may be used to get low-level access to the database.
|
||||
Refer to the documentation of SQLAlchemy for details how to use
|
||||
the connection object.
|
||||
"""
|
||||
if self._engine is None:
|
||||
await self.setup_database()
|
||||
|
||||
assert self._engine is not None
|
||||
assert self._tables is not None
|
||||
|
||||
async with self._engine.begin() as conn:
|
||||
yield SearchConnection(conn, self._tables, self._property_cache)
|
||||
|
||||
|
||||
async def status(self) -> StatusResult:
    """ Return the status of the database.
    """
    try:
        async with self.begin() as conn:
            conn.set_query_timeout(self.query_timeout)
            # The transaction commits while the result is returned; any
            # connection failure - including one raised on exit - is
            # still inside the try block and mapped to status 700 below.
            return await get_status(conn)
    except (PGCORE_ERROR, sa.exc.OperationalError):
        return StatusResult(700, 'Database connection failed')
|
||||
|
||||
|
||||
async def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
    """ Get detailed information about a place in the database.

        Returns None if there is no entry under the given ID.
    """
    lookup_opts = ntyp.LookupDetails.from_kwargs(params)
    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        # Keyword output needs the tokenizer, which is loaded lazily.
        if lookup_opts.keywords:
            await make_query_analyzer(conn)
        return await get_detailed_place(conn, place, lookup_opts)
|
||||
|
||||
|
||||
async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
    """ Get simple information about a list of places.

        Returns a list of place information for all IDs that were found.
    """
    opts = ntyp.LookupDetails.from_kwargs(params)
    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        # Keyword output needs the tokenizer, which is loaded lazily.
        if opts.keywords:
            await make_query_analyzer(conn)
        # IDs that cannot be found yield a falsy result and are dropped.
        found = []
        for place in places:
            result = await get_simple_place(conn, place, opts)
            if result:
                found.append(result)
        return SearchResults(found)
|
||||
|
||||
|
||||
async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
    """ Find a place by its coordinates. Also known as reverse geocoding.

        Returns the closest result that can be found or None if
        no place matches the given criteria.
    """
    # The following negation handles NaN correctly. Don't change.
    if not abs(coord[0]) <= 180 or not abs(coord[1]) <= 90:
        # There are no results to be expected outside valid coordinates.
        return None

    opts = ntyp.ReverseDetails.from_kwargs(params)
    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        # Keyword output needs the tokenizer, which is loaded lazily.
        if opts.keywords:
            await make_query_analyzer(conn)
        geocoder = ReverseGeocoder(conn, opts,
                                   self.reverse_restrict_to_country_area)
        return await geocoder.lookup(coord)
|
||||
|
||||
|
||||
async def search(self, query: str, **params: Any) -> SearchResults:
    """ Find a place by free-text search. Also known as forward geocoding.
    """
    query = query.strip()
    if not query:
        raise UsageError('Nothing to search for.')

    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        # Each comma-separated part of the query becomes its own phrase.
        phrases = [Phrase(PhraseType.NONE, part.strip())
                   for part in query.split(',')]
        # An unset/zero REQUEST_TIMEOUT disables the per-request limit.
        timeout = self.config.get_int('REQUEST_TIMEOUT') \
                      if self.config.REQUEST_TIMEOUT else None
        geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
                                   timeout)
        return await geocoder.lookup(phrases)
|
||||
|
||||
|
||||
# pylint: disable=too-many-arguments,too-many-branches
async def search_address(self, amenity: Optional[str] = None,
                         street: Optional[str] = None,
                         city: Optional[str] = None,
                         county: Optional[str] = None,
                         state: Optional[str] = None,
                         country: Optional[str] = None,
                         postalcode: Optional[str] = None,
                         **params: Any) -> SearchResults:
    """ Find an address using structured search.
    """
    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        details = ntyp.SearchDetails.from_kwargs(params)

        # Build one phrase per non-empty field, in the order the
        # geocoder expects them (postcode before country).
        field_order = ((PhraseType.AMENITY, amenity),
                       (PhraseType.STREET, street),
                       (PhraseType.CITY, city),
                       (PhraseType.COUNTY, county),
                       (PhraseType.STATE, state),
                       (PhraseType.POSTCODE, postalcode),
                       (PhraseType.COUNTRY, country))
        phrases = [Phrase(ptype, text) for ptype, text in field_order if text]

        if not phrases:
            raise UsageError('Nothing to search for.')

        # Narrow the permissible address ranks to the most specific
        # part of the address that was supplied.
        if amenity or street:
            details.restrict_min_max_rank(26, 30)
        elif city:
            details.restrict_min_max_rank(13, 25)
        elif county:
            details.restrict_min_max_rank(10, 12)
        elif state:
            details.restrict_min_max_rank(5, 9)
        elif postalcode:
            details.restrict_min_max_rank(5, 11)
        else:
            details.restrict_min_max_rank(4, 4)

        # Unless the caller chose layers explicitly, restrict to
        # addresses and add POIs only when an amenity was given.
        if 'layers' not in params:
            details.layers = ntyp.DataLayer.ADDRESS
            if amenity:
                details.layers |= ntyp.DataLayer.POI

        geocoder = ForwardGeocoder(conn, details,
                                   self.config.get_int('REQUEST_TIMEOUT') \
                                   if self.config.REQUEST_TIMEOUT else None)
        return await geocoder.lookup(phrases)
|
||||
|
||||
|
||||
async def search_category(self, categories: List[Tuple[str, str]],
                          near_query: Optional[str] = None,
                          **params: Any) -> SearchResults:
    """ Find an object of a certain category near another place.
        The near place may either be given as an unstructured search
        query in itself or as coordinates.
    """
    if not categories:
        return SearchResults()

    details = ntyp.SearchDetails.from_kwargs(params)
    async with self.begin() as conn:
        conn.set_query_timeout(self.query_timeout)
        phrases: List[Phrase] = []
        if near_query:
            phrases = [Phrase(PhraseType.NONE, part)
                       for part in near_query.split(',')]
        elif details.keywords:
            # Keyword output needs the tokenizer; only required when no
            # textual near-query will trigger analysis anyway.
            await make_query_analyzer(conn)

        geocoder = ForwardGeocoder(conn, details,
                                   self.config.get_int('REQUEST_TIMEOUT') \
                                   if self.config.REQUEST_TIMEOUT else None)
        return await geocoder.lookup_pois(categories, phrases)
|
||||
|
||||
|
||||
|
||||
class NominatimAPI:
|
||||
""" This class provides a thin synchronous wrapper around the asynchronous
|
||||
Nominatim functions. It creates its own event loop and runs each
|
||||
synchronous function call to completion using that loop.
|
||||
"""
|
||||
|
||||
def __init__(self, project_dir: Path,
             environ: Optional[Mapping[str, str]] = None) -> None:
    """ Initiate a new frontend object with synchronous API functions.

        Parameters:
          project_dir: Path to the
            [project directory](../admin/Import.md#creating-the-project-directory)
            of the local Nominatim installation.
          environ: Mapping of [configuration parameters](../customize/Settings.md).
            When set, replaces any configuration via environment variables.
            Settings in this mapping also have precedence over any
            parameters found in the `.env` file of the project directory.
    """
    # A private event loop drives the asynchronous implementation to
    # completion for every synchronous API call.
    self._loop = asyncio.new_event_loop()
    self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
|
||||
|
||||
|
||||
def close(self) -> None:
    """ Close all active connections to the database.

        This function also shuts down the internal asynchronous worker
        loop, making the NominatimAPI object unusable afterwards.
    """
    # Dispose the async connection pool first, then tear down the loop
    # that drove it.
    self._loop.run_until_complete(self._async_api.close())
    self._loop.close()
|
||||
|
||||
|
||||
@property
def config(self) -> Configuration:
    """ Provide read-only access to the [configuration](#Configuration)
        used by the API.
    """
    # Delegates to the wrapped asynchronous API object, which owns the
    # actual Configuration instance.
    return self._async_api.config
|
||||
|
||||
def status(self) -> StatusResult:
    """ Return the status of the database as a dataclass object
        with the fields described below.

        Returns:
          status(int): A status code as described on the status page.
          message(str): Either 'OK' or a human-readable message of the
            problem encountered.
          software_version(tuple): A tuple with the version of the
            Nominatim library consisting of (major, minor, patch, db-patch)
            version.
          database_version(tuple): A tuple with the version of the library
            which was used for the import or last migration.
            Also consists of (major, minor, patch, db-patch).
          data_updated(datetime): Timestamp with the age of the data.
    """
    # Run the asynchronous implementation to completion on the
    # private worker loop.
    pending = self._async_api.status()
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
    """ Get detailed information about a place in the database.

        The result is a DetailedResult dataclass or `None` if the place
        could not be found in the database.

        Parameters:
          place: Description of the place to look up. See
            [Place identification](Input-Parameter-Types.md#place-identification)
            for the various ways to reference a place.

        Other parameters:
          geometry_output (enum): Add the full geometry of the place to
            the result in one or more formats. Geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Tolerance in degrees by which
            returned geometries may be simplified. Topology is
            preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the
            places making up the address. (Default: False)
          linked_places (bool): Add detailed information about places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about places
            for which the result is the address parent. Only POI places
            can have parents. (Default: False)
          keywords (bool): Add detailed information about the search
            terms used for this place.

        Returns:
          A DetailedResult carrying the data source (source_table), OSM
          category, centroid, internal IDs (place_id, parent_place_id,
          linked_place_id), admin_level, indexed_date, osm_object, the
          name/address/extratags dictionaries, normalised housenumber,
          computed postcode, wikipedia reference, address and search
          ranks, importance, country_code and - depending on the flags
          above - address_rows, linked_rows, parented_rows,
          name_keywords, address_keywords and geometry.
    """
    # Delegate to the async implementation on the private worker loop.
    pending = self._async_api.details(place, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
    """ Get simple information about a list of places.

        Returns a list of place information for all IDs that were found;
        each entry is a dataclass as described below.

        Parameters:
          places: List of descriptions of the places to look up. See
            [Place identification](Input-Parameter-Types.md#place-identification)
            for the various ways to reference a place.

        Other parameters:
          geometry_output (enum): Add the full geometry of the place to
            the result in one or more formats. Geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Tolerance in degrees by which
            returned geometries may be simplified. Topology is
            preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the
            places making up the address. (Default: False)
          linked_places (bool): Add detailed information about places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about places
            for which the result is the address parent. Only POI places
            can have parents. (Default: False)
          keywords (bool): Add detailed information about the search
            terms used for this place.

        Returns:
          SearchResults whose entries carry the data source
          (source_table), OSM category, centroid, place_id, osm_object,
          the name/address/extratags dictionaries, normalised
          housenumber, computed postcode, wikipedia reference, address
          and search ranks, importance, country_code, bbox and -
          depending on the flags above - address_rows, linked_rows,
          parented_rows, name_keywords, address_keywords and geometry.
    """
    # Delegate to the async implementation on the private worker loop.
    pending = self._async_api.lookup(places, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
    """ Find a place by its coordinates. Also known as reverse geocoding.

        Returns the closest result that can be found or `None` if no
        place matches the given criteria; the result is a dataclass as
        described below.

        Parameters:
          coord: Coordinate to look up the place for, as a Point or a
            tuple (x, y). Must be in WGS84 projection.

        Other parameters:
          max_rank (int): Highest address rank to return. Can be used to
            restrict search to streets or settlements.
          layers (enum): Kind of data to take into account.
            (Default: addresses and POIs)
          geometry_output (enum): Add the full geometry of the place to
            the result in one or more formats. Geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Tolerance in degrees by which
            returned geometries may be simplified. Topology is
            preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the
            places making up the address. (Default: False)
          linked_places (bool): Add detailed information about places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about places
            for which the result is the address parent. Only POI places
            can have parents. (Default: False)
          keywords (bool): Add detailed information about the search
            terms used for this place.

        Returns:
          A ReverseResult carrying the data source (source_table), OSM
          category, centroid, place_id, osm_object, the
          name/address/extratags dictionaries, normalised housenumber,
          computed postcode, wikipedia reference, address and search
          ranks, importance, country_code, bbox, distance in degrees
          from the input point and - depending on the flags above -
          address_rows, linked_rows, parented_rows, name_keywords,
          address_keywords and geometry.
    """
    # Delegate to the async implementation on the private worker loop.
    pending = self._async_api.reverse(coord, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
def search(self, query: str, **params: Any) -> SearchResults:
    """ Find a place by free-text search. Also known as forward geocoding.

        Parameters:
          query: Free-form text query searching for a place.

        Other parameters:
          max_results (int): Maximum number of results; the actual
            number may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result
            ([address rank](../customize/Ranking.md#address-rank) for
            addressable places, otherwise the
            [search rank](../customize/Ranking.md#search-rank)).
          max_rank (int): Highest permissible rank. See min_rank.
          layers (enum): Kind of data to take into account, see
            [layers section](Input-Parameter-Types.md#layers).
            (Default: addresses and POIs)
          countries (list[str]): Restrict to the given ISO 3166-1
            alpha-2 country codes; an empty list disables the filter.
          excluded (list[int]): Internal IDs of places to exclude.
          viewbox (Optional[Bbox]): Area to focus the search on.
          bounded_viewbox (bool): Treat `viewbox` as a hard filter.
          near (Optional[Point]): Focus search around this point and
            order results by distance to it.
          near_radius (Optional[float]): Maximum distance in degrees
            from the `near` point; ignored when `near` is not set.
          categories (list[tuple]): Restrict search to the given main
            OSM tags; an empty list disables the filter.
          geometry_output (enum): Add the full geometry of the place to
            the result in one or more formats. (Default: none)
          geometry_simplification (float): Tolerance in degrees by which
            returned geometries may be simplified. (Default: 0.0)
          address_details (bool): Add detailed information about the
            places making up the address. (Default: False)
          linked_places (bool): Add detailed information about places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about places
            for which the result is the address parent (POIs only).
            (Default: False)
          keywords (bool): Add detailed information about the search
            terms used for this place.

        Returns:
          SearchResults whose entries carry the data source
          (source_table), OSM category, centroid, place_id, osm_object,
          the name/address/extratags dictionaries, normalised
          housenumber, computed postcode, wikipedia reference, address
          and search ranks, importance, country_code, bbox and -
          depending on the flags above - address_rows, linked_rows,
          parented_rows, name_keywords, address_keywords and geometry.
    """
    # Delegate to the async implementation on the private worker loop.
    pending = self._async_api.search(query, **params)
    return self._loop.run_until_complete(pending)
|
||||
|
||||
|
||||
# pylint: disable=too-many-arguments
|
||||
def search_address(self, amenity: Optional[str] = None,
                   street: Optional[str] = None,
                   city: Optional[str] = None,
                   county: Optional[str] = None,
                   state: Optional[str] = None,
                   country: Optional[str] = None,
                   postalcode: Optional[str] = None,
                   **params: Any) -> SearchResults:
    """ Find an address using structured search.

        Parameters:
          amenity: Name of a POI.
          street: Street and optionally housenumber of the address. If the address
            does not have a street, then the place the housenumber references to.
          city: Postal city of the address.
          county: County equivalent of the address. Does not exist in all
            jurisdictions.
          state: State or province of the address.
          country: Country with its full name or its ISO 3166-1 alpha-2 country code.
            Do not use together with the country_code filter.
          postalcode: Post code or ZIP for the place.

        Other parameters:
          max_results (int): Maximum number of results to return. The
            actual number of results may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result.
            For addressable places this is the minimum
            [address rank](../customize/Ranking.md#address-rank). For all
            other places the [search rank](../customize/Ranking.md#search-rank)
            is used.
          max_rank (int): Highest permissible rank for the result. See min_rank above.
          layers (enum): Defines the kind of data to take into account.
            See [layers section](Input-Parameter-Types.md#layers) for details.
            (Default: addresses and POIs)
          countries (list[str]): Restrict search to countries with the given
            ISO 3166-1 alpha-2 country code. An empty list (the default)
            disables this filter. Do not use, when the country parameter
            is used.
          excluded (list[int]): A list of internal IDs of places to exclude
            from the search.
          viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
          bounded_viewbox (bool): Consider the bounding box given in `viewbox`
            as a filter and return only results within the bounding box.
          near (Optional[Point]): Focus search around the given point and
            return results ordered by distance to the given point.
          near_radius (Optional[float]): Restrict results to results within
            the given distance in degrees of `near` point. Ignored, when
            `near` is not set.
          categories (list[tuple]): Restrict search to places of the given
            categories. The category is the main OSM tag assigned to each
            place. An empty list (the default) disables this filter.
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
            and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
            for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
            usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
            attributed to the place. Keys are usually the corresponding
            OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
            the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
            for lookup. To get the house number in its original spelling,
            use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
            directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
            The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a measure
            how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
            ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
            computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
            `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
            `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
            the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search word for the address of
            the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
            If the place is a single point, then the size of the bounding
            box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
            in the formats requested in the `geometry_output` parameter.
    """
    # Synchronous facade: block on the event loop owned by this object and
    # delegate all actual work to the async API implementation.
    return self._loop.run_until_complete(
        self._async_api.search_address(amenity, street, city, county,
                                       state, country, postalcode, **params))
|
||||
|
||||
|
||||
def search_category(self, categories: List[Tuple[str, str]],
                    near_query: Optional[str] = None,
                    **params: Any) -> SearchResults:
    """ Find an object of a certain category near another place.

        The near place may either be given as an unstructured search
        query in itself or as a geographic area through the
        viewbox or near parameters.

        Parameters:
          categories: Restrict search to places of the given
            categories. The category is the main OSM tag assigned to each
            place.
          near_query: Optional free-text query to define the are to
            restrict search to.

        Other parameters:
          max_results (int): Maximum number of results to return. The
            actual number of results may be less. (Default: 10)
          min_rank (int): Lowest permissible rank for the result.
            For addressable places this is the minimum
            [address rank](../customize/Ranking.md#address-rank). For all
            other places the [search rank](../customize/Ranking.md#search-rank)
            is used.
          max_rank (int): Highest permissible rank for the result. See min_rank above.
          layers (enum): Defines the kind of data to take into account.
            See [layers section](Input-Parameter-Types.md#layers) for details.
            (Default: addresses and POIs)
          countries (list[str]): Restrict search to countries with the given
            ISO 3166-1 alpha-2 country code. An empty list (the default)
            disables this filter.
          excluded (list[int]): A list of internal IDs of places to exclude
            from the search.
          viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
          bounded_viewbox (bool): Consider the bounding box given in `viewbox`
            as a filter and return only results within the bounding box.
          near (Optional[Point]): Focus search around the given point and
            return results ordered by distance to the given point.
          near_radius (Optional[float]): Restrict results to results within
            the given distance in degrees of `near` point. Ignored, when
            `near` is not set.
          geometry_output (enum): Add the full geometry of the place to the result.
            Multiple formats may be selected. Note that geometries can become
            quite large. (Default: none)
          geometry_simplification (float): Simplification factor to use on
            the geometries before returning them. The factor expresses
            the tolerance in degrees from which the geometry may differ.
            Topology is preserved. (Default: 0.0)
          address_details (bool): Add detailed information about the places
            that make up the address of the requested object. (Default: False)
          linked_places (bool): Add detailed information about the places
            that link to the result. (Default: False)
          parented_places (bool): Add detailed information about all places
            for which the requested object is a parent, i.e. all places for
            which the object provides the address details.
            Only POI places can have parents. (Default: False)
          keywords (bool): Add detailed information about the search terms
            used for this place.

        Returns:
          source_table (enum): Data source of the place. See below for possible values.
          category (tuple): A tuple of two strings with the primary OSM tag
            and value.
          centroid (Point): Point position of the place.
          place_id (Optional[int]): Internal ID of the place. This ID may differ
            for the same place between different installations.
          osm_object (Optional[tuple]): OSM type and ID of the place, if available.
          names (Optional[dict]): Dictionary of names of the place. Keys are
            usually the corresponding OSM tag keys.
          address (Optional[dict]): Dictionary of address parts directly
            attributed to the place. Keys are usually the corresponding
            OSM tag keys with the `addr:` prefix removed.
          extratags (Optional[dict]): Dictionary of additional attributes for
            the place. Usually OSM tag keys and values.
          housenumber (Optional[str]): House number of the place, normalised
            for lookup. To get the house number in its original spelling,
            use `address['housenumber']`.
          postcode (Optional[str]): Computed postcode for the place. To get
            directly attributed postcodes, use `address['postcode']` instead.
          wikipedia (Optional[str]): Reference to a wikipedia site for the place.
            The string has the format <language code>:<wikipedia title>.
          rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
          rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
          importance (Optional[float]): Relative importance of the place. This is a measure
            how likely the place will be searched for.
          country_code (Optional[str]): Country the feature is in as
            ISO 3166-1 alpha-2 country code.
          address_rows (Optional[AddressLines]): List of places that make up the
            computed address. `None` when `address_details` parameter was False.
          linked_rows (Optional[AddressLines]): List of places that link to the object.
            `None` when `linked_places` parameter was False.
          parented_rows (Optional[AddressLines]): List of direct children of the place.
            `None` when `parented_places` parameter was False.
          name_keywords (Optional[WordInfos]): List of search words for the name of
            the place. `None` when `keywords` parameter is set to False.
          address_keywords (Optional[WordInfos]): List of search word for the address of
            the place. `None` when `keywords` parameter is set to False.
          bbox (Bbox): Bounding box of the full geometry of the place.
            If the place is a single point, then the size of the bounding
            box is guessed according to the type of place.
          geometry (dict): Dictionary containing the full geometry of the place
            in the formats requested in the `geometry_output` parameter.
    """
    # Synchronous facade: block on the event loop owned by this object and
    # delegate all actual work to the async API implementation.
    return self._loop.run_until_complete(
        self._async_api.search_category(categories, near_query, **params))
|
||||
97
nominatim/api/localization.py
Normal file
97
nominatim/api/localization.py
Normal file
@@ -0,0 +1,97 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper functions for localizing names of results.
|
||||
"""
|
||||
from typing import Mapping, List, Optional
|
||||
|
||||
import re
|
||||
|
||||
class Locales:
    """ Helper class for localization of names.

        It takes a list of language prefixes in their order of preferred
        usage.
    """

    def __init__(self, langs: Optional[List[str]] = None):
        self.languages = langs or []
        self.name_tags: List[str] = []

        # Build the ordered list of supported name tags.
        # The set of tags is currently hard-coded.
        self._add_lang_tags('name')
        self._add_tags('name', 'brand')
        self._add_lang_tags('official_name', 'short_name')
        self._add_tags('official_name', 'short_name', 'ref')


    def __bool__(self) -> bool:
        # Truthy only when at least one preferred language was given.
        return bool(self.languages)


    def _add_tags(self, *tags: str) -> None:
        # Language-independent variants: the plain tag plus its
        # '_place_' prefixed counterpart.
        self.name_tags.extend(variant
                              for tag in tags
                              for variant in (tag, f"_place_{tag}"))


    def _add_lang_tags(self, *tags: str) -> None:
        # Language-qualified variants in order of language preference.
        self.name_tags.extend(variant
                              for tag in tags
                              for lang in self.languages
                              for variant in (f"{tag}:{lang}",
                                              f"_place_{tag}:{lang}"))


    def display_name(self, names: Optional[Mapping[str, str]]) -> str:
        """ Return the best matching name from a dictionary of names
            containing different name variants.

            If 'names' is null or empty, an empty string is returned. If no
            appropriate localization is found, the first name is returned.
        """
        if not names:
            return ''

        if len(names) > 1:
            localized = next((names[tag] for tag in self.name_tags
                              if tag in names), None)
            if localized is not None:
                return localized

        # No localized match (or only a single variant): fall back to
        # the first available name.
        return next(iter(names.values()))


    @staticmethod
    def from_accept_languages(langstr: str) -> 'Locales':
        """ Create a localization object from a language list in the
            format of HTTP accept-languages header.

            The functions tries to be forgiving of format errors by first splitting
            the string into comma-separated parts and then parsing each
            description separately. Badly formatted parts are then ignored.
        """
        # Parse each comma-separated language description; a missing
        # quality value defaults to 1.0.
        pattern = re.compile(r'\s*([a-z_-]+)(?:;\s*q\s*=\s*([01](?:\.\d+)?))?\s*',
                             re.I)
        weighted = []
        for part in langstr.split(','):
            match = pattern.fullmatch(part)
            if match:
                weighted.append((match[1], float(match[2] or 1.0)))

        # Order by descending weight; the sort is stable, so equally
        # weighted languages keep their original order.
        weighted.sort(key=lambda entry: entry[1], reverse=True)

        # For region variants also add the bare language, unless the bare
        # language is itself listed (adding it would mess up its weight).
        listed = {lid for lid, _ in weighted}
        languages = []
        for lid, _ in weighted:
            languages.append(lid)
            base, sep, _ = lid.partition('-')
            if sep and base not in listed:
                languages.append(base)

        return Locales(languages)
|
||||
433
nominatim/api/logging.py
Normal file
433
nominatim/api/logging.py
Normal file
@@ -0,0 +1,433 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Functions for specialised logging with HTML output.
|
||||
"""
|
||||
from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence
|
||||
from contextvars import ContextVar
|
||||
import datetime as dt
|
||||
import textwrap
|
||||
import io
|
||||
import re
|
||||
import html
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.asyncio import AsyncConnection
|
||||
|
||||
# Pygments is an optional dependency: when it is installed, SQL and Python
# debug output is syntax-highlighted; otherwise loggers fall back to
# plain (escaped) text. CODE_HIGHLIGHT records the availability.
try:
    from pygments import highlight
    from pygments.lexers import PythonLexer, PostgresLexer
    from pygments.formatters import HtmlFormatter
    CODE_HIGHLIGHT = True
except ModuleNotFoundError:
    CODE_HIGHLIGHT = False
|
||||
|
||||
|
||||
def _debug_name(res: Any) -> str:
|
||||
if res.names:
|
||||
return cast(str, res.names.get('name', next(iter(res.names.values()))))
|
||||
|
||||
return f"Hnr {res.housenumber}" if res.housenumber is not None else '[NONE]'
|
||||
|
||||
|
||||
class BaseLogger:
    """ Interface for logging function.

        The base implementation does nothing. Overwrite the functions
        in derived classes which implement logging functionality.
    """
    def get_buffer(self) -> str:
        """ Return the current content of the log buffer.
        """
        return ''

    def function(self, func: str, **kwargs: Any) -> None:
        """ Start a new debug chapter for the given function and its parameters.
        """


    def section(self, heading: str) -> None:
        """ Start a new section with the given title.
        """


    def comment(self, text: str) -> None:
        """ Add a simple comment to the debug output.
        """


    def var_dump(self, heading: str, var: Any) -> None:
        """ Print the content of the variable to the debug output prefixed by
            the given heading.
        """


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        """ Print the table generated by the generator function.
        """


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        """ Print a list of search results generated by the generator function.
        """


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        """ Print the SQL for the given statement.
        """

    def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
                   extra_params: Union[Mapping[str, Any],
                                 Sequence[Mapping[str, Any]], None]) -> str:
        """ Return the compiled version of the statement.

            The rendering is for debugging only: parameter values are
            interpolated into the SQL text, with dialect- and
            SQLAlchemy-version-specific fixups applied.
        """
        compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)

        # Start from the parameters bound at compile time and overlay the
        # extra execute-time parameters, converted to printable values.
        params = dict(compiled.params)
        if isinstance(extra_params, Mapping):
            for k, v in extra_params.items():
                if hasattr(v, 'to_wkt'):
                    # Geometry-like values are rendered as WKT.
                    params[k] = v.to_wkt()
                elif isinstance(v, (int, float)):
                    params[k] = v
                else:
                    params[k] = str(v)
        elif isinstance(extra_params, Sequence) and extra_params:
            # executemany-style parameters: only show placeholder names.
            for k in extra_params[0]:
                params[k] = f':{k}'

        sqlstr = str(compiled)

        if conn.dialect.name == 'postgresql':
            if sa.__version__.startswith('1'):
                # SQLAlchemy 1.x renders expanding IN-lists as
                # __[POSTCOMPILE_...] markers; substitute them positionally.
                try:
                    sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
                    return sqlstr % tuple((repr(params.get(name, None))
                                          for name in compiled.positiontup)) # type: ignore
                except TypeError:
                    # Give up on interpolation when the parameter count
                    # does not line up; the raw SQL is still useful.
                    return sqlstr

            # Fixes an odd issue with Python 3.7 where percentages are not
            # quoted correctly.
            sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
            sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
            return sqlstr % params

        assert conn.dialect.name == 'sqlite'

        # params in positional order
        pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore

        # Replace POSTCOMPILE markers and '?' placeholders one by one,
        # consuming the positional parameters in order.
        sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
        sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)

        return sqlstr
|
||||
|
||||
class HTMLLogger(BaseLogger):
    """ Logger that formats messages in HTML.

        Output is collected in an in-memory buffer; get_buffer() wraps it
        in the static HTML_HEADER/HTML_FOOTER scaffolding.
    """
    def __init__(self) -> None:
        self.buffer = io.StringIO()


    def _timestamp(self) -> None:
        # Emit the current wall-clock time in front of the next log entry.
        self._write(f'<p class="timestamp">[{dt.datetime.now()}]</p>')


    def get_buffer(self) -> str:
        return HTML_HEADER + self.buffer.getvalue() + HTML_FOOTER


    def function(self, func: str, **kwargs: Any) -> None:
        self._timestamp()
        self._write(f"<h1>Debug output for {func}()</h1>\n<p>Parameters:<dl>")
        for name, value in kwargs.items():
            self._write(f'<dt>{name}</dt><dd>{self._python_var(value)}</dd>')
        self._write('</dl></p>')


    def section(self, heading: str) -> None:
        self._timestamp()
        self._write(f"<h2>{heading}</h2>")


    def comment(self, text: str) -> None:
        self._timestamp()
        self._write(f"<p>{text}</p>")


    def var_dump(self, heading: str, var: Any) -> None:
        self._timestamp()
        # A callable is treated as a lazy value: only evaluated when
        # this logger is active.
        if callable(var):
            var = var()

        self._write(f'<h5>{heading}</h5>{self._python_var(var)}')


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        self._timestamp()
        # First generated row is the table header and must be present.
        head = next(rows)
        assert head
        self._write(f'<table><thead><tr><th colspan="{len(head)}">{heading}</th></tr><tr>')
        for cell in head:
            self._write(f'<th>{cell}</th>')
        self._write('</tr></thead><tbody>')
        for row in rows:
            # None rows act as separators and are skipped in HTML output.
            if row is not None:
                self._write('<tr>')
                for cell in row:
                    self._write(f'<td>{cell}</td>')
                self._write('</tr>')
        self._write('</tbody></table>')


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        """ Print a list of search results generated by the generator function.
        """
        self._timestamp()
        def format_osm(osm_object: Optional[Tuple[str, int]]) -> str:
            # Render an (osm_type, osm_id) pair as a link to
            # openstreetmap.org; unknown types are shown verbatim.
            if not osm_object:
                return '-'

            t, i = osm_object
            if t == 'N':
                fullt = 'node'
            elif t == 'W':
                fullt = 'way'
            elif t == 'R':
                fullt = 'relation'
            else:
                return f'{t}{i}'

            return f'<a href="https://www.openstreetmap.org/{fullt}/{i}">{t}{i}</a>'

        self._write(f'<h5>{heading}</h5><p><dl>')
        total = 0
        for rank, res in results:
            self._write(f'<dt>[{rank:.3f}]</dt> <dd>{res.source_table.name}(')
            self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
            self._write(f"rank={res.rank_address}, ")
            self._write(f"osm={format_osm(res.osm_object)}, ")
            self._write(f'cc={res.country_code}, ')
            # Missing importance is shown as NaN.
            self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
            total += 1
        self._write(f'</dl><b>TOTAL:</b> {total}</p>')


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        self._timestamp()
        sqlstr = self.format_sql(conn, statement, params)
        if CODE_HIGHLIGHT:
            sqlstr = highlight(sqlstr, PostgresLexer(),
                               HtmlFormatter(nowrap=True, lineseparator='<br />'))
            self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
        else:
            # Without pygments the raw SQL must be HTML-escaped by hand.
            self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')


    def _python_var(self, var: Any) -> str:
        # Render an arbitrary Python value, highlighted when possible.
        if CODE_HIGHLIGHT:
            fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
            return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'

        return f'<code class="lang-python">{html.escape(str(var))}</code>'


    def _write(self, text: str) -> None:
        """ Add the raw text to the debug output.
        """
        self.buffer.write(text)
|
||||
|
||||
|
||||
class TextLogger(BaseLogger):
    """ Logger creating output suitable for the console.

        Output is collected in an in-memory buffer and returned verbatim
        by get_buffer().
    """
    def __init__(self) -> None:
        self.buffer = io.StringIO()


    def _timestamp(self) -> None:
        # Emit the current wall-clock time on its own line.
        self._write(f'[{dt.datetime.now()}]\n')


    def get_buffer(self) -> str:
        return self.buffer.getvalue()


    def function(self, func: str, **kwargs: Any) -> None:
        self._write(f"#### Debug output for {func}()\n\nParameters:\n")
        for name, value in kwargs.items():
            self._write(f'  {name}: {self._python_var(value)}\n')
        self._write('\n')


    def section(self, heading: str) -> None:
        self._timestamp()
        self._write(f"\n# {heading}\n\n")


    def comment(self, text: str) -> None:
        self._write(f"{text}\n")


    def var_dump(self, heading: str, var: Any) -> None:
        # A callable is treated as a lazy value: only evaluated when
        # this logger is active.
        if callable(var):
            var = var()

        self._write(f'{heading}:\n  {self._python_var(var)}\n\n')


    def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
        self._write(f'{heading}:\n')
        # Materialise all rows first so column widths can be computed;
        # None rows become horizontal separator lines.
        data = [list(map(self._python_var, row)) if row else None for row in rows]
        assert data[0] is not None
        num_cols = len(data[0])

        maxlens = [max(len(d[i]) for d in data if d) for i in range(num_cols)]
        tablewidth = sum(maxlens) + 3 * num_cols + 1
        row_format = '| ' +' | '.join(f'{{:<{l}}}' for l in maxlens) + ' |\n'
        self._write('-'*tablewidth + '\n')
        self._write(row_format.format(*data[0]))
        self._write('-'*tablewidth + '\n')
        for row in data[1:]:
            if row:
                self._write(row_format.format(*row))
            else:
                self._write('-'*tablewidth + '\n')
        # Close the table unless the last row already was a separator.
        if data[-1]:
            self._write('-'*tablewidth + '\n')


    def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
        self._timestamp()
        self._write(f'{heading}:\n')
        total = 0
        for rank, res in results:
            self._write(f'[{rank:.3f}] {res.source_table.name}(')
            self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
            self._write(f"rank={res.rank_address}, ")
            self._write(f"osm={''.join(map(str, res.osm_object or []))}, ")
            self._write(f'cc={res.country_code}, ')
            # Missing importance is shown as -1.
            self._write(f'importance={res.importance or -1:.5f})\n')
            total += 1
        self._write(f'TOTAL: {total}\n\n')


    def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
            params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
        self._timestamp()
        # Wrap long SQL to 78 columns and prefix each line with '| '.
        sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
        self._write(f"| {sqlstr}\n\n")


    def _python_var(self, var: Any) -> str:
        return str(var)


    def _write(self, text: str) -> None:
        self.buffer.write(text)
|
||||
|
||||
|
||||
# Active logger for the current execution context. A ContextVar is used so
# that concurrent asyncio tasks can each carry their own logger; the default
# BaseLogger discards all output.
logger: ContextVar[BaseLogger] = ContextVar('logger', default=BaseLogger())
|
||||
|
||||
|
||||
def set_log_output(fmt: str) -> None:
    """ Enable collecting debug information.

        'html' and 'text' select the corresponding logger; any other
        value installs the no-op base logger, disabling output.
    """
    if fmt == 'html':
        new_logger: BaseLogger = HTMLLogger()
    elif fmt == 'text':
        new_logger = TextLogger()
    else:
        new_logger = BaseLogger()

    logger.set(new_logger)
|
||||
|
||||
|
||||
def log() -> BaseLogger:
    """ Return the logger for the current context.
    """
    current: BaseLogger = logger.get()
    return current
|
||||
|
||||
|
||||
def get_and_disable() -> str:
    """ Return the current content of the debug buffer and disable logging.
    """
    # Grab the buffered output first, then swap in the no-op logger.
    active = logger.get()
    output = active.get_buffer()
    logger.set(BaseLogger())
    return output
|
||||
|
||||
|
||||
# Static HTML scaffolding that HTMLLogger.get_buffer() wraps around the
# collected log output. The pygments style sheet is spliced in only when
# code highlighting is available (see CODE_HIGHLIGHT above).
HTML_HEADER: str = """<!DOCTYPE html>
<html>
<head>
<title>Nominatim - Debug</title>
<style>
""" + \
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') if CODE_HIGHLIGHT else '') +\
"""
h2 { font-size: x-large }

dl {
padding-left: 10pt;
font-family: monospace
}

dt {
float: left;
font-weight: bold;
margin-right: 0.5em
}

dt::after { content: ": "; }

dd::after {
clear: left;
display: block
}

.lang-sql {
color: #555;
font-size: small
}

h5 {
border: solid lightgrey 0.1pt;
margin-bottom: 0;
background-color: #f7f7f7
}

h5 + .highlight {
padding: 3pt;
border: solid lightgrey 0.1pt
}

table, th, tbody {
border: thin solid;
border-collapse: collapse;
}
td {
border-right: thin solid;
padding-left: 3pt;
padding-right: 3pt;
}

.timestamp {
font-size: 0.8em;
color: darkblue;
width: calc(100% - 5pt);
text-align: right;
position: absolute;
left: 0;
margin-top: -5px;
}
</style>
</head>
<body>
"""

HTML_FOOTER: str = "</body></html>"
|
||||
251
nominatim/api/lookup.py
Normal file
251
nominatim/api/lookup.py
Normal file
@@ -0,0 +1,251 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of place lookup by ID.
|
||||
"""
|
||||
from typing import Optional, Callable, Tuple, Type
|
||||
import datetime as dt
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaColumn, SaRow, SaSelect
|
||||
from nominatim.api.connection import SearchConnection
|
||||
import nominatim.api.types as ntyp
|
||||
import nominatim.api.results as nres
|
||||
from nominatim.api.logging import log
|
||||
|
||||
# Converter from a raw database row to a result object of the requested
# result class; may return None (e.g. when no row was found).
RowFunc = Callable[[Optional[SaRow], Type[nres.BaseResultT]], Optional[nres.BaseResultT]]

# Hook that extends a lookup SELECT with geometry output columns for the
# given geometry column.
GeomFunc = Callable[[SaSelect, SaColumn], SaSelect]
|
||||
|
||||
|
||||
|
||||
async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
                         add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Search for the given place in the placex table and return the
        base information.

        Supports lookup by internal place ID or by OSM type/id (optionally
        narrowed by OSM class). Returns None for unsupported reference
        kinds or when no matching row exists.
    """
    log().section("Find in placex table")
    t = conn.t.placex
    # ST_Expand(0) yields the (unexpanded) bounding box of the geometry.
    sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.admin_level,
                    t.c.address, t.c.extratags,
                    t.c.housenumber, t.c.postcode, t.c.country_code,
                    t.c.importance, t.c.wikipedia, t.c.indexed_date,
                    t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                    t.c.linked_place_id,
                    t.c.geometry.ST_Expand(0).label('bbox'),
                    t.c.centroid)

    if isinstance(place, ntyp.PlaceID):
        sql = sql.where(t.c.place_id == place.place_id)
    elif isinstance(place, ntyp.OsmID):
        sql = sql.where(t.c.osm_type == place.osm_type)\
                 .where(t.c.osm_id == place.osm_id)
        if place.osm_class:
            sql = sql.where(t.c.class_ == place.osm_class)
        else:
            # Multiple entries may share one OSM object; without an
            # explicit class, order by class for a deterministic choice.
            sql = sql.order_by(t.c.class_)
        sql = sql.limit(1)
    else:
        return None

    return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
|
||||
|
||||
|
||||
async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
                          add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Search for the given place in the osmline table and return the
        base information.

        Supports lookup by internal place ID or by OSM way ID. Returns
        None for unsupported reference kinds or when no row matches.
    """
    log().section("Find in interpolation table")
    t = conn.t.osmline
    sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id,
                    t.c.indexed_date, t.c.startnumber, t.c.endnumber,
                    t.c.step, t.c.address, t.c.postcode, t.c.country_code,
                    t.c.linegeo.ST_Centroid().label('centroid'))

    if isinstance(place, ntyp.PlaceID):
        sql = sql.where(t.c.place_id == place.place_id)
    elif isinstance(place, ntyp.OsmID) and place.osm_type == 'W':
        # There may be multiple interpolations for a single way.
        # If 'class' contains a number, return the one that belongs to that number.
        sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
        if place.osm_class and place.osm_class.isdigit():
            # Order by distance of the requested number to the
            # interpolation range (0 when the number lies inside it).
            sql = sql.order_by(sa.func.greatest(0,
                                                int(place.osm_class) - t.c.endnumber,
                                                t.c.startnumber - int(place.osm_class)))
    else:
        return None

    return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
|
||||
|
||||
|
||||
async def find_in_tiger(conn: SearchConnection, place: ntyp.PlaceRef,
                        add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Search for the given place in the table of Tiger addresses and return
        the base information. Only lookup by place ID is supported.
    """
    if not isinstance(place, ntyp.PlaceID):
        return None

    log().section("Find in TIGER table")
    t = conn.t.tiger
    parent = conn.t.placex
    # Join against placex to recover the OSM type/id of the parent street.
    # Outer join so a missing parent does not drop the TIGER row itself.
    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    parent.c.osm_type, parent.c.osm_id,
                    t.c.startnumber, t.c.endnumber, t.c.step,
                    t.c.postcode,
                    t.c.linegeo.ST_Centroid().label('centroid'))\
            .where(t.c.place_id == place.place_id)\
            .join(parent, t.c.parent_place_id == parent.c.place_id, isouter=True)

    return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
|
||||
|
||||
|
||||
async def find_in_postcode(conn: SearchConnection, place: ntyp.PlaceRef,
                           add_geometries: GeomFunc) -> Optional[SaRow]:
    """ Search for the given place in the postcode table and return the
        base information. Only lookup by place ID is supported.
    """
    if not isinstance(place, ntyp.PlaceID):
        return None

    log().section("Find in postcode table")
    t = conn.t.postcode
    # Postcode centroids are stored in the geometry column, so the same
    # column doubles as the 'centroid' of the result.
    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    t.c.rank_search, t.c.rank_address,
                    t.c.indexed_date, t.c.postcode, t.c.country_code,
                    t.c.geometry.label('centroid')) \
            .where(t.c.place_id == place.place_id)

    return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
|
||||
|
||||
|
||||
async def find_in_all_tables(conn: SearchConnection, place: ntyp.PlaceRef,
                             add_geometries: GeomFunc
                            ) -> Tuple[Optional[SaRow], RowFunc[nres.BaseResultT]]:
    """ Search for the given place in all data tables
        and return the base information.

        Tables are tried in a fixed order; the first hit wins. The
        corresponding row-conversion function is returned alongside the row.
    """
    lookups = ((find_in_placex, 'placex', nres.create_from_placex_row),
               (find_in_osmline, 'osmline', nres.create_from_osmline_row),
               (find_in_postcode, 'postcode', nres.create_from_postcode_row))

    for finder, label, row_factory in lookups:
        row = await finder(conn, place, add_geometries)
        log().var_dump(f'Result ({label})', row)
        if row is not None:
            return row, row_factory

    # TIGER is the final fallback. Its row is returned even when None so
    # callers always receive a usable row function.
    row = await find_in_tiger(conn, place, add_geometries)
    log().var_dump('Result (tiger)', row)
    return row, nres.create_from_tiger_row
|
||||
|
||||
|
||||
async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
                             details: ntyp.LookupDetails) -> Optional[nres.DetailedResult]:
    """ Retrieve a place with additional details from the database.

        Returns None when the place cannot be found in any data table.
        Raises ValueError for geometry output formats other than GeoJSON,
        which is the only format this lookup supports.
    """
    log().function('get_detailed_place', place=place, details=details)

    if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
        # Fixed typo in the error message ("geojosn").
        raise ValueError("lookup only supports geojson polygon output.")

    if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
        def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
            # Simplify very detailed polygons to keep the response size bounded.
            return sql.add_columns(sa.func.ST_AsGeoJSON(
                sa.case((sa.func.ST_NPoints(column) > 5000,
                         sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
                        else_=column), 7).label('geometry_geojson'))
    else:
        # No geometry requested: only report the type of the geometry.
        def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
            return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))

    row_func: RowFunc[nres.DetailedResult]
    row, row_func = await find_in_all_tables(conn, place, _add_geometry)

    if row is None:
        return None

    result = row_func(row, nres.DetailedResult)
    # row is not None here, so the conversion function cannot return None.
    assert result is not None

    # add missing details
    if 'type' in result.geometry:
        # Normalise the geometry type name to the PostGIS 'ST_*' spelling.
        result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
                                                        result.geometry['type'])
    indexed_date = getattr(row, 'indexed_date', None)
    if indexed_date is not None:
        # NOTE(review): assumes database timestamps are UTC - confirm with schema.
        result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)

    await nres.add_result_details(conn, [result], details)

    return result
|
||||
|
||||
|
||||
async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
                           details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
    """ Retrieve a place as a simple search result from the database.

        Returns None when the place cannot be found in any data table.
        Geometry output columns are added according to the formats
        requested in 'details' (GeoJSON, WKT, KML, SVG may be combined).
    """
    log().function('get_simple_place', place=place, details=details)

    def _add_geometry(sql: SaSelect, col: SaColumn) -> SaSelect:
        # Add one output column per requested geometry format.
        if not details.geometry_output:
            return sql

        out = []

        if details.geometry_simplification > 0.0:
            col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

        if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
            out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
        if details.geometry_output & ntyp.GeometryFormat.TEXT:
            out.append(sa.func.ST_AsText(col).label('geometry_text'))
        if details.geometry_output & ntyp.GeometryFormat.KML:
            out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
        if details.geometry_output & ntyp.GeometryFormat.SVG:
            out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))

        return sql.add_columns(*out)

    row_func: RowFunc[nres.SearchResult]
    row, row_func = await find_in_all_tables(conn, place, _add_geometry)

    if row is None:
        return None

    result = row_func(row, nres.SearchResult)
    # row is not None here, so the conversion function cannot return None.
    # (Removed a duplicated assert.)
    assert result is not None

    # add missing details
    if hasattr(row, 'bbox'):
        result.bbox = ntyp.Bbox.from_wkb(row.bbox)

    await nres.add_result_details(conn, [result], details)

    return result
|
||||
|
||||
|
||||
# Map from the uppercase geometry type names (as they appear in GeoJSON
# output) to the PostGIS-style 'ST_*' spelling. Used by get_detailed_place()
# to normalise result.geometry['type'].
GEOMETRY_TYPE_MAP = {
    'POINT': 'ST_Point',
    'MULTIPOINT': 'ST_MultiPoint',
    'LINESTRING': 'ST_LineString',
    'MULTILINESTRING': 'ST_MultiLineString',
    'POLYGON': 'ST_Polygon',
    'MULTIPOLYGON': 'ST_MultiPolygon',
    'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
}
|
||||
56
nominatim/api/result_formatting.py
Normal file
56
nominatim/api/result_formatting.py
Normal file
@@ -0,0 +1,56 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper classes and functions for formatting results into API responses.
|
||||
"""
|
||||
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
|
||||
from collections import defaultdict
|
||||
|
||||
# Generic type variable for the result class handled by a formatting function.
T = TypeVar('T') # pylint: disable=invalid-name
# A formatting function renders a result object into a string, given a
# mapping of format options.
FormatFunc = Callable[[T, Mapping[str, Any]], str]
|
||||
|
||||
|
||||
class FormatDispatcher:
    """ Registry of result-formatting functions, conveniently populated
        through decorators in the module that defines the formatters.
    """

    def __init__(self) -> None:
        # Two-level map: result class -> {format name -> formatting function}.
        self.format_functions: Dict[Type[Any], Dict[str, FormatFunc[Any]]] = defaultdict(dict)


    def format_func(self, result_class: Type[T],
                    fmt: str) -> Callable[[FormatFunc[T]], FormatFunc[T]]:
        """ Decorator for a function that formats a given type of result into the
            selected format.
        """
        def _register(func: FormatFunc[T]) -> FormatFunc[T]:
            self.format_functions[result_class][fmt] = func
            return func

        return _register


    def list_formats(self, result_type: Type[Any]) -> List[str]:
        """ Return a list of formats supported by this formatter.
        """
        return list(self.format_functions[result_type])


    def supports_format(self, result_type: Type[Any], fmt: str) -> bool:
        """ Check if the given format is supported by this formatter.
        """
        return fmt in self.format_functions[result_type]


    def format_result(self, result: Any, fmt: str, options: Mapping[str, Any]) -> str:
        """ Convert the given result into a string using the given format.

            The format is expected to be in the list returned by
            `list_formats()`.
        """
        handler = self.format_functions[type(result)][fmt]
        return handler(result, options)
|
||||
752
nominatim/api/results.py
Normal file
752
nominatim/api/results.py
Normal file
@@ -0,0 +1,752 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Dataclasses for search results and helper functions to fill them.
|
||||
|
||||
Data classes are part of the public API while the functions are for
|
||||
internal use only. That's why they are implemented as free-standing functions
|
||||
instead of member functions.
|
||||
"""
|
||||
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
|
||||
import enum
|
||||
import dataclasses
|
||||
import datetime as dt
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaSelect, SaRow
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
from nominatim.api.types import Point, Bbox, LookupDetails
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.localization import Locales
|
||||
|
||||
# This file defines complex result data classes.
|
||||
# pylint: disable=too-many-instance-attributes
|
||||
|
||||
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
|
||||
""" Mix-in names from linked places, so that they show up
|
||||
as standard names where necessary.
|
||||
"""
|
||||
if not names:
|
||||
return None
|
||||
|
||||
out = {}
|
||||
for k, v in names.items():
|
||||
if k.startswith('_place_'):
|
||||
outkey = k[7:]
|
||||
out[k if outkey in names else outkey] = v
|
||||
else:
|
||||
out[k] = v
|
||||
|
||||
return out
|
||||
|
||||
|
||||
class SourceTable(enum.Enum):
    """ The `SourceTable` type lists the possible sources a result can have.
    """
    PLACEX = 1
    """ The placex table is the main source for results, usually containing
        OSM data.
    """
    OSMLINE = 2
    """ The osmline table contains address interpolations from OSM data.
        Interpolation addresses are always approximate. The OSM id in the
        result refers to the OSM way with the interpolation line object.
    """
    TIGER = 3
    """ TIGER address data contains US addresses imported on the side,
        see [Installing TIGER data](../customize/Tiger.md).
        TIGER addresses are also interpolations. The addresses always refer
        to a street from OSM data. The OSM id in the result refers to
        that street.
    """
    POSTCODE = 4
    """ The postcode table contains artificial centroids for postcodes,
        computed from the postcodes available with address points. Results
        are always approximate.
    """
    COUNTRY = 5
    """ The country table provides a fallback, when country data is missing
        in the OSM data.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class AddressLine:
    """ The `AddressLine` may contain the following fields about a related place
        and its function as an address object. Most fields are optional.
        Their presence depends on the kind and function of the address part.
    """
    category: Tuple[str, str]
    """ Main category of the place, described by a key-value pair.
    """
    names: Dict[str, str]
    """ All available names for the place including references, alternative
        names and translations.
    """
    fromarea: bool
    """ If true, then the exact area of the place is known. Without area
        information, Nominatim has to make an educated guess if an address
        belongs to one place or another.
    """
    isaddress: bool
    """ If true, this place should be considered for the final address display.
        Nominatim will sometimes include more than one candidate for
        the address in the list when it cannot reliably determine where the
        place belongs. It will consider names of all candidates when searching
        but when displaying the result, only the most likely candidate should
        be shown.
    """
    rank_address: int
    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
    """
    distance: float
    """ Distance in degrees between the result place and this address part.
    """
    place_id: Optional[int] = None
    """ Internal ID of the place.
    """
    osm_object: Optional[Tuple[str, int]] = None
    """ OSM type and ID of the place, if such an object exists.
    """
    extratags: Optional[Dict[str, str]] = None
    """ Any extra information available about the place. This is a dictionary
        that usually contains OSM tag key-value pairs.
    """

    admin_level: Optional[int] = None
    """ The administrative level of a boundary as tagged in the input data.
        This field is only meaningful for places of the category
        (boundary, administrative).
    """

    local_name: Optional[str] = None
    """ Placeholder for localization of this address part. See
        [Localization](#localization) below.
    """
|
||||
|
||||
|
||||
class AddressLines(List[AddressLine]):
    """ Sequence of address lines, ordered in descending order by their rank.
    """

    def localize(self, locales: Locales) -> List[str]:
        """ Set the local name of address parts according to the chosen
            locale. Return the list of local names without duplicates.

            Only address parts that are marked as isaddress are localized
            and returned.
        """
        parts: List[str] = []

        for entry in self:
            if not (entry.isaddress and entry.names):
                continue
            entry.local_name = locales.display_name(entry.names)
            # Suppress immediate repetitions of the same label.
            if not parts or parts[-1] != entry.local_name:
                parts.append(entry.local_name)

        return parts
|
||||
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WordInfo:
    """ Each entry in the list of search terms contains the
        following detailed information.
    """
    word_id: int
    """ Internal identifier for the word.
    """
    word_token: str
    """ Normalised and transliterated form of the word.
        This form is used for searching.
    """
    word: Optional[str] = None
    """ Untransliterated form, if available.
    """
|
||||
|
||||
|
||||
# Read-only sequence of search term descriptions.
WordInfos = Sequence[WordInfo]
|
||||
|
||||
|
||||
@dataclasses.dataclass
class BaseResult:
    """ Data class collecting information common to all
        types of search results.
    """
    # Fields always filled by the create_from_*_row() factory functions.
    source_table: SourceTable
    category: Tuple[str, str]
    centroid: Point

    # Database identifiers of the place and related places.
    place_id : Optional[int] = None
    osm_object: Optional[Tuple[str, int]] = None
    parent_place_id: Optional[int] = None
    linked_place_id: Optional[int] = None
    admin_level: int = 15

    # Localized names, filled in by localize().
    locale_name: Optional[str] = None
    display_name: Optional[str] = None

    # Raw tag dictionaries from the database.
    names: Optional[Dict[str, str]] = None
    address: Optional[Dict[str, str]] = None
    extratags: Optional[Dict[str, str]] = None

    housenumber: Optional[str] = None
    postcode: Optional[str] = None
    wikipedia: Optional[str] = None

    rank_address: int = 30
    rank_search: int = 30
    importance: Optional[float] = None

    country_code: Optional[str] = None

    # Extra detail rows, filled on demand by add_result_details().
    address_rows: Optional[AddressLines] = None
    linked_rows: Optional[AddressLines] = None
    parented_rows: Optional[AddressLines] = None
    name_keywords: Optional[WordInfos] = None
    address_keywords: Optional[WordInfos] = None

    # Geometry representations keyed by output format name.
    geometry: Dict[str, str] = dataclasses.field(default_factory=dict)

    @property
    def lat(self) -> float:
        """ Get the latitude (or y) of the center point of the place.
        """
        return self.centroid[1]


    @property
    def lon(self) -> float:
        """ Get the longitude (or x) of the center point of the place.
        """
        return self.centroid[0]


    def calculated_importance(self) -> float:
        """ Get a valid importance value. This is either the stored importance
            of the value or an artificial value computed from the place's
            search rank.
        """
        # Note: an importance of 0.0 also triggers the rank-based fallback,
        # which decreases linearly with rank (rank 30 maps to just above 0).
        return self.importance or (0.40001 - (self.rank_search/75.0))


    def localize(self, locales: Locales) -> None:
        """ Fill the locale_name and the display_name field for the
            place and, if available, its address information.
        """
        self.locale_name = locales.display_name(self.names)
        if self.address_rows:
            self.display_name = ', '.join(self.address_rows.localize(locales))
        else:
            self.display_name = self.locale_name
|
||||
|
||||
|
||||
|
||||
# Type variable for functions that work on any subclass of BaseResult.
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
|
||||
|
||||
@dataclasses.dataclass
class DetailedResult(BaseResult):
    """ A search result with more internal information from the database
        added.
    """
    # Timestamp of the last indexing of the place; set from the database
    # row in get_detailed_place().
    indexed_date: Optional[dt.datetime] = None
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ReverseResult(BaseResult):
    """ A search result for reverse geocoding.
    """
    # Distance between the query point and the result (in degrees).
    distance: Optional[float] = None
    bbox: Optional[Bbox] = None
|
||||
|
||||
|
||||
class ReverseResults(List[ReverseResult]):
    """ Sequence of reverse lookup results ordered by distance.
        May be empty when no result was found. Behaves like a plain list.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class SearchResult(BaseResult):
    """ A search result for forward geocoding.
    """
    bbox: Optional[Bbox] = None
    accuracy: float = 0.0


    @property
    def ranking(self) -> float:
        """ Return the ranking, a combined measure of accuracy and importance.
            Lower values rank better: the accuracy penalty is reduced by
            the (calculated) importance of the place.
        """
        return (self.accuracy if self.accuracy is not None else 1) \
               - self.calculated_importance()
|
||||
|
||||
|
||||
class SearchResults(List[SearchResult]):
    """ Sequence of forward lookup results ordered by relevance.
        May be empty when no result was found. Behaves like a plain list.
    """
|
||||
|
||||
|
||||
def _filter_geometries(row: SaRow) -> Dict[str, str]:
    """ Collect all geometry columns of the row into a dictionary keyed
        by the output format (the part after the 'geometry_' prefix).
    """
    prefix = 'geometry_'
    geoms: Dict[str, str] = {}
    for key, value in row._mapping.items():  # pylint: disable=W0212
        if key.startswith(prefix):
            geoms[key[len(prefix):]] = value
    return geoms
|
||||
|
||||
|
||||
def create_from_placex_row(row: Optional[SaRow],
                           class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the placex table. 'class_type' defines the type of result
        to return. Returns None if the row is None.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.PLACEX,
                      place_id=row.place_id,
                      osm_object=(row.osm_type, row.osm_id),
                      category=(row.class_, row.type),
                      parent_place_id = row.parent_place_id,
                      # linked_place_id/admin_level may be absent from
                      # restricted column selections; fall back to defaults.
                      linked_place_id = getattr(row, 'linked_place_id', None),
                      admin_level = getattr(row, 'admin_level', 15),
                      # Fold names of linked places into the standard names.
                      names=_mingle_name_tags(row.name),
                      address=row.address,
                      extratags=row.extratags,
                      housenumber=row.housenumber,
                      postcode=row.postcode,
                      wikipedia=row.wikipedia,
                      rank_address=row.rank_address,
                      rank_search=row.rank_search,
                      importance=row.importance,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
|
||||
|
||||
|
||||
def create_from_osmline_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the address interpolation table osmline. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.OSMLINE,
                     place_id=row.place_id,
                     parent_place_id = row.parent_place_id,
                     # Interpolations always stem from OSM ways.
                     osm_object=('W', row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     address=row.address,
                     postcode=row.postcode,
                     country_code=row.country_code,
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        # Whole interpolation line: expose the range via extratags.
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
|
||||
|
||||
|
||||
def create_from_tiger_row(row: Optional[SaRow],
                          class_type: Type[BaseResultT],
                          osm_type: Optional[str] = None,
                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the Tiger data interpolation table. 'class_type' defines
        the type of result to return. Returns None if the row is None.

        If the row contains a housenumber, then the housenumber is filled out.
        Otherwise the result contains the interpolation information in extratags.

        'osm_type' and 'osm_id', when given, override the corresponding
        values from the row (the row carries the parent street's OSM object).
    """
    if row is None:
        return None

    hnr = getattr(row, 'housenumber', None)

    res = class_type(source_table=SourceTable.TIGER,
                     place_id=row.place_id,
                     parent_place_id = row.parent_place_id,
                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                     category=('place', 'houses' if hnr is None else 'house'),
                     postcode=row.postcode,
                     # TIGER data covers the US only.
                     country_code='us',
                     centroid=Point.from_wkb(row.centroid),
                     geometry=_filter_geometries(row))

    if hnr is None:
        res.extratags = {'startnumber': str(row.startnumber),
                         'endnumber': str(row.endnumber),
                         'step': str(row.step)}
    else:
        res.housenumber = str(hnr)

    return res
|
||||
|
||||
|
||||
def create_from_postcode_row(row: Optional[SaRow],
                             class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the postcode table. 'class_type' defines
        the type of result to return. Returns None if the row is None.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.POSTCODE,
                      place_id=row.place_id,
                      parent_place_id = row.parent_place_id,
                      category=('place', 'postcode'),
                      # Postcode objects have no proper name; use the code
                      # itself as reference name.
                      names={'ref': row.postcode},
                      rank_search=row.rank_search,
                      rank_address=row.rank_address,
                      country_code=row.country_code,
                      centroid=Point.from_wkb(row.centroid),
                      geometry=_filter_geometries(row))
|
||||
|
||||
|
||||
def create_from_country_row(row: Optional[SaRow],
                            class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
    """ Construct a new result and add the data from the result row
        from the fallback country tables. 'class_type' defines
        the type of result to return. Returns None if the row is None.
    """
    if row is None:
        return None

    return class_type(source_table=SourceTable.COUNTRY,
                      category=('place', 'country'),
                      centroid=Point.from_wkb(row.centroid),
                      names=row.name,
                      # Countries always use the fixed country address rank.
                      rank_address=4, rank_search=4,
                      country_code=row.country_code,
                      geometry=_filter_geometries(row))
|
||||
|
||||
|
||||
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
                             details: LookupDetails) -> None:
    """ Retrieve more details from the database according to the
        parameters specified in 'details'.

        Modifies the given results in place. Address details are fetched
        in one batch query; linked places, parent places and keywords are
        fetched per result. Finally all results are localized.
    """
    if results:
        log().section('Query details for result')
        if details.address_details:
            log().comment('Query address details')
            await complete_address_details(conn, results)
        if details.linked_places:
            log().comment('Query linked places')
            for result in results:
                await complete_linked_places(conn, result)
        if details.parented_places:
            log().comment('Query parent places')
            for result in results:
                await complete_parented_places(conn, result)
        if details.keywords:
            log().comment('Query keywords')
            for result in results:
                await complete_keywords(conn, result)
        for result in results:
            result.localize(details.locales)
|
||||
|
||||
|
||||
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
    """ Create a new AddressLine from the results of a database query.

        'isaddress' overrides the corresponding row column when given;
        otherwise the row value is used, defaulting to True when absent.
    """
    extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
    if 'linked_place' in extratags:
        # Surface the linked place type under the regular 'place' key.
        extratags['place'] = extratags['linked_place']

    names = _mingle_name_tags(row.name) or {}
    if getattr(row, 'housenumber', None) is not None:
        names['housenumber'] = row.housenumber

    if isaddress is None:
        isaddress = getattr(row, 'isaddress', True)

    return AddressLine(place_id=row.place_id,
                       osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                       # 'class' is a reserved word, hence getattr.
                       category=(getattr(row, 'class'), row.type),
                       names=names,
                       extratags=extratags,
                       admin_level=row.admin_level,
                       fromarea=row.fromarea,
                       isaddress=isaddress,
                       rank_address=row.rank_address,
                       distance=row.distance)
|
||||
|
||||
|
||||
def _get_address_lookup_id(result: BaseResultT) -> int:
    """ Determine the place ID to use for the address lookup of the result.

        placex objects up to street rank may delegate to their linked place;
        everything else delegates to the parent place when available.
    """
    assert result.place_id
    use_linked = result.source_table == SourceTable.PLACEX and result.rank_search <= 27
    if use_linked:
        return result.linked_place_id or result.place_id
    return result.parent_place_id or result.place_id
|
||||
|
||||
|
||||
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
    """ Append the synthetic tail entries (postcode, country name and
        country code) to the already collected address rows of the result.
    """
    assert result.address_rows is not None
    # Skip the postcode line only when the result itself is a postcode-like
    # object (boundary/place with postal_code/postcode type).
    if result.category[0] not in ('boundary', 'place')\
       or result.category[1] not in ('postal_code', 'postcode'):
        postcode = result.postcode
        if not postcode and result.address:
            postcode = result.address.get('postcode')
        # Ignore multi-value postcodes (comma- or semicolon-separated lists).
        if postcode and ',' not in postcode and ';' not in postcode:
            result.address_rows.append(AddressLine(
                category=('place', 'postcode'),
                names={'ref': postcode},
                fromarea=False, isaddress=True, rank_address=5,
                distance=0.0))
    if result.country_code:
        async def _get_country_names() -> Optional[Dict[str, str]]:
            # Fetch names for the country, merging in derived names.
            t = conn.t.country_name
            sql = sa.select(t.c.name, t.c.derived_name)\
                    .where(t.c.country_code == result.country_code)
            for cres in await conn.execute(sql):
                names = cast(Dict[str, str], cres[0])
                if cres[1]:
                    names.update(cast(Dict[str, str], cres[1]))
                return names
            return None

        # Country names rarely change; cache them per country code.
        country_names = await conn.get_cached_value('COUNTRY_NAME',
                                                    result.country_code,
                                                    _get_country_names)
        if country_names:
            result.address_rows.append(AddressLine(
                category=('place', 'country'),
                names=country_names,
                fromarea=False, isaddress=True, rank_address=4,
                distance=0.0))
        # The bare country code is informational only (isaddress=False).
        result.address_rows.append(AddressLine(
            category=('place', 'country_code'),
            names={'ref': result.country_code}, extratags = {},
            fromarea=True, isaddress=False, rank_address=4,
            distance=0.0))
|
||||
|
||||
|
||||
def _setup_address_details(result: BaseResultT) -> None:
    """ Retrieve information about places that make up the address of the result.

        Initialises result.address_rows with the leading entries derived
        from the result itself: its own name line, a housenumber line and
        an optional unlisted-place locality line.
    """
    result.address_rows = AddressLines()
    if result.names:
        # The result itself always heads its own address list.
        result.address_rows.append(AddressLine(
            place_id=result.place_id,
            osm_object=result.osm_object,
            category=result.category,
            names=result.names,
            extratags=result.extratags or {},
            admin_level=result.admin_level,
            fromarea=True, isaddress=True,
            rank_address=result.rank_address, distance=0.0))
    if result.source_table == SourceTable.PLACEX and result.address:
        # placex objects may carry the housenumber under several keys.
        housenumber = result.address.get('housenumber')\
                      or result.address.get('streetnumber')\
                      or result.address.get('conscriptionnumber')
    elif result.housenumber:
        housenumber = result.housenumber
    else:
        housenumber = None
    if housenumber:
        result.address_rows.append(AddressLine(
            category=('place', 'house_number'),
            names={'ref': housenumber},
            fromarea=True, isaddress=True, rank_address=28, distance=0))
    if result.address and '_unlisted_place' in result.address:
        result.address_rows.append(AddressLine(
            category=('place', 'locality'),
            names={'name': result.address['_unlisted_place']},
            fromarea=False, isaddress=True, rank_address=25, distance=0))
|
||||
|
||||
|
||||
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
    """ Retrieve information about places that make up the address of the result.

        Fills in `address_rows` for every result in `results` in place:
        first from information on the result itself, then from the
        place_addressline table, finally from the parent place. The rows
        are sorted into display order at the end.
    """
    for result in results:
        _setup_address_details(result)

    ### Lookup entries from place_address line

    # One lookup record per result: its own id, the id to use for the
    # addressline lookup (may be the parent), its address names for
    # cross-checking and its centroid as EWKT for containment tests.
    lookup_ids = [{'pid': r.place_id,
                   'lid': _get_address_lookup_id(r),
                   'names': list(r.address.values()) if r.address else [],
                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
                  for r in results if r.place_id]

    if not lookup_ids:
        return

    # Unnest the JSON list so all results can be resolved in a single query.
    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
             .table_valued(sa.column('value', type_=sa.JSON))

    t = conn.t.placex
    taddr = conn.t.addressline

    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                    t.c.class_, t.c.type, t.c.extratags,
                    t.c.admin_level, taddr.c.fromarea,
                    sa.case((t.c.rank_address == 11, 5),
                            else_=t.c.rank_address).label('rank_address'),
                    taddr.c.distance, t.c.country_code, t.c.postcode)\
            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
            .join(t, taddr.c.address_place_id == t.c.place_id)\
            .order_by('src_place_id')\
            .order_by(sa.column('rank_address').desc())\
            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
                              (taddr.c.isaddress, 0),
                              (sa.and_(taddr.c.fromarea,
                                       t.c.geometry.ST_Contains(
                                           sa.func.ST_GeomFromEWKT(
                                               ltab.c.value['c'].as_string()))), 1),
                              else_=-1).desc())\
            .order_by(taddr.c.fromarea.desc())\
            .order_by(taddr.c.distance.desc())\
            .order_by(t.c.rank_search.desc())


    # Stream the ordered rows, switching result whenever src_place_id
    # changes. The sort order guarantees rows for one result are adjacent
    # and the best candidate per rank_address comes first.
    current_result = None
    current_rank_address = -1
    for row in await conn.execute(sql):
        if current_result is None or row.src_place_id != current_result.place_id:
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            current_rank_address = -1

        # Only the first row of each rank becomes part of the display address.
        location_isaddress = row.rank_address != current_rank_address

        if current_result.country_code is None and row.country_code:
            current_result.country_code = row.country_code

        if row.type in ('postcode', 'postal_code') and location_isaddress:
            if not row.fromarea or \
               (current_result.address and 'postcode' in current_result.address):
                # Prefer the postcode on the result itself over a nearby one.
                location_isaddress = False
            else:
                current_result.postcode = None

        assert current_result.address_rows is not None
        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
        current_rank_address = row.rank_address

    for result in results:
        await _finalize_entry(conn, result)


    ### Finally add the record for the parent entry where necessary.

    # Only results whose addressline lookup went through a different place
    # (lid != pid) still need the parent row added explicitly.
    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
    if parent_lookup_ids:
        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
                 .table_valued(sa.column('value', type_=sa.JSON))
        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                        t.c.class_, t.c.type, t.c.extratags,
                        t.c.admin_level,
                        t.c.rank_address)\
                .where(t.c.place_id == ltab.c.value['lid'].as_integer())

        for row in await conn.execute(sql):
            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
            assert current_result is not None
            assert current_result.address_rows is not None

            current_result.address_rows.append(AddressLine(
                    place_id=row.place_id,
                    osm_object=(row.osm_type, row.osm_id),
                    category=(row.class_, row.type),
                    names=row.name, extratags=row.extratags or {},
                    admin_level=row.admin_level,
                    fromarea=True, isaddress=True,
                    rank_address=row.rank_address, distance=0.0))

    ### Now sort everything
    # The place's own row first, then descending by address rank; among
    # equal ranks the rows marked as address come last.
    def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
        return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)

    for result in results:
        assert result.address_rows is not None
        result.address_rows.sort(key=mk_sort_key(result.place_id))
|
||||
|
||||
|
||||
def _placex_select_address_row(conn: SearchConnection,
                               centroid: Point) -> SaSelect:
    """ Create a select statement over placex with the columns needed to
        build an AddressLine, computing the spheroid distance of each
        row's geometry to the given centroid.
    """
    t = conn.t.placex
    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_.label('class'), t.c.type,
                     t.c.admin_level, t.c.housenumber,
                     t.c.geometry.is_area().label('fromarea'),
                     t.c.rank_address,
                     t.c.geometry.distance_spheroid(
                         sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
|
||||
|
||||
|
||||
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that link to the result.

        Fills the `linked_rows` attribute of the result in place. Only
        placex results can have linked places; for all other source
        tables the list stays empty.
    """
    result.linked_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
              .where(placex.c.linked_place_id == result.place_id)

    rows = await conn.execute(query)
    for row in rows:
        result.linked_rows.append(_result_row_to_address_row(row))
|
||||
|
||||
|
||||
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about the search terms used for this place.

        Requires that the query analyzer was initialised to get access to
        the word table.
    """
    t = conn.t.search_name
    sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
            .where(t.c.place_id == result.place_id)

    result.name_keywords = []
    result.address_keywords = []

    # Note: 't' is deliberately rebound here; from now on it refers to the
    # word table, which is only available through reflected metadata.
    t = conn.t.meta.tables['word']
    sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)

    # For every search_name entry of the place, resolve the token ids of
    # both vectors into full word information.
    # NOTE(review): presumably search_name holds at most one row per
    # place_id — confirm against the schema.
    for name_tokens, address_tokens in await conn.execute(sql):
        for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
            result.name_keywords.append(WordInfo(*row))

        for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
            result.address_keywords.append(WordInfo(*row))
|
||||
|
||||
|
||||
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
    """ Retrieve information about places that the result provides the
        address for.

        Fills the `parented_rows` attribute of the result in place. Only
        placex results can be address parents; the lookup is restricted
        to rank-30 children (addressable objects).
    """
    result.parented_rows = AddressLines()
    if result.source_table != SourceTable.PLACEX:
        return

    placex = conn.t.placex
    query = _placex_select_address_row(conn, result.centroid)\
              .where(placex.c.parent_place_id == result.place_id)\
              .where(placex.c.rank_search == 30)

    rows = await conn.execute(query)
    for row in rows:
        result.parented_rows.append(_result_row_to_address_row(row))
|
||||
590
nominatim/api/reverse.py
Normal file
590
nominatim/api/reverse.py
Normal file
@@ -0,0 +1,590 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of reverse geocoding.
|
||||
"""
|
||||
from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
|
||||
import functools
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
|
||||
SaBind, SaLambdaSelect
|
||||
from nominatim.api.connection import SearchConnection
|
||||
import nominatim.api.results as nres
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
|
||||
# In SQLAlchemy expression which compare with NULL need to be expressed with
|
||||
# the equal sign.
|
||||
# pylint: disable=singleton-comparison
|
||||
|
||||
# Signature of the converter functions from nominatim.api.results
# (e.g. create_from_placex_row): turn a raw SQL row into a ReverseResult,
# or None when the row is None.
RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]

# Bind parameters shared by all reverse-geocoding queries: the lookup
# point as WKT and the maximum address rank that may be returned.
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
|
||||
|
||||
def no_index(expr: SaColumn) -> SaColumn:
    """ Wrap the given expression, so that the query planner will
        refrain from using the expression for index lookup.
    """
    # coalesce(NULL, x) evaluates to x, but the planner no longer sees a
    # plain column reference and therefore won't use an index on it.
    hidden = sa.null()
    return sa.func.coalesce(hidden, expr) # pylint: disable=not-callable
|
||||
|
||||
|
||||
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
    """ Create a select statement with the columns relevant for reverse
        results.

        With use_wkt set, distance and centroid are computed against the
        WKT bind parameter; otherwise the caller (a subquery) must already
        provide 'distance' and 'centroid' columns.
    """
    if not use_wkt:
        distance = t.c.distance
        centroid = t.c.centroid
    else:
        distance = t.c.geometry.ST_Distance(WKT_PARAM)
        # For linear features report the point on the line closest to the
        # query point instead of the precomputed centroid.
        centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
                           else_=t.c.centroid).label('centroid')

    return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                     t.c.class_, t.c.type,
                     t.c.address, t.c.extratags,
                     t.c.housenumber, t.c.postcode, t.c.country_code,
                     t.c.importance, t.c.wikipedia,
                     t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                     centroid,
                     t.c.linked_place_id, t.c.admin_level,
                     distance.label('distance'),
                     t.c.geometry.ST_Expand(0).label('bbox'))
|
||||
|
||||
|
||||
def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
    """ SQL expression computing the house number at a fractional
        'position' along an interpolation line, rounded to the nearest
        multiple of the interpolation step.
    """
    return sa.cast(table.c.startnumber
                   + sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
                                   / table.c.step) * table.c.step,
                   sa.Integer).label('housenumber')
|
||||
|
||||
|
||||
def _interpolated_position(table: SaFromClause) -> SaLabel:
    """ SQL expression for the point on the interpolation line that
        corresponds to the interpolated house number: the raw position is
        first snapped to the nearest valid number before interpolating.
    """
    fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
    rounded_pos = sa.func.round(table.c.position / fac) * fac
    return sa.case(
        # Degenerate single-number line: fall back to the line centroid.
        (table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
        else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
|
||||
|
||||
|
||||
def _locate_interpolation(table: SaFromClause) -> SaLabel:
    """ Given a position, locate the closest point on the line.

        Returns the fractional position (0..1) along the line; geometries
        that are not line-like yield position 0.
    """
    return sa.case((table.c.linegeo.is_line_like(),
                    table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
                   else_=0).label('position')
|
||||
|
||||
|
||||
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
|
||||
return min(rows, key=lambda row: 1000 if row is None else row.distance)
|
||||
|
||||
|
||||
class ReverseGeocoder:
    """ Class implementing the logic for looking up a place from a
        coordinate.

        One instance handles one set of reverse parameters; the actual
        coordinate is supplied per call to lookup().
    """

    def __init__(self, conn: SearchConnection, params: ReverseDetails,
                 restrict_to_country_areas: bool = False) -> None:
        self.conn = conn
        self.params = params
        # When set, coordinates outside any known country yield no result.
        self.restrict_to_country_areas = restrict_to_country_areas

        # Bind parameters shared by all queries of this lookup; the 'wkt'
        # parameter is added later in lookup().
        self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}


    @property
    def max_rank(self) -> int:
        """ Return the maximum configured rank.
        """
        return self.params.max_rank


    def has_geometries(self) -> bool:
        """ Check if any geometries are requested.
        """
        return bool(self.params.geometry_output)


    def layer_enabled(self, *layer: DataLayer) -> bool:
        """ Return true when any of the given layer types are requested.
        """
        return any(self.params.layers & l for l in layer)


    def layer_disabled(self, *layer: DataLayer) -> bool:
        """ Return true when none of the given layer types is requested.
        """
        return not any(self.params.layers & l for l in layer)


    def has_feature_layers(self) -> bool:
        """ Return true if any layer other than ADDRESS or POI is requested.
        """
        return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)


    def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
        """ Add columns for each requested geometry output format,
            optionally simplifying the geometry first.
        """
        out = []

        if self.params.geometry_simplification > 0.0:
            col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)

        if self.params.geometry_output & GeometryFormat.GEOJSON:
            out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
        if self.params.geometry_output & GeometryFormat.TEXT:
            out.append(sa.func.ST_AsText(col).label('geometry_text'))
        if self.params.geometry_output & GeometryFormat.KML:
            out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
        if self.params.geometry_output & GeometryFormat.SVG:
            out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))

        return sql.add_columns(*out)


    def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
        """ Build a class filter expression corresponding to the enabled
            feature layers (railway/manmade/natural).
        """
        if self.layer_enabled(DataLayer.MANMADE):
            # MANMADE is the catch-all: exclude classes of disabled layers.
            exclude = []
            if self.layer_disabled(DataLayer.RAILWAY):
                exclude.append('railway')
            if self.layer_disabled(DataLayer.NATURAL):
                exclude.extend(('natural', 'water', 'waterway'))
            return table.c.class_.not_in(tuple(exclude))

        # Otherwise only the explicitly enabled classes are included.
        include = []
        if self.layer_enabled(DataLayer.RAILWAY):
            include.append('railway')
        if self.layer_enabled(DataLayer.NATURAL):
            include.extend(('natural', 'water', 'waterway'))
        return table.c.class_.in_(tuple(include))


    async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
        """ Look up the closest rank 26+ place in the database, which
            is closer than the given distance.
        """
        t = self.conn.t.placex

        # PostgreSQL must not get the distance as a parameter because
        # there is a danger it won't be able to properly estimate index use
        # when used with prepared statements
        diststr = sa.text(f"{distance}")

        sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
                .where(t.c.geometry.within_distance(WKT_PARAM, diststr))
                .where(t.c.indexed_status == 0)
                .where(t.c.linked_place_id == None)
                .where(sa.or_(sa.not_(t.c.geometry.is_area()),
                              t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
                .order_by('distance')
                .limit(1))

        if self.has_geometries():
            sql = self._add_geometry_columns(sql, t.c.geometry)

        # Each entry is one alternative rank/layer condition; the query
        # accepts a row matching any of them.
        restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []

        if self.layer_enabled(DataLayer.ADDRESS):
            max_rank = min(29, self.max_rank)
            restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
            if self.max_rank == 30:
                restrict.append(lambda: sa.func.IsAddressPoint(t))
        if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
            restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
                                            t.c.class_.not_in(('place', 'building')),
                                            sa.not_(t.c.geometry.is_line_like())))
        if self.has_feature_layers():
            restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
                                    no_index(t.c.rank_address) == 0,
                                    self._filter_by_layer(t)))

        if not restrict:
            return None

        sql = sql.where(sa.or_(*restrict))

        return (await self.conn.execute(sql, self.bind_params)).one_or_none()


    async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
        """ Find the closest house number belonging to the given street.
        """
        t = self.conn.t.placex

        def _base_query() -> SaSelect:
            return _select_from_placex(t)\
                .where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
                .where(t.c.parent_place_id == parent_place_id)\
                .where(sa.func.IsAddressPoint(t))\
                .where(t.c.indexed_status == 0)\
                .where(t.c.linked_place_id == None)\
                .order_by('distance')\
                .limit(1)

        sql: SaLambdaSelect
        if self.has_geometries():
            sql = self._add_geometry_columns(_base_query(), t.c.geometry)
        else:
            sql = sa.lambda_stmt(_base_query)

        return (await self.conn.execute(sql, self.bind_params)).one_or_none()


    async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
                                             distance: float) -> Optional[SaRow]:
        """ Find an address interpolation within 'distance', optionally
            restricted to the given street.
        """
        t = self.conn.t.osmline

        sql = sa.select(t,
                        t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
                        _locate_interpolation(t))\
                .where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
                .where(t.c.startnumber != None)\
                .order_by('distance')\
                .limit(1)

        if parent_place_id is not None:
            sql = sql.where(t.c.parent_place_id == parent_place_id)

        inner = sql.subquery('ipol')

        # Compute the concrete house number and point from the raw
        # interpolation row.
        sql = sa.select(inner.c.place_id, inner.c.osm_id,
                        inner.c.parent_place_id, inner.c.address,
                        _interpolated_housenumber(inner),
                        _interpolated_position(inner),
                        inner.c.postcode, inner.c.country_code,
                        inner.c.distance)

        if self.has_geometries():
            sub = sql.subquery('geom')
            sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)

        return (await self.conn.execute(sql, self.bind_params)).one_or_none()


    async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
        """ Find a TIGER interpolated house number for the given street
            (US-only supplementary data).
        """
        t = self.conn.t.tiger

        def _base_query() -> SaSelect:
            inner = sa.select(t,
                              t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
                              _locate_interpolation(t))\
                      .where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
                      .where(t.c.parent_place_id == parent_place_id)\
                      .order_by('distance')\
                      .limit(1)\
                      .subquery('tiger')

            return sa.select(inner.c.place_id,
                             inner.c.parent_place_id,
                             _interpolated_housenumber(inner),
                             _interpolated_position(inner),
                             inner.c.postcode,
                             inner.c.distance)

        sql: SaLambdaSelect
        if self.has_geometries():
            sub = _base_query().subquery('geom')
            sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
        else:
            sql = sa.lambda_stmt(_base_query)

        return (await self.conn.execute(sql, self.bind_params)).one_or_none()


    async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
        """ Find a street or POI/address for the given WKT point.
        """
        log().section('Reverse lookup on street/address level')
        distance = 0.006
        parent_place_id = None

        row = await self._find_closest_street_or_poi(distance)
        row_func: RowFunc = nres.create_from_placex_row
        log().var_dump('Result (street/building)', row)

        # If the closest result was a street, but an address was requested,
        # check for a housenumber nearby which is part of the street.
        if row is not None:
            if self.max_rank > 27 \
               and self.layer_enabled(DataLayer.ADDRESS) \
               and row.rank_address <= 27:
                distance = 0.001
                parent_place_id = row.place_id
                log().comment('Find housenumber for street')
                addr_row = await self._find_housenumber_for_street(parent_place_id)
                log().var_dump('Result (street housenumber)', addr_row)

                if addr_row is not None:
                    row = addr_row
                    row_func = nres.create_from_placex_row
                    distance = addr_row.distance
                elif row.country_code == 'us' and parent_place_id is not None:
                    log().comment('Find TIGER housenumber for street')
                    addr_row = await self._find_tiger_number_for_street(parent_place_id)
                    log().var_dump('Result (street Tiger housenumber)', addr_row)

                    if addr_row is not None:
                        # TIGER rows lack OSM info; carry over the street's.
                        row_func = cast(RowFunc,
                                        functools.partial(nres.create_from_tiger_row,
                                                          osm_type=row.osm_type,
                                                          osm_id=row.osm_id))
                        row = addr_row
            else:
                distance = row.distance

        # Check for an interpolation that is either closer than our result
        # or belongs to a close street found.
        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
            log().comment('Find interpolation for street')
            addr_row = await self._find_interpolation_for_street(parent_place_id,
                                                                 distance)
            log().var_dump('Result (street interpolation)', addr_row)
            if addr_row is not None:
                row = addr_row
                row_func = nres.create_from_osmline_row

        return row, row_func


    async def _lookup_area_address(self) -> Optional[SaRow]:
        """ Lookup large addressable areas for the given WKT point.
        """
        log().comment('Reverse lookup by larger address area features')
        t = self.conn.t.placex

        def _base_query() -> SaSelect:
            # The inner SQL brings results in the right order, so that
            # later only a minimum of results needs to be checked with ST_Contains.
            inner = sa.select(t, sa.literal(0.0).label('distance'))\
                      .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
                      .where(t.c.geometry.intersects(WKT_PARAM))\
                      .where(sa.func.PlacexGeometryReverseLookuppolygon())\
                      .order_by(sa.desc(t.c.rank_search))\
                      .limit(50)\
                      .subquery('area')

            return _select_from_placex(inner, False)\
                      .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
                      .order_by(sa.desc(inner.c.rank_search))\
                      .limit(1)

        sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
        if self.has_geometries():
            sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))

        address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
        log().var_dump('Result (area)', address_row)

        if address_row is not None and address_row.rank_search < self.max_rank:
            log().comment('Search for better matching place nodes inside the area')

            address_rank = address_row.rank_search
            address_id = address_row.place_id

            def _place_inside_area_query() -> SaSelect:
                inner = \
                    sa.select(t,
                              t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
                      .where(t.c.rank_search > address_rank)\
                      .where(t.c.rank_search <= MAX_RANK_PARAM)\
                      .where(t.c.indexed_status == 0)\
                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                      .order_by(sa.desc(t.c.rank_search))\
                      .limit(50)\
                      .subquery('places')

                touter = t.alias('outer')
                return _select_from_placex(inner, False)\
                    .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
                    .where(touter.c.place_id == address_id)\
                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                    .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                    .limit(1)

            if self.has_geometries():
                sql = self._add_geometry_columns(_place_inside_area_query(),
                                                 sa.literal_column('places.geometry'))
            else:
                sql = sa.lambda_stmt(_place_inside_area_query)

            place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
            log().var_dump('Result (place node)', place_address_row)

            if place_address_row is not None:
                return place_address_row

        return address_row


    async def _lookup_area_others(self) -> Optional[SaRow]:
        """ Lookup non-address features (railway/manmade/natural layers)
            near the given WKT point.
        """
        t = self.conn.t.placex

        inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
                  .where(t.c.rank_address == 0)\
                  .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
                  .where(t.c.name != None)\
                  .where(t.c.indexed_status == 0)\
                  .where(t.c.linked_place_id == None)\
                  .where(self._filter_by_layer(t))\
                  .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
                  .order_by(sa.desc(t.c.rank_search))\
                  .order_by('distance')\
                  .limit(50)\
                  .subquery()

        sql = _select_from_placex(inner, False)\
                  .where(sa.or_(sa.not_(inner.c.geometry.is_area()),
                                inner.c.geometry.ST_Contains(WKT_PARAM)))\
                  .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                  .limit(1)

        if self.has_geometries():
            sql = self._add_geometry_columns(sql, inner.c.geometry)

        row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
        log().var_dump('Result (non-address feature)', row)

        return row


    async def lookup_area(self) -> Optional[SaRow]:
        """ Lookup large areas for the current search.
        """
        log().section('Reverse lookup by larger area features')

        if self.layer_enabled(DataLayer.ADDRESS):
            address_row = await self._lookup_area_address()
        else:
            address_row = None

        if self.has_feature_layers():
            other_row = await self._lookup_area_others()
        else:
            other_row = None

        # Whichever candidate is closer wins.
        return _get_closest(address_row, other_row)


    async def lookup_country_codes(self) -> List[str]:
        """ Lookup the country for the current search.
        """
        log().section('Reverse lookup by country code')
        t = self.conn.t.country_grid
        sql = sa.select(t.c.country_code).distinct()\
                .where(t.c.geometry.ST_Contains(WKT_PARAM))

        ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
        log().var_dump('Country codes', ccodes)
        return ccodes


    async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
        """ Lookup the country for the current search.

            If no country codes are given, they are determined from the
            country grid first.
        """
        if not ccodes:
            ccodes = await self.lookup_country_codes()

        if not ccodes:
            return None

        t = self.conn.t.placex
        if self.max_rank > 4:
            log().comment('Search for place nodes in country')

            def _base_query() -> SaSelect:
                inner = \
                    sa.select(t,
                              t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
                      .where(t.c.rank_search > 4)\
                      .where(t.c.rank_search <= MAX_RANK_PARAM)\
                      .where(t.c.indexed_status == 0)\
                      .where(t.c.country_code.in_(ccodes))\
                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                      .order_by(sa.desc(t.c.rank_search))\
                      .limit(50)\
                      .subquery('area')

                return _select_from_placex(inner, False)\
                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
                    .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                    .limit(1)

            sql: SaLambdaSelect
            if self.has_geometries():
                sql = self._add_geometry_columns(_base_query(),
                                                 sa.literal_column('area.geometry'))
            else:
                sql = sa.lambda_stmt(_base_query)

            address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
            log().var_dump('Result (addressable place node)', address_row)
        else:
            address_row = None

        if address_row is None:
            # Still nothing, then return a country with the appropriate country code.
            def _country_base_query() -> SaSelect:
                return _select_from_placex(t)\
                    .where(t.c.country_code.in_(ccodes))\
                    .where(t.c.rank_address == 4)\
                    .where(t.c.rank_search == 4)\
                    .where(t.c.linked_place_id == None)\
                    .order_by('distance')\
                    .limit(1)

            if self.has_geometries():
                sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
            else:
                sql = sa.lambda_stmt(_country_base_query)

            address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()

        return address_row


    async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
        """ Look up a single coordinate. Returns the place information,
            if a place was found near the coordinates or None otherwise.
        """
        log().function('reverse_lookup', coord=coord, params=self.params)

        self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'

        row: Optional[SaRow] = None
        row_func: RowFunc = nres.create_from_placex_row

        # Try the most detailed lookup first, then fall back to ever
        # larger areas and finally the country.
        if self.max_rank >= 26:
            row, tmp_row_func = await self.lookup_street_poi()
            if row is not None:
                row_func = tmp_row_func

        if row is None:
            if self.restrict_to_country_areas:
                ccodes = await self.lookup_country_codes()
                if not ccodes:
                    return None
            else:
                ccodes = []

            if self.max_rank > 4:
                row = await self.lookup_area()
            if row is None and self.layer_enabled(DataLayer.ADDRESS):
                row = await self.lookup_country(ccodes)

        result = row_func(row, nres.ReverseResult)
        if result is not None:
            assert row is not None
            result.distance = row.distance
            # Not all source tables provide a bbox (e.g. interpolations).
            if hasattr(row, 'bbox'):
                result.bbox = Bbox.from_wkb(row.bbox)
            await nres.add_result_details(self.conn, [result], self.params)

        return result
|
||||
15
nominatim/api/search/__init__.py
Normal file
15
nominatim/api/search/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Module for forward search.
|
||||
"""
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .geocoder import (ForwardGeocoder as ForwardGeocoder)
|
||||
from .query import (Phrase as Phrase,
|
||||
PhraseType as PhraseType)
|
||||
from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)
|
||||
411
nominatim/api/search/db_search_builder.py
Normal file
411
nominatim/api/search/db_search_builder.py
Normal file
@@ -0,0 +1,411 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Conversion from token assignment to an abstract DB search.
|
||||
"""
|
||||
from typing import Optional, List, Tuple, Iterator, Dict
|
||||
import heapq
|
||||
|
||||
from nominatim.api.types import SearchDetails, DataLayer
|
||||
from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange, BreakType
|
||||
from nominatim.api.search.token_assignment import TokenAssignment
|
||||
import nominatim.api.search.db_search_fields as dbf
|
||||
import nominatim.api.search.db_searches as dbs
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
                     search: dbs.AbstractSearch) -> dbs.NearSearch:
    """ Create a new search that wraps the given search in a search
        for near places of the given category.
    """
    # All categories get equal (zero) penalty weights.
    weights = [0.0] * len(categories)
    near_categories = dbf.WeightedCategories(categories, weights)

    return dbs.NearSearch(penalty=search.penalty,
                          categories=near_categories,
                          search=search)
|
||||
|
||||
|
||||
def build_poi_search(category: List[Tuple[str, str]],
                     countries: Optional[List[str]]) -> dbs.PoiSearch:
    """ Create a new search for places by the given category, possibly
        constrained to the given countries.
    """
    ccs = dbf.WeightedStrings(countries, [0.0] * len(countries)) \
          if countries else dbf.WeightedStrings([], [])

    class _PoiData(dbf.SearchData):
        # Static search data: no base penalty, fixed category qualifiers
        # and optional country restriction.
        penalty = 0.0
        qualifiers = dbf.WeightedCategories(category, [0.0] * len(category))
        countries = ccs

    return dbs.PoiSearch(_PoiData())
|
||||
|
||||
|
||||
class SearchBuilder:
    """ Build the abstract search queries from token assignments.

        The builder turns one TokenAssignment into zero or more abstract
        database searches, each with a penalty reflecting how likely the
        interpretation is.
    """

    def __init__(self, query: QueryStruct, details: SearchDetails) -> None:
        self.query = query
        self.details = details


    @property
    def configured_for_country(self) -> bool:
        """ Return true if the search details are configured to
            allow countries in the result.
        """
        return self.details.min_rank <= 4 and self.details.max_rank >= 4 \
               and self.details.layer_enabled(DataLayer.ADDRESS)


    @property
    def configured_for_postcode(self) -> bool:
        """ Return true if the search details are configured to
            allow postcodes in the result.
        """
        return self.details.min_rank <= 5 and self.details.max_rank >= 11 \
               and self.details.layer_enabled(DataLayer.ADDRESS)


    @property
    def configured_for_housenumbers(self) -> bool:
        """ Return true if the search details are configured to
            allow addresses in the result.
        """
        return self.details.max_rank >= 30 \
               and self.details.layer_enabled(DataLayer.ADDRESS)


    def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
        """ Yield all possible abstract searches for the given token assignment.
        """
        sdata = self.get_search_data(assignment)
        if sdata is None:
            return

        near_items = self.get_near_items(assignment)
        if near_items is not None and not near_items:
            return # impossible combination of near items and category parameter

        if assignment.name is None:
            if near_items and not sdata.postcodes:
                # Category-only search: use the near items as qualifiers.
                sdata.qualifiers = near_items
                near_items = None
                builder = self.build_poi_search(sdata)
            elif assignment.housenumber:
                hnr_tokens = self.query.get_tokens(assignment.housenumber,
                                                   TokenType.HOUSENUMBER)
                builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
            else:
                builder = self.build_special_search(sdata, assignment.address,
                                                    bool(near_items))
        else:
            builder = self.build_name_search(sdata, assignment.name, assignment.address,
                                             bool(near_items))

        if near_items:
            # Normalize the near-item penalties to a minimum of 0 and
            # move all remaining penalty onto the wrapping NearSearch.
            penalty = min(near_items.penalties)
            near_items.penalties = [p - penalty for p in near_items.penalties]
            for search in builder:
                search_penalty = search.penalty
                search.penalty = 0.0
                yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
                                     near_items, search)
        else:
            for search in builder:
                search.penalty += assignment.penalty
                yield search


    def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search query for a simple category search.
            This kind of search requires an additional geographic constraint.
        """
        if not sdata.housenumbers \
           and ((self.details.viewbox and self.details.bounded_viewbox) or self.details.near):
            yield dbs.PoiSearch(sdata)


    def build_special_search(self, sdata: dbf.SearchData,
                             address: List[TokenRange],
                             is_category: bool) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search queries for searches that do not involve
            a named place.
        """
        if sdata.qualifiers:
            # No special searches over qualifiers supported.
            return

        if sdata.countries and not address and not sdata.postcodes \
           and self.configured_for_country:
            yield dbs.CountrySearch(sdata)

        if sdata.postcodes and (is_category or self.configured_for_postcode):
            # Penalize postcode searches without a country context.
            penalty = 0.0 if sdata.countries else 0.1
            if address:
                # Restrict the postcode search by the partial terms of
                # the address part of the query.
                sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
                                                 [t.token for r in address
                                                  for t in self.query.get_partials_list(r)],
                                                 lookups.Restrict)]
                penalty += 0.2
            yield dbs.PostcodeSearch(penalty, sdata)


    def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
                                 address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
        """ Build a simple address search for special entries where the
            housenumber is the main name token.
        """
        sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
        expected_count = sum(t.count for t in hnrs)

        partials = {t.token: t.count for trange in address
                    for t in self.query.get_partials_list(trange)}

        if expected_count < 8000:
            # Few housenumber matches expected: address terms can be
            # checked without an index.
            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
                                                 list(partials), lookups.Restrict))
        elif len(partials) != 1 or list(partials.values())[0] < 10000:
            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
                                                 list(partials), lookups.LookupAll))
        else:
            # Housenumber and the single partial are both frequent:
            # fall back to full-word tokens of the first address part.
            addr_fulls = [t.token for t
                          in self.query.get_tokens(address[0], TokenType.WORD)]
            if len(addr_fulls) > 5:
                return
            sdata.lookups.append(
                dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))

        sdata.housenumbers = dbf.WeightedStrings([], [])
        yield dbs.PlaceSearch(0.05, sdata, expected_count)


    def build_name_search(self, sdata: dbf.SearchData,
                          name: TokenRange, address: List[TokenRange],
                          is_category: bool) -> Iterator[dbs.AbstractSearch]:
        """ Build abstract search queries for simple name or address searches.
        """
        if is_category or not sdata.housenumbers or self.configured_for_housenumbers:
            ranking = self.get_name_ranking(name)
            name_penalty = ranking.normalize_penalty()
            if ranking.rankings:
                sdata.rankings.append(ranking)
            for penalty, count, lookup in self.yield_lookups(name, address):
                sdata.lookups = lookup
                yield dbs.PlaceSearch(penalty + name_penalty, sdata, count)


    def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
            -> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
        """ Yield all variants how the given name and address should best
            be searched for. This takes into account how frequent the terms
            are and tries to find a lookup that optimizes index use.
        """
        penalty = 0.0 # extra penalty
        name_partials = {t.token: t for t in self.query.get_partials_list(name)}

        addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
        addr_tokens = list({t.token for t in addr_partials})

        partials_indexed = all(t.is_indexed for t in name_partials.values()) \
            and all(t.is_indexed for t in addr_partials)
        # Rough estimate: each additional name partial halves the hits.
        exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))

        if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
            yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
            return

        # Partial term too frequent. Try looking up by rare full names first.
        name_fulls = self.query.get_tokens(name, TokenType.WORD)
        if name_fulls:
            fulls_count = sum(t.count for t in name_fulls)
            # At this point drop unindexed partials from the address.
            # This might yield wrong results, nothing we can do about that.
            if not partials_indexed:
                addr_tokens = [t.token for t in addr_partials if t.is_indexed]
                penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
            # Any of the full names applies with all of the partials from the address
            yield penalty, fulls_count / (2**len(addr_tokens)),\
                  dbf.lookup_by_any_name([t.token for t in name_fulls],
                                         addr_tokens,
                                         fulls_count > 30000 / max(1, len(addr_tokens)))

        # To catch remaining results, lookup by name and address
        # We only do this if there is a reasonable number of results expected.
        exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
        if exp_count < 10000 and all(t.is_indexed for t in name_partials.values()):
            lookup = [dbf.FieldLookup('name_vector', list(name_partials.keys()), lookups.LookupAll)]
            if addr_tokens:
                lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
            penalty += 0.35 * max(1 if name_fulls else 0.1,
                                  5 - len(name_partials) - len(addr_tokens))
            yield penalty, exp_count, lookup


    def get_name_ranking(self, trange: TokenRange,
                         db_field: str = 'name_vector') -> dbf.FieldRanking:
        """ Create a ranking expression for a name term in the given range.
        """
        name_fulls = self.query.get_tokens(trange, TokenType.WORD)
        ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
        ranks.sort(key=lambda r: r.penalty)
        # Fallback, sum of penalty for partials
        name_partials = self.query.get_partials_list(trange)
        default = sum(t.penalty for t in name_partials) + 0.2
        return dbf.FieldRanking(db_field, default, ranks)


    def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
        """ Create a list of ranking expressions for an address term
            for the given ranges.
        """
        # Breadth-first exploration of token combinations covering the
        # range; the heap orders by negative length so that longer
        # (more specific) combinations are expanded first.
        todo: List[Tuple[int, int, dbf.RankedTokens]] = []
        heapq.heappush(todo, (0, trange.start, dbf.RankedTokens(0.0, [])))
        ranks: List[dbf.RankedTokens] = []

        while todo: # pylint: disable=too-many-nested-blocks
            neglen, pos, rank = heapq.heappop(todo)
            for tlist in self.query.nodes[pos].starting:
                if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD):
                    if tlist.end < trange.end:
                        chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
                        if tlist.ttype == TokenType.PARTIAL:
                            penalty = rank.penalty + chgpenalty \
                                      + max(t.penalty for t in tlist.tokens)
                            heapq.heappush(todo, (neglen - 1, tlist.end,
                                                  dbf.RankedTokens(penalty, rank.tokens)))
                        else:
                            for t in tlist.tokens:
                                heapq.heappush(todo, (neglen - 1, tlist.end,
                                                      rank.with_token(t, chgpenalty)))
                    elif tlist.end == trange.end:
                        if tlist.ttype == TokenType.PARTIAL:
                            ranks.append(dbf.RankedTokens(rank.penalty
                                                          + max(t.penalty for t in tlist.tokens),
                                                          rank.tokens))
                        else:
                            ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
                        if len(ranks) >= 10:
                            # Too many variants, bail out and only add
                            # Worst-case Fallback: sum of penalty of partials
                            name_partials = self.query.get_partials_list(trange)
                            default = sum(t.penalty for t in name_partials) + 0.2
                            ranks.append(dbf.RankedTokens(rank.penalty + default, []))
                            # Bail out of outer loop
                            todo.clear()
                            break

        # The shortest (fewest-token) variant becomes the default penalty.
        ranks.sort(key=lambda r: len(r.tokens))
        default = ranks[0].penalty + 0.3
        del ranks[0]
        ranks.sort(key=lambda r: r.penalty)

        return dbf.FieldRanking('nameaddress_vector', default, ranks)


    def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
        """ Collect the tokens for the non-name search fields in the
            assignment.

            Returns None if the assignment cannot produce results under
            the current search details (e.g. country or qualifier filter
            leaves no matching tokens).
        """
        sdata = dbf.SearchData()
        sdata.penalty = assignment.penalty
        if assignment.country:
            tokens = self.get_country_tokens(assignment.country)
            if not tokens:
                return None
            sdata.set_strings('countries', tokens)
        elif self.details.countries:
            sdata.countries = dbf.WeightedStrings(self.details.countries,
                                                  [0.0] * len(self.details.countries))
        if assignment.housenumber:
            sdata.set_strings('housenumbers',
                              self.query.get_tokens(assignment.housenumber,
                                                    TokenType.HOUSENUMBER))
        if assignment.postcode:
            sdata.set_strings('postcodes',
                              self.query.get_tokens(assignment.postcode,
                                                    TokenType.POSTCODE))
        if assignment.qualifier:
            tokens = self.get_qualifier_tokens(assignment.qualifier)
            if not tokens:
                return None
            sdata.set_qualifiers(tokens)
        elif self.details.categories:
            sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
                                                      [0.0] * len(self.details.categories))

        if assignment.address:
            if not assignment.name and assignment.housenumber:
                # housenumber search: the first item needs to be handled like
                # a name in ranking or penalties are not comparable with
                # normal searches.
                sdata.set_ranking([self.get_name_ranking(assignment.address[0],
                                                         db_field='nameaddress_vector')]
                                  + [self.get_addr_ranking(r) for r in assignment.address[1:]])
            else:
                sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
        else:
            sdata.rankings = []

        return sdata


    def get_country_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of country tokens for the given range,
            optionally filtered by the country list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
        if self.details.countries:
            tokens = [t for t in tokens if t.lookup_word in self.details.countries]

        return tokens


    def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
        """ Return the list of qualifier tokens for the given range,
            optionally filtered by the qualifier list from the details
            parameters.
        """
        tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
        if self.details.categories:
            tokens = [t for t in tokens if t.get_category() in self.details.categories]

        return tokens


    def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
        """ Collect tokens for near items search or use the categories
            requested per parameter.
            Returns None if no category search is requested.
        """
        if assignment.near_item:
            tokens: Dict[Tuple[str, str], float] = {}
            for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
                cat = t.get_category()
                # The category of a near search will be that of near_item.
                # Thus, if search is restricted to a category parameter,
                # the two sets must intersect.
                if (not self.details.categories or cat in self.details.categories)\
                   and t.penalty < tokens.get(cat, 1000.0):
                    tokens[cat] = t.penalty
            return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))

        return None
|
||||
|
||||
|
||||
# Penalty applied in SearchBuilder.get_addr_ranking() when a token
# sequence continues across the given type of word break.
PENALTY_WORDCHANGE = {
    BreakType.START: 0.0,
    BreakType.END: 0.0,
    BreakType.PHRASE: 0.0,
    BreakType.WORD: 0.1,
    BreakType.PART: 0.2,
    BreakType.TOKEN: 0.4
}
|
||||
253
nominatim/api/search/db_search_fields.py
Normal file
253
nominatim/api/search/db_search_fields.py
Normal file
@@ -0,0 +1,253 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Data structures for more complex fields in abstract search descriptions.
|
||||
"""
|
||||
from typing import List, Tuple, Iterator, Dict, Type
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
||||
from nominatim.api.search.query import Token
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
from nominatim.utils.json_writer import JsonWriter
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WeightedStrings:
    """ A list of string values, each paired with a penalty.
    """
    values: List[str]
    penalties: List[float]

    def __bool__(self) -> bool:
        # Truthy when at least one value is present.
        return len(self.values) > 0


    def __iter__(self) -> Iterator[Tuple[str, float]]:
        # Iterate over (value, penalty) pairs in parallel.
        yield from zip(self.values, self.penalties)


    def get_penalty(self, value: str, default: float = 1000.0) -> float:
        """ Get the penalty for the given value. Returns the given default
            if the value does not exist.
        """
        for candidate, penalty in zip(self.values, self.penalties):
            if candidate == value:
                return penalty
        return default
|
||||
|
||||
|
||||
@dataclasses.dataclass
class WeightedCategories:
    """ A list of class/type tuples, each paired with a penalty.
    """
    values: List[Tuple[str, str]]
    penalties: List[float]

    def __bool__(self) -> bool:
        # Truthy when at least one category is present.
        return len(self.values) > 0


    def __iter__(self) -> Iterator[Tuple[Tuple[str, str], float]]:
        # Iterate over (category, penalty) pairs in parallel.
        yield from zip(self.values, self.penalties)


    def get_penalty(self, value: Tuple[str, str], default: float = 1000.0) -> float:
        """ Get the penalty for the given value. Returns the given default
            if the value does not exist.
        """
        for candidate, penalty in zip(self.values, self.penalties):
            if candidate == value:
                return penalty
        return default


    def sql_restrict(self, table: SaFromClause) -> SaExpression:
        """ Return an SQLAlchemy expression that restricts the
            class and type columns of the given table to the values
            in the list.
            Must not be used with an empty list.
        """
        assert self.values
        if len(self.values) == 1:
            # Single category: a plain conjunction suffices.
            cls, typ = self.values[0]
            return sa.and_(table.c.class_ == cls, table.c.type == typ)

        return sa.or_(*(sa.and_(table.c.class_ == c, table.c.type == t)
                        for c, t in self.values))
|
||||
|
||||
|
||||
@dataclasses.dataclass(order=True)
class RankedTokens:
    """ List of tokens together with the penalty of using it.
    """
    penalty: float
    tokens: List[int]

    def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
        """ Create a new RankedTokens list with the given token appended.
            The token's own penalty as well as the given transition penalty
            are added to the overall penalty.
        """
        extended = list(self.tokens)
        extended.append(t.token)
        return RankedTokens(self.penalty + t.penalty + transition_penalty, extended)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FieldRanking:
    """ A list of rankings to be applied sequentially until one matches.
        The matched ranking determines the penalty. If none matches a
        default penalty is applied.
    """
    # Name of the database column the rankings apply to.
    column: str
    # Penalty used when no ranking matches.
    default: float
    rankings: List[RankedTokens]

    def normalize_penalty(self) -> float:
        """ Reduce the default and ranking penalties, such that the minimum
            penalty is 0. Return the penalty that was subtracted.
        """
        if self.rankings:
            min_penalty = min(self.default, min(r.penalty for r in self.rankings))
        else:
            min_penalty = self.default
        if min_penalty > 0.0:
            self.default -= min_penalty
            for ranking in self.rankings:
                ranking.penalty -= min_penalty
        return min_penalty


    def sql_penalty(self, table: SaFromClause) -> SaColumn:
        """ Create an SQL expression for the rankings.
        """
        assert self.rankings

        # Serialize the rankings as JSON: [[penalty, [token, ...]], ...]
        # which is the format expected by the weigh_search() SQL function.
        rout = JsonWriter().start_array()
        for rank in self.rankings:
            rout.start_array().value(rank.penalty).next()
            rout.start_array()
            for token in rank.tokens:
                rout.value(token).next()
            rout.end_array()
            rout.end_array().next()
        rout.end_array()

        return sa.func.weigh_search(table.c[self.column], rout(), self.default)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class FieldLookup:
    """ A list of tokens to be searched for. The column names the database
        column to search in and the lookup_type the operator that is applied.
        'lookup_all' requires all tokens to match. 'lookup_any' requires
        one of the tokens to match. 'restrict' requires to match all tokens
        but avoids the use of indexes.
    """
    # Name of the search_name column to match against.
    column: str
    # Token ids to look up.
    tokens: List[int]
    # One of the operator classes from db_search_lookups.
    lookup_type: Type[lookups.LookupType]

    def sql_condition(self, table: SaFromClause) -> SaColumn:
        """ Create an SQL expression for the given match condition.
        """
        return self.lookup_type(table, self.column, self.tokens)
|
||||
|
||||
|
||||
class SearchData:
    """ Search fields derived from query and token assignment
        to be used with the SQL queries.
    """
    # Base penalty applied to any result of this search.
    penalty: float

    # NOTE(review): 'lookups' and the Weighted* defaults below are shared
    # mutable class-level objects. Code must assign fresh instances
    # (as set_strings()/set_qualifiers() do) rather than mutate the
    # defaults in place.
    lookups: List[FieldLookup] = []
    rankings: List[FieldRanking]

    housenumbers: WeightedStrings = WeightedStrings([], [])
    postcodes: WeightedStrings = WeightedStrings([], [])
    countries: WeightedStrings = WeightedStrings([], [])

    qualifiers: WeightedCategories = WeightedCategories([], [])


    def set_strings(self, field: str, tokens: List[Token]) -> None:
        """ Set one of the WeightedStrings properties from the given
            token list. Adapt the global penalty, so that the
            minimum penalty is 0.
        """
        if tokens:
            min_penalty = min(t.penalty for t in tokens)
            self.penalty += min_penalty
            wstrs = WeightedStrings([t.lookup_word for t in tokens],
                                    [t.penalty - min_penalty for t in tokens])

            setattr(self, field, wstrs)


    def set_qualifiers(self, tokens: List[Token]) -> None:
        """ Set the qualifier field from the given tokens.
        """
        if tokens:
            categories: Dict[Tuple[str, str], float] = {}
            min_penalty = 1000.0
            for t in tokens:
                min_penalty = min(min_penalty, t.penalty)
                cat = t.get_category()
                # Keep only the cheapest token per category.
                if t.penalty < categories.get(cat, 1000.0):
                    categories[cat] = t.penalty
            self.penalty += min_penalty
            self.qualifiers = WeightedCategories(list(categories.keys()),
                                                 list(categories.values()))


    def set_ranking(self, rankings: List[FieldRanking]) -> None:
        """ Set the list of rankings and normalize the ranking.
        """
        self.rankings = []
        for ranking in rankings:
            if ranking.rankings:
                self.penalty += ranking.normalize_penalty()
                self.rankings.append(ranking)
            else:
                # Empty ranking: fold its default penalty into the base
                # penalty and drop the ranking entirely.
                self.penalty += ranking.default
|
||||
|
||||
|
||||
def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
    """ Create a lookup list where name tokens are looked up via index
        and potential address tokens are used to restrict the search further.
    """
    fields = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
    if addr_tokens:
        fields.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))

    return fields
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
                       use_index_for_addr: bool) -> List[FieldLookup]:
    """ Create a lookup list where name tokens are looked up via index
        and only one of the name tokens must be present.
        Potential address tokens are used to restrict the search further.
    """
    fields = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
    if addr_tokens:
        # Choose between index-based and restriction-only address matching.
        addr_op = lookups.LookupAll if use_index_for_addr else lookups.Restrict
        fields.append(FieldLookup('nameaddress_vector', addr_tokens, addr_op))

    return fields
|
||||
|
||||
|
||||
def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
    """ Create a lookup list where address tokens are looked up via index
        and the name tokens are only used to restrict the search further.
    """
    name_field = FieldLookup('name_vector', name_tokens, lookups.Restrict)
    addr_field = FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)
    return [name_field, addr_field]
|
||||
114
nominatim/api/search/db_search_lookups.py
Normal file
114
nominatim/api/search/db_search_lookups.py
Normal file
@@ -0,0 +1,114 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of lookup functions for the search_name table.
|
||||
"""
|
||||
from typing import List, Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
from nominatim.typing import SaFromClause
|
||||
from nominatim.db.sqlalchemy_types import IntArray
|
||||
|
||||
# pylint: disable=consider-using-f-string
|
||||
|
||||
# Common base for the lookup operators: a SQLAlchemy function element
# whose SQL rendering is supplied per dialect via @compiles below.
LookupType = sa.sql.expression.FunctionElement[Any]

class LookupAll(LookupType):
    """ Find all entries in search_name table that contain all of
        a given list of tokens using an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Clause order: place_id column, token column, column name,
        # token array. The compile functions unpack them positionally.
        super().__init__(table.c.place_id, getattr(table.c, column), column,
                         sa.type_coerce(tokens, IntArray))


@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    # Default (PostgreSQL) rendering: array containment, index-capable.
    _, col, _, tokens = list(element.clauses)
    return "(%s @> %s)" % (compiler.process(col, **kw),
                           compiler.process(tokens, **kw))


@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    # SQLite rendering: emulate indexed array containment through the
    # reverse_search_name helper table, then verify with array_contains.
    place, col, colname, tokens = list(element.clauses)
    return "(%s IN (SELECT CAST(value as bigint) FROM"\
           " (SELECT array_intersect_fuzzy(places) as p FROM"\
           " (SELECT places FROM reverse_search_name"\
           " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
           " AND column = %s"\
           " ORDER BY length(places)) as x) as u,"\
           " json_each('[' || u.p || ']'))"\
           " AND array_contains(%s, %s))"\
           % (compiler.process(place, **kw),
              compiler.process(tokens, **kw),
              compiler.process(colname, **kw),
              compiler.process(col, **kw),
              compiler.process(tokens, **kw)
             )
|
||||
|
||||
|
||||
|
||||
class LookupAny(LookupType):
    """ Find all entries that contain at least one of the given tokens.
        Use an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Same clause layout as LookupAll: place_id, token column,
        # column name, token array.
        super().__init__(table.c.place_id, getattr(table.c, column), column,
                         sa.type_coerce(tokens, IntArray))


@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
                        compiler: 'sa.Compiled', **kw: Any) -> str:
    # Default (PostgreSQL) rendering: array overlap operator.
    _, col, _, tokens = list(element.clauses)
    return "(%s && %s)" % (compiler.process(col, **kw),
                           compiler.process(tokens, **kw))


@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
                       compiler: 'sa.Compiled', **kw: Any) -> str:
    # SQLite rendering: union the candidate place lists of all tokens
    # from the reverse_search_name helper table.
    place, _, colname, tokens = list(element.clauses)
    return "%s IN (SELECT CAST(value as bigint) FROM"\
           " (SELECT array_union(places) as p FROM reverse_search_name"\
           " WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
           " AND column = %s) as u,"\
           " json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
                                               compiler.process(tokens, **kw),
                                               compiler.process(colname, **kw))
|
||||
|
||||
|
||||
|
||||
class Restrict(LookupType):
    """ Find all entries that contain all of the given tokens.
        Do not use an index for the search.
    """
    inherit_cache = True

    def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
        # Only two clauses here: token column and token array; no
        # place_id is needed because no helper-table join is involved.
        super().__init__(getattr(table.c, column),
                         sa.type_coerce(tokens, IntArray))


@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
                      compiler: 'sa.Compiled', **kw: Any) -> str:
    # The coalesce(null, ...) wrapper keeps the planner from choosing
    # an index for this containment check.
    arg1, arg2 = list(element.clauses)
    return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
                                           compiler.process(arg2, **kw))


@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
                     compiler: 'sa.Compiled', **kw: Any) -> str:
    # SQLite rendering: both clauses are passed through as arguments
    # to the custom array_contains() function.
    return "array_contains(%s)" % compiler.process(element.clauses, **kw)
|
||||
816
nominatim/api/search/db_searches.py
Normal file
816
nominatim/api/search/db_searches.py
Normal file
@@ -0,0 +1,816 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the actual database accesses for forward search.
|
||||
"""
|
||||
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
|
||||
import abc
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
|
||||
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
|
||||
import nominatim.api.results as nres
|
||||
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
|
||||
from nominatim.db.sqlalchemy_types import Geometry, IntArray
|
||||
|
||||
#pylint: disable=singleton-comparison,not-callable
|
||||
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
|
||||
|
||||
def no_index(expr: SaColumn) -> SaColumn:
    """ Wrap the given expression, so that the query planner will
        refrain from using the expression for index lookup.
    """
    # coalesce(NULL, expr) evaluates to expr but is opaque to the planner.
    null_value = sa.null()
    return sa.func.coalesce(null_value, expr) # pylint: disable=not-callable
|
||||
|
||||
|
||||
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
    """ Create a dictionary from search parameters that can be used
        as bind parameter for SQL execute.
    """
    return dict(limit=details.max_results,
                min_rank=details.min_rank,
                max_rank=details.max_rank,
                viewbox=details.viewbox,
                viewbox2=details.viewbox_x2,
                near=details.near,
                near_radius=details.near_radius,
                excluded=details.excluded,
                countries=details.countries)
|
||||
|
||||
|
||||
# Bind parameters shared by the search implementations. Their values
# come from _details_to_bind_params() at execution time.
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
||||
|
||||
|
||||
def filter_by_area(sql: SaSelect, t: SaFromClause,
                   details: SearchDetails, avoid_index: bool = False) -> SaSelect:
    """ Restrict the given select by the near point and the bounded
        viewbox from the search parameters, where applicable.

        When 'avoid_index' is set, the filters are written so that the
        query planner will not pick a spatial index for them.
    """
    has_near_filter = details.near is not None and details.near_radius is not None
    if has_near_filter:
        # Small radii may use the index-backed distance operator; larger
        # ones (or explicit index avoidance) fall back to ST_Distance.
        if avoid_index or details.near_radius >= 0.1:
            sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
        else:
            sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))

    if details.viewbox is not None and details.bounded_viewbox:
        # Only use the index for reasonably small viewboxes.
        allow_index = not avoid_index and details.viewbox.area < 0.2
        sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM, use_index=allow_index))

    return sql
|
||||
|
||||
|
||||
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
    """ Return a deferred condition that filters out the place ids bound
        to the 'excluded' parameter.
    """
    def _condition() -> SaExpression:
        return t.c.place_id.not_in(sa.bindparam('excluded'))

    return _condition
|
||||
|
||||
|
||||
def _select_placex(t: SaFromClause) -> SaSelect:
    """ Create the canonical select over the placex table used by all
        searches, including a zero-expanded geometry labelled 'bbox'.
    """
    columns = [t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
               t.c.class_, t.c.type,
               t.c.address, t.c.extratags,
               t.c.housenumber, t.c.postcode, t.c.country_code,
               t.c.wikipedia,
               t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
               t.c.linked_place_id, t.c.admin_level,
               t.c.centroid,
               # ST_Expand(0) yields the plain bounding box of the geometry.
               t.c.geometry.ST_Expand(0).label('bbox')]

    return sa.select(*columns)
|
||||
|
||||
|
||||
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
    """ Add columns for the requested geometry output formats to the
        given select, rendering the geometry in 'col'.

        Applies the configured geometry simplification first, if any.
    """
    if details.geometry_simplification > 0.0:
        col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)

    # Candidate output columns in their canonical order; only those whose
    # format flag is requested are actually added.
    variants = ((GeometryFormat.GEOJSON,
                 sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson')),
                (GeometryFormat.TEXT,
                 sa.func.ST_AsText(col).label('geometry_text')),
                (GeometryFormat.KML,
                 sa.func.ST_AsKML(col, 7).label('geometry_kml')),
                (GeometryFormat.SVG,
                 sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg')))

    return sql.add_columns(*(expr for fmt, expr in variants
                             if details.geometry_output & fmt))
|
||||
|
||||
|
||||
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                                 numerals: List[int], details: SearchDetails) -> SaScalarSelect:
    """ Create a scalar subquery that collects the place ids of
        interpolation lines from 'table' covering any of the given
        house numbers and parented to a row of 'inner'.
    """
    agg_ids = sa.func.ArrayAgg(table.c.place_id)
    sql = sa.select(agg_ids).where(table.c.parent_place_id == inner.c.place_id)

    # A number matches when it falls into the interpolation range and is
    # reachable from startnumber with the line's step width.
    if len(numerals) == 1:
        num = numerals[0]
        sql = sql.where(sa.between(num, table.c.startnumber, table.c.endnumber))\
                 .where((num - table.c.startnumber) % table.c.step == 0)
    else:
        conditions = [sa.and_(sa.between(num, table.c.startnumber, table.c.endnumber),
                              (num - table.c.startnumber) % table.c.step == 0)
                      for num in numerals]
        sql = sql.where(sa.or_(*conditions))

    if details.excluded:
        sql = sql.where(_exclude_places(table))

    return sql.scalar_subquery()
|
||||
|
||||
|
||||
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
    """ Create a where condition that restricts the given placex-like
        table to the set of requested data layers.

        All rank_address comparisons are wrapped in no_index() so that
        the planner does not attempt an index lookup on them.
    """
    orexpr: List[SaExpression] = []
    # Address/POI selection is driven by the address rank: the code below
    # treats ranks 1-29 as address objects and rank 30 as POI-level.
    if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
        orexpr.append(no_index(table.c.rank_address).between(1, 30))
    elif layers & DataLayer.ADDRESS:
        orexpr.append(no_index(table.c.rank_address).between(1, 29))
        orexpr.append(sa.func.IsAddressPoint(table))
    elif layers & DataLayer.POI:
        # POIs are rank-30 objects that are not mere places or buildings.
        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                              table.c.class_.not_in(('place', 'building'))))

    # Objects with rank_address == 0 are selected by their class.
    # MANMADE acts as a catch-all with optional exclusions; otherwise
    # only the explicitly requested classes are included.
    if layers & DataLayer.MANMADE:
        exclude = []
        if not layers & DataLayer.RAILWAY:
            exclude.append('railway')
        if not layers & DataLayer.NATURAL:
            exclude.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
                              no_index(table.c.rank_address) == 0))
    else:
        include = []
        if layers & DataLayer.RAILWAY:
            include.append('railway')
        if layers & DataLayer.NATURAL:
            include.extend(('natural', 'water', 'waterway'))
        orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
                              no_index(table.c.rank_address) == 0))

    # Avoid a needless OR wrapper when only one condition was collected.
    if len(orexpr) == 1:
        return orexpr[0]

    return sa.or_(*orexpr)
|
||||
|
||||
|
||||
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
    """ Return an expression labelled 'centroid' with the position of
        house number 'nr' on the interpolation line of 'table'.
    """
    span = table.c.endnumber - table.c.startnumber
    fraction = sa.cast(nr - table.c.startnumber, sa.Float) / span
    # A degenerate one-number line has no extent; use its centroid to
    # avoid a division by zero in the interpolation fraction.
    single_point = table.c.endnumber == table.c.startnumber
    return sa.case((single_point, table.c.linegeo.ST_Centroid()),
                   else_=table.c.linegeo.ST_LineInterpolatePoint(fraction)).label('centroid')
|
||||
|
||||
|
||||
async def _get_placex_housenumbers(conn: SearchConnection,
                                   place_ids: List[int],
                                   details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield search results for the given place ids from the placex table.
    """
    t = conn.t.placex
    sql = _select_placex(t).add_columns(t.c.importance)\
                           .where(t.c.place_id.in_(place_ids))

    if details.geometry_output:
        sql = _add_geometry_columns(sql, t.c.geometry, details)

    rows = await conn.execute(sql)
    for row in rows:
        result = nres.create_from_placex_row(row, nres.SearchResult)
        assert result
        result.bbox = Bbox.from_wkb(row.bbox)
        yield result
|
||||
|
||||
|
||||
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
    """ Create a subselect that returns the given list of integers
        as rows in the column 'nr'.
    """
    # Ship the list as a JSON literal and explode it into rows.
    json_array = sa.type_coerce(inp, sa.JSON)
    vtab = sa.func.JsonArrayEach(json_array)\
             .table_valued(sa.column('value', type_=sa.JSON))
    # JSON values must pass through text before becoming integers.
    nr_column = sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')
    return sa.select(nr_column).subquery()
|
||||
|
||||
|
||||
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
                       numerals: List[int],
                       details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield search results for the given house numbers on the
        interpolation lines identified by 'place_ids'.
    """
    t = conn.t.osmline

    # Turn the requested house numbers into rows, so that each number in
    # an interpolation's range becomes its own result row with an
    # interpolated position.
    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.osm_id,
                    t.c.parent_place_id, t.c.address,
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode, t.c.country_code)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        # The geometry output must be rendered from the interpolated
        # centroid, so compute it over a subquery of the base select.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_osmline_row(row, nres.SearchResult)
        assert result
        yield result
|
||||
|
||||
|
||||
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
                     numerals: List[int], osm_id: int,
                     details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
    """ Yield search results for the given house numbers on the Tiger
        interpolation lines identified by 'place_ids'.

        Tiger rows carry no OSM id of their own, so the id of the parent
        way ('osm_id') is attached to each result.
    """
    t = conn.t.tiger
    # One result row per requested house number within the line's range.
    values = _int_list_to_subquery(numerals)
    sql = sa.select(t.c.place_id, t.c.parent_place_id,
                    sa.literal('W').label('osm_type'),
                    sa.literal(osm_id).label('osm_id'),
                    values.c.nr.label('housenumber'),
                    _interpolated_position(t, values.c.nr),
                    t.c.postcode)\
            .where(t.c.place_id.in_(place_ids))\
            .join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))

    if details.geometry_output:
        # Render geometry from the interpolated centroid.
        sub = sql.subquery()
        sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)

    for row in await conn.execute(sql):
        result = nres.create_from_tiger_row(row, nres.SearchResult)
        assert result
        yield result
|
||||
|
||||
|
||||
class AbstractSearch(abc.ABC):
    """ Base class encapsulating a single lookup in the database.

        'penalty' expresses how badly the query tokens fit this
        interpretation; lower values are tried first.
    """
    # Tie-breaker between searches with equal penalty; lower values are
    # executed first (see the sort in the geocoder).
    SEARCH_PRIO: int = 2

    def __init__(self, penalty: float) -> None:
        self.penalty = penalty

    @abc.abstractmethod
    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
|
||||
|
||||
|
||||
class NearSearch(AbstractSearch):
    """ Category search of a place type near the result of another search.
    """
    def __init__(self, penalty: float, categories: WeightedCategories,
                 search: AbstractSearch) -> None:
        super().__init__(penalty)
        # Inner search whose results serve as anchor points.
        self.search = search
        # Categories to look for, each with its own penalty.
        self.categories = categories


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        results = nres.SearchResults()
        base = await self.search.lookup(conn, details)

        if not base:
            return results

        # Use the best base result to derive the acceptable accuracy and
        # address-rank window for anchor candidates.
        base.sort(key=lambda r: (r.accuracy, r.rank_search))
        max_accuracy = base[0].accuracy + 0.5
        if base[0].rank_address == 0:
            min_rank = 0
            max_rank = 0
        elif base[0].rank_address < 26:
            min_rank = 1
            max_rank = min(25, base[0].rank_address + 4)
        else:
            min_rank = 26
            max_rank = 30
        # Keep only placex results that are close in accuracy, have a
        # reasonably small bounding box and fall into the rank window.
        base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
                                                     and r.accuracy <= max_accuracy
                                                     and r.bbox and r.bbox.area < 20
                                                     and r.rank_address >= min_rank
                                                     and r.rank_address <= max_rank)

        if base:
            # At most five anchor places are used per category lookup.
            baseids = [b.place_id for b in base[:5] if b.place_id]

            for category, penalty in self.categories:
                await self.lookup_category(results, conn, baseids, category, penalty, details)
                if len(results) >= details.max_results:
                    break

        return results


    async def lookup_category(self, results: nres.SearchResults,
                              conn: SearchConnection, ids: List[int],
                              category: Tuple[str, str], penalty: float,
                              details: SearchDetails) -> None:
        """ Find places of the given category near the list of
            place ids and add the results to 'results'.
        """
        table = await conn.get_class_table(*category)

        tgeom = conn.t.placex.alias('pgeom')

        if table is None:
            # No classtype table available, do a simplified lookup in placex.
            table = conn.t.placex
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                    .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
                    .where(table.c.class_ == category[0])\
                    .where(table.c.type == category[1])
        else:
            # Use classtype table. We can afford to use a larger
            # radius for the lookup.
            sql = sa.select(table.c.place_id,
                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
                              .label('dist'))\
                    .join(tgeom,
                          table.c.centroid.ST_CoveredBy(
                              sa.case((sa.and_(tgeom.c.rank_address > 9,
                                               tgeom.c.geometry.is_area()),
                                       tgeom.c.geometry),
                                      else_ = tgeom.c.centroid.ST_Expand(0.05))))

        # Keep only the minimum distance per found place over all anchors.
        inner = sql.where(tgeom.c.place_id.in_(ids))\
                   .group_by(table.c.place_id).subquery()

        t = conn.t.placex
        # Distance doubles as (negative) importance so closer places win.
        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
                               .join(inner, inner.c.place_id == t.c.place_id)\
                               .order_by(inner.c.dist)

        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
        if details.countries:
            sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
        if details.excluded:
            sql = sql.where(_exclude_places(t))
        if details.layers is not None:
            sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            # Combine the near-search penalty with the category penalty.
            result.accuracy = self.penalty + penalty
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)
|
||||
|
||||
|
||||
|
||||
class PoiSearch(AbstractSearch):
    """ Category search in a geographic area.
    """
    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.qualifiers = sdata.qualifiers
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        bind_params = _details_to_bind_params(details)
        t = conn.t.placex

        rows: List[SaRow] = []

        if details.near and details.near_radius is not None and details.near_radius < 0.2:
            # simply search in placex table
            def _base_query() -> SaSelect:
                return _select_placex(t) \
                           .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                        .label('importance'))\
                           .where(t.c.linked_place_id == None) \
                           .where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
                           .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
                           .limit(LIMIT_PARAM)

            classtype = self.qualifiers.values
            if len(classtype) == 1:
                # Single category: build a cacheable lambda statement.
                cclass, ctype = classtype[0]
                sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
                                                             .where(t.c.class_ == cclass)
                                                             .where(t.c.type == ctype))
            else:
                sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
                                                   for cls, typ in classtype)))

            if self.countries:
                sql = sql.where(t.c.country_code.in_(self.countries.values))

            if details.viewbox is not None and details.bounded_viewbox:
                sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))

            rows.extend(await conn.execute(sql, bind_params))
        else:
            # use the class type tables
            for category in self.qualifiers.values:
                table = await conn.get_class_table(*category)
                # Categories without a classtype table yield no results here.
                if table is not None:
                    sql = _select_placex(t)\
                              .add_columns(t.c.importance)\
                              .join(table, t.c.place_id == table.c.place_id)\
                              .where(t.c.class_ == category[0])\
                              .where(t.c.type == category[1])

                    if details.viewbox is not None and details.bounded_viewbox:
                        sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))

                    if details.near and details.near_radius is not None:
                        sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
                                 .where(table.c.centroid.within_distance(NEAR_PARAM,
                                                                         NEAR_RADIUS_PARAM))

                    if self.countries:
                        sql = sql.where(t.c.country_code.in_(self.countries.values))

                    sql = sql.limit(LIMIT_PARAM)
                    rows.extend(await conn.execute(sql, bind_params))

        results = nres.SearchResults()
        for row in rows:
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            # Category-specific penalty from the qualifier list.
            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        return results
|
||||
|
||||
|
||||
class CountrySearch(AbstractSearch):
    """ Search for a country name or country code.
    """
    # Countries are cheap and high-value; run them before other searches.
    SEARCH_PRIO = 0

    def __init__(self, sdata: SearchData) -> None:
        super().__init__(sdata.penalty)
        self.countries = sdata.countries


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex

        ccodes = self.countries.values
        # Country objects in placex carry address rank 4.
        sql = _select_placex(t)\
                .add_columns(t.c.importance)\
                .where(t.c.country_code.in_(ccodes))\
                .where(t.c.rank_address == 4)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        sql = filter_by_area(sql, t, details)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            result.bbox = Bbox.from_wkb(row.bbox)
            results.append(result)

        if not results:
            # Fall back to the static country tables.
            results = await self.lookup_in_country_table(conn, details)

        if results:
            # Narrow the rank window for any follow-up lookups so that
            # only objects below country level are considered.
            details.min_rank = min(5, details.max_rank)
            details.max_rank = min(25, details.max_rank)

        return results


    async def lookup_in_country_table(self, conn: SearchConnection,
                                      details: SearchDetails) -> nres.SearchResults:
        """ Look up the country in the fallback country tables.
        """
        # Avoid the fallback search when this is a more search. Country results
        # usually are in the first batch of results and it is not possible
        # to exclude these fallbacks.
        if details.excluded:
            return nres.SearchResults()

        t = conn.t.country_name
        tgrid = conn.t.country_grid

        # Aggregate the country grid cells into one centroid and bbox
        # per country code.
        sql = sa.select(tgrid.c.country_code,
                        tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
                             .label('centroid'),
                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
                .where(tgrid.c.country_code.in_(self.countries.values))\
                .group_by(tgrid.c.country_code)

        sql = filter_by_area(sql, tgrid, details, avoid_index=True)

        sub = sql.subquery('grid')

        sql = sa.select(t.c.country_code,
                        t.c.name.merge(t.c.derived_name).label('name'),
                        sub.c.centroid, sub.c.bbox)\
                .join(sub, t.c.country_code == sub.c.country_code)

        if details.geometry_output:
            sql = _add_geometry_columns(sql, sub.c.centroid, details)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_country_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
            results.append(result)

        return results
|
||||
|
||||
|
||||
|
||||
class PostcodeSearch(AbstractSearch):
    """ Search for a postcode.
    """
    def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.postcode
        pcs = self.postcodes.values

        sql = sa.select(t.c.place_id, t.c.parent_place_id,
                        t.c.rank_search, t.c.rank_address,
                        t.c.postcode, t.c.country_code,
                        t.c.geometry.label('centroid'))\
                .where(t.c.postcode.in_(pcs))

        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)

        # Unbounded viewbox: prefer results inside it via a soft penalty.
        if details.viewbox is not None and not details.bounded_viewbox:
            penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
                               (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
                               else_=1.0)

        if details.near is not None:
            sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))

        sql = filter_by_area(sql, t, details)

        if self.countries:
            sql = sql.where(t.c.country_code.in_(self.countries.values))

        if details.excluded:
            sql = sql.where(_exclude_places(t))

        if self.lookups:
            # Restrict by the address terms of the postcode's parent place.
            assert len(self.lookups) == 1
            tsearch = conn.t.search_name
            sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
                     .where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
                            .contains(sa.type_coerce(self.lookups[0].tokens,
                                                     IntArray)))

        for ranking in self.rankings:
            penalty += ranking.sql_penalty(conn.t.search_name)
        # Per-postcode penalties; unknown postcodes get the default 1.0.
        penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
                           else_=1.0)


        sql = sql.add_columns(penalty.label('accuracy'))
        sql = sql.order_by('accuracy').limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_postcode_row(row, nres.SearchResult)
            assert result
            result.accuracy = row.accuracy
            results.append(result)

        return results
|
||||
|
||||
|
||||
|
||||
class PlaceSearch(AbstractSearch):
    """ Generic search for an address or named place.
    """
    SEARCH_PRIO = 1

    def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
        super().__init__(sdata.penalty + extra_penalty)
        self.countries = sdata.countries
        self.postcodes = sdata.postcodes
        self.housenumbers = sdata.housenumbers
        self.qualifiers = sdata.qualifiers
        self.lookups = sdata.lookups
        self.rankings = sdata.rankings
        # Estimated number of rows the token lookup will produce; used to
        # decide between hard filters and soft penalties below.
        self.expected_count = expected_count


    async def lookup(self, conn: SearchConnection,
                     details: SearchDetails) -> nres.SearchResults:
        """ Find results for the search in the database.
        """
        t = conn.t.placex
        tsearch = conn.t.search_name

        # lambda_stmt makes the statement construction cacheable.
        sql: SaLambdaSelect = sa.lambda_stmt(lambda:
                                  _select_placex(t).where(t.c.place_id == tsearch.c.place_id))


        if details.geometry_output:
            sql = _add_geometry_columns(sql, t.c.geometry, details)

        penalty: SaExpression = sa.literal(self.penalty)
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(tsearch)

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(tsearch))

        if self.countries:
            sql = sql.where(tsearch.c.country_code.in_(self.countries.values))

        if self.postcodes:
            # if a postcode is given, don't search for state or country level objects
            sql = sql.where(tsearch.c.address_rank > 9)
            tpc = conn.t.postcode
            pcs = self.postcodes.values
            if self.expected_count > 5000:
                # Many results expected. Restrict by postcode.
                sql = sql.where(sa.select(tpc.c.postcode)
                                  .where(tpc.c.postcode.in_(pcs))
                                  .where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
                                  .exists())

            # Less results, only have a preference for close postcodes
            pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
                      .where(tpc.c.postcode.in_(pcs))\
                      .scalar_subquery()
            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                               else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))

        if details.viewbox is not None:
            if details.bounded_viewbox:
                # Hard filter; use the index only for small viewboxes.
                sql = sql.where(tsearch.c.centroid
                                         .intersects(VIEWBOX_PARAM,
                                                     use_index=details.viewbox.area < 0.2))
            elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
                # Large unrestricted searches: filter by the enlarged viewbox.
                sql = sql.where(tsearch.c.centroid
                                         .intersects(VIEWBOX2_PARAM,
                                                     use_index=details.viewbox.area < 0.5))
            else:
                # Otherwise only prefer results inside the viewbox.
                penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
                                   (t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
                                   else_=1.0)

        if details.near is not None:
            if details.near_radius is not None:
                if details.near_radius < 0.1:
                    sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
                                                                       NEAR_RADIUS_PARAM))
                else:
                    sql = sql.where(tsearch.c.centroid
                                             .ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
            # Closer results rank as more important.
            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                   .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
            if self.expected_count < 10000\
               or (details.viewbox is not None and details.viewbox.area < 0.5):
                sql = sql.order_by(
                        penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
                                          else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
            sql = sql.add_columns(t.c.importance)


        sql = sql.add_columns(penalty.label('accuracy'))

        if self.expected_count < 10000:
            sql = sql.order_by(sa.text('accuracy'))

        if self.housenumbers:
            # Restrict to streets/POIs and match the house number by regex
            # for rank-30 objects.
            hnr_list = '|'.join(self.housenumbers.values)
            sql = sql.where(tsearch.c.address_rank.between(16, 30))\
                     .where(sa.or_(tsearch.c.address_rank < 30,
                                   sa.func.RegexpWord(hnr_list, t.c.housenumber)))

            # Cross check for housenumbers, need to do that on a rather large
            # set. Worst case there are 40.000 main streets in OSM.
            inner = sql.limit(10000).subquery()

            # Housenumbers from placex
            thnr = conn.t.placex.alias('hnr')
            pid_list = sa.func.ArrayAgg(thnr.c.place_id)
            place_sql = sa.select(pid_list)\
                          .where(thnr.c.parent_place_id == inner.c.place_id)\
                          .where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
                          .where(thnr.c.linked_place_id == None)\
                          .where(thnr.c.indexed_status == 0)

            if details.excluded:
                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
            if self.qualifiers:
                place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))

            # Interpolations can only match purely numeric housenumbers.
            numerals = [int(n) for n in self.housenumbers.values
                        if n.isdigit() and len(n) < 8]
            interpol_sql: SaColumn
            tiger_sql: SaColumn
            if numerals and \
               (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                # Housenumbers from interpolations
                interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
                                                            numerals, details)
                # Housenumbers from Tiger
                tiger_sql = sa.case((inner.c.country_code == 'us',
                                     _make_interpolation_subquery(conn.t.tiger, inner,
                                                                  numerals, details)
                                    ), else_=None)
            else:
                interpol_sql = sa.null()
                tiger_sql = sa.null()

            # Prefer results that actually have matching housenumbers,
            # in the order placex > interpolation > tiger.
            unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                               interpol_sql.label('interpol_hnr'),
                               tiger_sql.label('tiger_hnr')).subquery('unsort')
            sql = sa.select(unsort)\
                    .order_by(sa.case((unsort.c.placex_hnr != None, 1),
                                      (unsort.c.interpol_hnr != None, 2),
                                      (unsort.c.tiger_hnr != None, 3),
                                      else_=4),
                              unsort.c.accuracy)
        else:
            sql = sql.where(t.c.linked_place_id == None)\
                     .where(t.c.indexed_status == 0)
            if self.qualifiers:
                sql = sql.where(self.qualifiers.sql_restrict(t))
            if details.excluded:
                sql = sql.where(_exclude_places(tsearch))
            if details.min_rank > 0:
                sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
                                       tsearch.c.search_rank >= MIN_RANK_PARAM))
            if details.max_rank < 30:
                sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
                                       tsearch.c.search_rank <= MAX_RANK_PARAM))
            if details.layers is not None:
                sql = sql.where(_filter_by_layer(t, details.layers))

        sql = sql.limit(LIMIT_PARAM)

        results = nres.SearchResults()
        for row in await conn.execute(sql, _details_to_bind_params(details)):
            result = nres.create_from_placex_row(row, nres.SearchResult)
            assert result
            result.bbox = Bbox.from_wkb(row.bbox)
            result.accuracy = row.accuracy
            if self.housenumbers and row.rank_address < 30:
                # Expand street-level matches into their housenumbers.
                if row.placex_hnr:
                    subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
                elif row.interpol_hnr:
                    subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
                elif row.tiger_hnr:
                    subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
                else:
                    subs = None

                if subs is not None:
                    async for sub in subs:
                        assert sub.housenumber
                        sub.accuracy = result.accuracy
                        if not any(nr in self.housenumbers.values
                                   for nr in sub.housenumber.split(';')):
                            sub.accuracy += 0.6
                        results.append(sub)

                # Only add the street as a result, if it meets all other
                # filter conditions.
                if (not details.excluded or result.place_id not in details.excluded)\
                   and (not self.qualifiers or result.category in self.qualifiers.values)\
                   and result.rank_address >= details.min_rank:
                    result.accuracy += 1.0 # penalty for missing housenumber
                    results.append(result)
            else:
                results.append(result)

        return results
|
||||
274
nominatim/api/search/geocoder.py
Normal file
274
nominatim/api/search/geocoder.py
Normal file
@@ -0,0 +1,274 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Public interface to the search code.
|
||||
"""
|
||||
from typing import List, Any, Optional, Iterator, Tuple, Dict
|
||||
import itertools
|
||||
import re
|
||||
import datetime as dt
|
||||
import difflib
|
||||
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.types import SearchDetails
|
||||
from nominatim.api.results import SearchResult, SearchResults, add_result_details
|
||||
from nominatim.api.search.token_assignment import yield_token_assignments
|
||||
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
|
||||
from nominatim.api.search.db_searches import AbstractSearch
|
||||
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
|
||||
from nominatim.api.search.query import Phrase, QueryStruct
|
||||
from nominatim.api.logging import log
|
||||
|
||||
class ForwardGeocoder:
|
||||
""" Main class responsible for place search.
|
||||
"""
|
||||
|
||||
def __init__(self, conn: SearchConnection,
|
||||
params: SearchDetails, timeout: Optional[int]) -> None:
|
||||
self.conn = conn
|
||||
self.params = params
|
||||
self.timeout = dt.timedelta(seconds=timeout or 1000000)
|
||||
self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
|
||||
|
||||
|
||||
@property
|
||||
def limit(self) -> int:
|
||||
""" Return the configured maximum number of search results.
|
||||
"""
|
||||
return self.params.max_results
|
||||
|
||||
|
||||
    async def build_searches(self,
                             phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
        """ Analyse the query and return the tokenized query and list of
            possible searches over it.
        """
        # The analyzer is created lazily on first use and then reused.
        if self.query_analyzer is None:
            self.query_analyzer = await make_query_analyzer(self.conn)

        query = await self.query_analyzer.analyze_query(phrases)

        searches: List[AbstractSearch] = []
        if query.num_token_slots() > 0:
            # 2. Compute all possible search interpretations
            log().section('Compute abstract searches')
            search_builder = SearchBuilder(query, self.params)
            num_searches = 0
            for assignment in yield_token_assignments(query):
                searches.extend(search_builder.build(assignment))
                # Only dump newly added searches for this assignment.
                if num_searches < len(searches):
                    log().table_dump('Searches for assignment',
                                     _dump_searches(searches, query, num_searches))
                num_searches = len(searches)
            # Cheapest searches first; SEARCH_PRIO breaks ties.
            searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))

        return query, searches
|
||||
|
||||
|
||||
async def execute_searches(self, query: QueryStruct,
|
||||
searches: List[AbstractSearch]) -> SearchResults:
|
||||
""" Run the abstract searches against the database until a result
|
||||
is found.
|
||||
"""
|
||||
log().section('Execute database searches')
|
||||
results: Dict[Any, SearchResult] = {}
|
||||
|
||||
end_time = dt.datetime.now() + self.timeout
|
||||
|
||||
min_ranking = searches[0].penalty + 2.0
|
||||
prev_penalty = 0.0
|
||||
for i, search in enumerate(searches):
|
||||
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
|
||||
break
|
||||
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
|
||||
log().var_dump('Params', self.params)
|
||||
lookup_results = await search.lookup(self.conn, self.params)
|
||||
for result in lookup_results:
|
||||
rhash = (result.source_table, result.place_id,
|
||||
result.housenumber, result.country_code)
|
||||
prevresult = results.get(rhash)
|
||||
if prevresult:
|
||||
prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
|
||||
else:
|
||||
results[rhash] = result
|
||||
min_ranking = min(min_ranking, result.accuracy * 1.2)
|
||||
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
|
||||
prev_penalty = search.penalty
|
||||
if dt.datetime.now() >= end_time:
|
||||
break
|
||||
|
||||
return SearchResults(results.values())
|
||||
|
||||
|
||||
def pre_filter_results(self, results: SearchResults) -> SearchResults:
|
||||
""" Remove results that are significantly worse than the
|
||||
best match.
|
||||
"""
|
||||
if results:
|
||||
max_ranking = min(r.ranking for r in results) + 0.5
|
||||
results = SearchResults(r for r in results if r.ranking < max_ranking)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
|
||||
""" Remove badly matching results, sort by ranking and
|
||||
limit to the configured number of results.
|
||||
"""
|
||||
if results:
|
||||
results.sort(key=lambda r: r.ranking)
|
||||
min_rank = results[0].rank_search
|
||||
min_ranking = results[0].ranking
|
||||
results = SearchResults(r for r in results
|
||||
if r.ranking + 0.03 * (r.rank_search - min_rank)
|
||||
< min_ranking + 0.5)
|
||||
|
||||
results = SearchResults(results[:self.limit])
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
|
||||
""" Adjust the accuracy of the localized result according to how well
|
||||
they match the original query.
|
||||
"""
|
||||
assert self.query_analyzer is not None
|
||||
qwords = [word for phrase in query.source
|
||||
for word in re.split('[, ]+', phrase.text) if word]
|
||||
if not qwords:
|
||||
return
|
||||
|
||||
for result in results:
|
||||
# Negative importance indicates ordering by distance, which is
|
||||
# more important than word matching.
|
||||
if not result.display_name\
|
||||
or (result.importance is not None and result.importance < 0):
|
||||
continue
|
||||
distance = 0.0
|
||||
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
|
||||
result.country_code or '')))
|
||||
words = set((w for w in norm.split(' ') if w))
|
||||
if not words:
|
||||
continue
|
||||
for qword in qwords:
|
||||
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
|
||||
if wdist < 0.5:
|
||||
distance += len(qword)
|
||||
else:
|
||||
distance += (1.0 - wdist) * len(qword)
|
||||
# Compensate for the fact that country names do not get a
|
||||
# match penalty yet by the tokenizer.
|
||||
# Temporary hack that needs to be removed!
|
||||
if result.rank_address == 4:
|
||||
distance *= 2
|
||||
result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
|
||||
|
||||
|
||||
async def lookup_pois(self, categories: List[Tuple[str, str]],
|
||||
phrases: List[Phrase]) -> SearchResults:
|
||||
""" Look up places by category. If phrase is given, a place search
|
||||
over the phrase will be executed first and places close to the
|
||||
results returned.
|
||||
"""
|
||||
log().function('forward_lookup_pois', categories=categories, params=self.params)
|
||||
|
||||
if phrases:
|
||||
query, searches = await self.build_searches(phrases)
|
||||
|
||||
if query:
|
||||
searches = [wrap_near_search(categories, s) for s in searches[:50]]
|
||||
results = await self.execute_searches(query, searches)
|
||||
results = self.pre_filter_results(results)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
|
||||
results = self.sort_and_cut_results(results)
|
||||
else:
|
||||
results = SearchResults()
|
||||
else:
|
||||
search = build_poi_search(categories, self.params.countries)
|
||||
results = await search.lookup(self.conn, self.params)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
|
||||
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
async def lookup(self, phrases: List[Phrase]) -> SearchResults:
|
||||
""" Look up a single free-text query.
|
||||
"""
|
||||
log().function('forward_lookup', phrases=phrases, params=self.params)
|
||||
results = SearchResults()
|
||||
|
||||
if self.params.is_impossible():
|
||||
return results
|
||||
|
||||
query, searches = await self.build_searches(phrases)
|
||||
|
||||
if searches:
|
||||
# Execute SQL until an appropriate result is found.
|
||||
results = await self.execute_searches(query, searches[:50])
|
||||
results = self.pre_filter_results(results)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
|
||||
self.rerank_by_query(query, results)
|
||||
log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
|
||||
results = self.sort_and_cut_results(results)
|
||||
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
|
||||
|
||||
return results
|
||||
|
||||
|
||||
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                   start: int = 0) -> Iterator[Optional[List[Any]]]:
    """ Yield a debug table for the given abstract searches: a header row
        first, then one row per table line, with None terminating each
        search. Only searches from index 'start' onwards are dumped.
    """
    # Fixed: header previously read 'Catgeory'.
    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
           'Qualifier', 'Category', 'Rankings']

    def tk(tl: List[int]) -> str:
        # Render a token-id list as '[word(id),...]'.
        tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]

        return f"[{','.join(tstr)}]"

    def fmt_ranking(f: Any) -> str:
        if not f:
            return ''
        ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
        if len(ranks) > 100:
            # Keep the debug output readable.
            ranks = ranks[:100] + '...'
        return f"{f.column}({ranks},def={f.default:.3g})"

    def fmt_lookup(lookup: Any) -> str:
        # Renamed from 'l' (ambiguous single-letter name, PEP 8/E741).
        if not lookup:
            return ''

        return f"{lookup.lookup_type}({lookup.column}{tk(lookup.tokens)})"


    def fmt_cstr(c: Any) -> str:
        if not c:
            return ''

        return f'{c[0]}^{c[1]}'

    for search in searches[start:]:
        fields = ('lookups', 'rankings', 'countries', 'housenumbers',
                  'postcodes', 'qualifiers')
        # Near searches wrap the actual search in a 'search' attribute.
        if hasattr(search, 'search'):
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search.search, attr, []) for attr in fields),
                                          getattr(search, 'categories', []),
                                          fillvalue='')
        else:
            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
                                          *(getattr(search, attr, []) for attr in fields),
                                          [],
                                          fillvalue='')
        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
            yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
        yield None
|
||||
312
nominatim/api/search/icu_tokenizer.py
Normal file
312
nominatim/api/search/icu_tokenizer.py
Normal file
@@ -0,0 +1,312 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of query analysis for the ICU tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
import difflib
|
||||
|
||||
from icu import Transliterator
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaRow
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.search import query as qmod
|
||||
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
from nominatim.db.sqlalchemy_types import Json
|
||||
|
||||
|
||||
# Mapping of the one-letter type codes from the word table to the
# token types of the query datastructure. The 'S' (special term) code
# is handled separately in analyze_query().
DB_TO_TOKEN_TYPE = {
    'W': qmod.TokenType.WORD,
    'w': qmod.TokenType.PARTIAL,
    'H': qmod.TokenType.HOUSENUMBER,
    'P': qmod.TokenType.POSTCODE,
    'C': qmod.TokenType.COUNTRY
}
|
||||
|
||||
|
||||
class QueryPart(NamedTuple):
    """ Normalized and transliterated form of a single term in the query.
        When the term came out of a split during the transliteration,
        the normalized string is the full word before transliteration.
        The word number keeps track of the word before transliteration
        and can be used to identify partial transliterated terms.
    """
    # Transliterated form of the term, used for the word-table lookup.
    token: str
    # Normalized form of the word the term came from.
    normalized: str
    # Index of the originating word within the query.
    word_number: int


# All terms of a query in order of appearance.
QueryParts = List[QueryPart]
# Maps a lookup word to all positions where it occurs in the query.
WordDict = Dict[str, List[qmod.TokenRange]]
|
||||
|
||||
def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
    """ Return all combinations of words in the terms list after the
        given position.

        Yields pairs of the combined lookup word and the token range
        it covers. Words are extended by at most 19 additional terms.
    """
    for begin in range(start, len(terms)):
        # Start with the single term, then grow the word term by term.
        combined = terms[begin].token
        yield combined, qmod.TokenRange(begin, begin + 1)
        for end in range(begin + 1, min(begin + 20, len(terms))):
            combined = f"{combined} {terms[end].token}"
            yield combined, qmod.TokenRange(begin, end + 1)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ICUToken(qmod.Token):
    """ Specialised token for ICU tokenizer.
    """
    # Transliterated form as stored in the word table.
    word_token: str
    # JSON info column from the word table; None for artificial tokens.
    info: Optional[Dict[str, Any]]

    def get_category(self) -> Tuple[str, str]:
        """ Return the (class, type) pair from the token info.
            Must only be called when info is set.
        """
        assert self.info
        return self.info.get('class', ''), self.info.get('type', '')


    def rematch(self, norm: str) -> None:
        """ Check how well the token matches the given normalized string
            and add a penalty, if necessary.
        """
        if not self.lookup_word:
            return

        seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
        distance = 0
        for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
            if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
                # Edits at either end of the lookup word are cheap.
                distance += 1
            elif tag == 'replace':
                distance += max((ato-afrom), (bto-bfrom))
            elif tag != 'equal':
                distance += abs((ato-afrom) - (bto-bfrom))
        # Penalty proportional to the edit distance relative to word length.
        self.penalty += (distance/len(self.lookup_word))


    @staticmethod
    def from_db_row(row: SaRow) -> 'ICUToken':
        """ Create a ICUToken from the row of the word table.
        """
        count = 1 if row.info is None else row.info.get('count', 1)

        # Base penalty heuristics per token type (see DB_TO_TOKEN_TYPE).
        penalty = 0.0
        if row.type == 'w':
            # Partial words are less reliable matches.
            penalty = 0.3
        elif row.type == 'W':
            # Single-character full words that transliterate to themselves.
            if len(row.word_token) == 1 and row.word_token == row.word:
                penalty = 0.2 if row.word.isdigit() else 0.3
        elif row.type == 'H':
            # Penalise every non-digit character in a housenumber.
            penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
            if all(not c.isdigit() for c in row.word_token):
                penalty += 0.2 * (len(row.word_token) - 1)
        elif row.type == 'C':
            if len(row.word_token) == 1:
                penalty = 0.3

        if row.info is None:
            lookup_word = row.word
        else:
            lookup_word = row.info.get('lookup', row.word)
        if lookup_word:
            # Only the part before any '@' is used for matching.
            lookup_word = lookup_word.split('@', 1)[0]
        else:
            lookup_word = row.word_token

        return ICUToken(penalty=penalty, token=row.word_id, count=count,
                        lookup_word=lookup_word, is_indexed=True,
                        word_token=row.word_token, info=row.info)
|
||||
|
||||
|
||||
|
||||
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
    """ Converter for query strings into a tokenized query
        using the tokens created by a ICU tokenizer.
    """

    def __init__(self, conn: SearchConnection) -> None:
        self.conn = conn


    async def setup(self) -> None:
        """ Set up static data structures needed for the analysis.

            Loads normalizer and transliterator rules (cached on the
            connection) and registers the word table with SQLAlchemy.
        """
        async def _make_normalizer() -> Any:
            rules = await self.conn.get_property('tokenizer_import_normalisation')
            return Transliterator.createFromRules("normalization", rules)

        self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
                                                           _make_normalizer)

        async def _make_transliterator() -> Any:
            rules = await self.conn.get_property('tokenizer_import_transliteration')
            return Transliterator.createFromRules("transliteration", rules)

        self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
                                                               _make_transliterator)

        # Register the word table unless already present in the metadata.
        if 'word' not in self.conn.t.meta.tables:
            sa.Table('word', self.conn.t.meta,
                     sa.Column('word_id', sa.Integer),
                     sa.Column('word_token', sa.Text, nullable=False),
                     sa.Column('type', sa.Text, nullable=False),
                     sa.Column('word', sa.Text),
                     sa.Column('info', Json))


    async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
        """ Analyze the given list of phrases and return the
            tokenized query.
        """
        log().section('Analyze query (using ICU tokenizer)')
        # Normalize all phrases, dropping the ones that become empty.
        normalized = list(filter(lambda p: p.text,
                                 (qmod.Phrase(p.ptype, self.normalize_text(p.text))
                                  for p in phrases)))
        query = qmod.QueryStruct(normalized)
        log().var_dump('Normalized query', query.source)
        if not query.source:
            return query

        parts, words = self.split_query(query)
        log().var_dump('Transliterated query', lambda: _dump_transliterated(query, parts))

        for row in await self.lookup_in_db(list(words.keys())):
            for trange in words[row.word_token]:
                token = ICUToken.from_db_row(row)
                if row.type == 'S':
                    # Special term: 'in'/'near' operators are only accepted
                    # at the start of the query; other special terms become
                    # a near item when they cover the whole query and a
                    # qualifier otherwise.
                    if row.info['op'] in ('in', 'near'):
                        if trange.start == 0:
                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                    else:
                        if trange.start == 0 and trange.end == query.num_token_slots():
                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                        else:
                            query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                else:
                    query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

        self.add_extra_tokens(query, parts)
        self.rerank_tokens(query, parts)

        log().table_dump('Word tokens', _dump_word_tokens(query))

        return query


    def normalize_text(self, text: str) -> str:
        """ Bring the given text into a normalized form. That is the
            standardized form search will work with. All information removed
            at this stage is inevitably lost.
        """
        return cast(str, self.normalizer.transliterate(text))


    def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
        """ Transliterate the phrases and split them into tokens.

            Returns the list of transliterated tokens together with their
            normalized form and a dictionary of words for lookup together
            with their position.
        """
        parts: QueryParts = []
        phrase_start = 0
        words = defaultdict(list)
        wordnr = 0
        for phrase in query.source:
            query.nodes[-1].ptype = phrase.ptype
            for word in phrase.text.split(' '):
                trans = self.transliterator.transliterate(word)
                if trans:
                    # Transliteration may split one word into several terms;
                    # they all keep the same word number.
                    for term in trans.split(' '):
                        if term:
                            parts.append(QueryPart(term, word, wordnr))
                            query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
                    # The last node of the word gets a word break.
                    query.nodes[-1].btype = qmod.BreakType.WORD
                wordnr += 1
            query.nodes[-1].btype = qmod.BreakType.PHRASE

            # Collect lookup words only within the current phrase.
            for word, wrange in yield_words(parts, phrase_start):
                words[word].append(wrange)

            phrase_start = len(parts)
        query.nodes[-1].btype = qmod.BreakType.END

        return parts, words


    async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
        """ Return the token information from the database for the
            given word tokens.
        """
        t = self.conn.t.meta.tables['word']
        return await self.conn.execute(t.select().where(t.c.word_token.in_(words)))


    def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
        """ Add tokens to query that are not saved in the database.

            Currently adds artificial housenumber tokens for short
            digit-only terms that have no housenumber token yet.
        """
        for part, node, i in zip(parts, query.nodes, range(1000)):
            if len(part.token) <= 4 and part[0].isdigit()\
               and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
                query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
                                ICUToken(0.5, 0, 1, part.token, True, part.token, None))


    def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
        """ Add penalties to tokens that depend on presence of other token.
        """
        for i, node, tlist in query.iter_token_lists():
            if tlist.ttype == qmod.TokenType.POSTCODE:
                # Penalise alternatives covering the same range, except
                # short housenumbers.
                for repl in node.starting:
                    if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
                       and (repl.ttype != qmod.TokenType.HOUSENUMBER
                            or len(tlist.tokens[0].lookup_word) > 4):
                        repl.add_penalty(0.39)
            elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
                 and len(tlist.tokens[0].lookup_word) <= 3:
                if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
                    # Short numeric housenumbers: penalise the alternatives.
                    for repl in node.starting:
                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
                            repl.add_penalty(0.5 - tlist.tokens[0].penalty)
            elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
                # Rematch full words against the normalized form of the
                # covered part of the query.
                norm = parts[i].normalized
                for j in range(i + 1, tlist.end):
                    if parts[j - 1].word_number != parts[j].word_number:
                        norm += ' ' + parts[j].normalized
                for token in tlist.tokens:
                    cast(ICUToken, token).rematch(norm)
|
||||
|
||||
|
||||
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
    """ Render the transliterated query as a single string with the
        break characters inserted between the terms.
    """
    pieces = [query.nodes[0].btype.value]
    for node, part in zip(query.nodes[1:], parts):
        pieces.append(part.token)
        pieces.append(node.btype.value)
    return ''.join(pieces)
|
||||
|
||||
|
||||
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
    """ Yield a debug table of all tokens attached to the query:
        a header row followed by one row per token.
    """
    yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    for node in query.nodes:
        for tlist in node.starting:
            for token in tlist.tokens:
                icu_token = cast(ICUToken, token)
                yield [tlist.ttype.name, icu_token.token,
                       icu_token.word_token or '', icu_token.lookup_word or '',
                       icu_token.penalty, icu_token.count, icu_token.info]
|
||||
|
||||
|
||||
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create and set up a new query analyzer for a database based
        on the ICU tokenizer.
    """
    analyzer = ICUQueryAnalyzer(conn)
    # Load the normalization rules before handing the analyzer out.
    await analyzer.setup()
    return analyzer
|
||||
271
nominatim/api/search/legacy_tokenizer.py
Normal file
271
nominatim/api/search/legacy_tokenizer.py
Normal file
@@ -0,0 +1,271 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of query analysis for the legacy tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
|
||||
from copy import copy
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaRow
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.search import query as qmod
|
||||
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
|
||||
def yield_words(terms: List[str], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
    """ Return all combinations of words in the terms list after the
        given position.

        Yields pairs of the combined word and its token range; a word
        is extended by at most 19 additional terms.
    """
    for begin in range(start, len(terms)):
        combined = terms[begin]
        yield combined, qmod.TokenRange(begin, begin + 1)
        for end in range(begin + 1, min(begin + 20, len(terms))):
            combined = f"{combined} {terms[end]}"
            yield combined, qmod.TokenRange(begin, end + 1)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class LegacyToken(qmod.Token):
    """ Specialised token for legacy tokenizer.
    """
    # Token string as stored in the word table (leading space stripped).
    word_token: str
    # (class, type) pair for special terms, None otherwise.
    category: Optional[Tuple[str, str]]
    # Country code for country tokens, None otherwise.
    country: Optional[str]
    # Operator of special terms (e.g. 'in', 'near'), None otherwise.
    operator: Optional[str]

    @property
    def info(self) -> Dict[str, Any]:
        """ Dictionary of additional properties of the token.
            Should only be used for debugging purposes.
        """
        return {'category': self.category,
                'country': self.country,
                'operator': self.operator}


    def get_category(self) -> Tuple[str, str]:
        """ Return the (class, type) category restriction of the token.
            Must only be called on tokens that have a category.
        """
        assert self.category
        return self.category
|
||||
|
||||
|
||||
class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
    """ Converter for query strings into a tokenized query
        using the tokens created by a legacy tokenizer.
    """

    def __init__(self, conn: SearchConnection) -> None:
        self.conn = conn

    async def setup(self) -> None:
        """ Set up static data structures needed for the analysis.

            Reads the maximum word frequency and registers the word
            table with SQLAlchemy.
        """
        self.max_word_freq = int(await self.conn.get_property('tokenizer_maxwordfreq'))
        if 'word' not in self.conn.t.meta.tables:
            sa.Table('word', self.conn.t.meta,
                     sa.Column('word_id', sa.Integer),
                     sa.Column('word_token', sa.Text, nullable=False),
                     sa.Column('word', sa.Text),
                     sa.Column('class', sa.Text),
                     sa.Column('type', sa.Text),
                     sa.Column('country_code', sa.Text),
                     sa.Column('search_name_count', sa.Integer),
                     sa.Column('operator', sa.Text))


    async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
        """ Analyze the given list of phrases and return the
            tokenized query.
        """
        log().section('Analyze query (using Legacy tokenizer)')

        normalized = []
        if phrases:
            # Normalization is done by the database, all phrases in one go.
            for row in await self.conn.execute(sa.select(*(sa.func.make_standard_name(p.text)
                                                           for p in phrases))):
                normalized = [qmod.Phrase(p.ptype, r) for r, p in zip(row, phrases) if r]
                break

        query = qmod.QueryStruct(normalized)
        log().var_dump('Normalized query', query.source)
        if not query.source:
            return query

        parts, words = self.split_query(query)
        lookup_words = list(words.keys())
        log().var_dump('Split query', parts)
        log().var_dump('Extracted words', lookup_words)

        for row in await self.lookup_in_db(lookup_words):
            for trange in words[row.word_token.strip()]:
                token, ttype = self.make_token(row)
                if ttype == qmod.TokenType.NEAR_ITEM:
                    # Near items are only accepted at the start of the query.
                    if trange.start == 0:
                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                elif ttype == qmod.TokenType.QUALIFIER:
                    query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                    if trange.start == 0 or trange.end == query.num_token_slots():
                        # Also offer a (penalised) near-item interpretation
                        # when the qualifier is at either end of the query.
                        token = copy(token)
                        token.penalty += 0.1 * (query.num_token_slots())
                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
                    query.add_token(trange, ttype, token)

        self.add_extra_tokens(query, parts)
        self.rerank_tokens(query)

        log().table_dump('Word tokens', _dump_word_tokens(query))

        return query


    def normalize_text(self, text: str) -> str:
        """ Bring the given text into a normalized form.

            This only removes case, so some difference with the normalization
            in the phrase remains.
        """
        return text.lower()


    def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
                                                            Dict[str, List[qmod.TokenRange]]]:
        """ Transliterate the phrases and split them into tokens.

            Returns a list of transliterated tokens and a dictionary
            of words for lookup together with their position.
        """
        parts: List[str] = []
        phrase_start = 0
        words = defaultdict(list)
        for phrase in query.source:
            query.nodes[-1].ptype = phrase.ptype
            for trans in phrase.text.split(' '):
                if trans:
                    # NOTE(review): the inner split looks redundant - 'trans'
                    # comes from a split on ' ' already, so the term loop
                    # runs exactly once per non-empty 'trans'. Confirm.
                    for term in trans.split(' '):
                        if term:
                            parts.append(trans)
                            query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
                    query.nodes[-1].btype = qmod.BreakType.WORD
            query.nodes[-1].btype = qmod.BreakType.PHRASE
            # Collect lookup words only within the current phrase.
            for word, wrange in yield_words(parts, phrase_start):
                words[word].append(wrange)
            phrase_start = len(parts)
        query.nodes[-1].btype = qmod.BreakType.END

        return parts, words


    async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
        """ Return the token information from the database for the
            given word tokens.

            Full words are stored with a leading space, so each word is
            looked up in both forms.
        """
        t = self.conn.t.meta.tables['word']

        sql = t.select().where(t.c.word_token.in_(words + [' ' + w for w in words]))

        return await self.conn.execute(sql)


    def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
        """ Create a LegacyToken from the row of the word table.
            Also determines the type of token.
        """
        penalty = 0.0
        is_indexed = True

        # 'class' is a reserved word, so the column needs getattr().
        rowclass = getattr(row, 'class')

        if row.country_code is not None:
            ttype = qmod.TokenType.COUNTRY
            lookup_word = row.country_code
        elif rowclass is not None:
            if rowclass == 'place' and row.type == 'house':
                ttype = qmod.TokenType.HOUSENUMBER
                lookup_word = row.word_token[1:]
            elif rowclass == 'place' and row.type == 'postcode':
                ttype = qmod.TokenType.POSTCODE
                lookup_word = row.word_token[1:]
            else:
                # Other special terms: operator decides near item vs qualifier.
                ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
                        else qmod.TokenType.QUALIFIER
                lookup_word = row.word
        elif row.word_token.startswith(' '):
            # Leading space marks a full word.
            ttype = qmod.TokenType.WORD
            lookup_word = row.word or row.word_token[1:]
        else:
            ttype = qmod.TokenType.PARTIAL
            lookup_word = row.word_token
            penalty = 0.21
            # Overly frequent partials are not searchable in the index.
            if row.search_name_count > self.max_word_freq:
                is_indexed = False

        return LegacyToken(penalty=penalty, token=row.word_id,
                           count=row.search_name_count or 1,
                           lookup_word=lookup_word,
                           word_token=row.word_token.strip(),
                           category=(rowclass, row.type) if rowclass is not None else None,
                           country=row.country_code,
                           operator=row.operator,
                           is_indexed=is_indexed),\
               ttype


    def add_extra_tokens(self, query: qmod.QueryStruct, parts: List[str]) -> None:
        """ Add tokens to query that are not saved in the database.

            Currently adds artificial housenumber tokens for short
            digit-only terms that have no housenumber token yet.
        """
        for part, node, i in zip(parts, query.nodes, range(1000)):
            if len(part) <= 4 and part.isdigit()\
               and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
                query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
                                LegacyToken(penalty=0.5, token=0, count=1,
                                            lookup_word=part, word_token=part,
                                            category=None, country=None,
                                            operator=None, is_indexed=True))


    def rerank_tokens(self, query: qmod.QueryStruct) -> None:
        """ Add penalties to tokens that depend on presence of other token.
        """
        for _, node, tlist in query.iter_token_lists():
            if tlist.ttype == qmod.TokenType.POSTCODE:
                # Penalise alternatives covering the same range, except
                # short housenumbers.
                for repl in node.starting:
                    if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
                       and (repl.ttype != qmod.TokenType.HOUSENUMBER
                            or len(tlist.tokens[0].lookup_word) > 4):
                        repl.add_penalty(0.39)
            elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
                 and len(tlist.tokens[0].lookup_word) <= 3:
                if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
                    # Short numeric housenumbers: penalise the alternatives.
                    for repl in node.starting:
                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
                            repl.add_penalty(0.5 - tlist.tokens[0].penalty)
|
||||
|
||||
|
||||
|
||||
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
    """ Yield a debug table of all tokens attached to the query:
        a header row followed by one row per token.
    """
    yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
    for node in query.nodes:
        for tlist in node.starting:
            for token in tlist.tokens:
                legacy_token = cast(LegacyToken, token)
                yield [tlist.ttype.name, legacy_token.token,
                       legacy_token.word_token or '', legacy_token.lookup_word or '',
                       legacy_token.penalty, legacy_token.count, legacy_token.info]
|
||||
|
||||
|
||||
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create and set up a new query analyzer for a database based
        on the legacy tokenizer.
    """
    out = LegacyQueryAnalyzer(conn)
    await out.setup()

    return out
|
||||
296
nominatim/api/search/query.py
Normal file
296
nominatim/api/search/query.py
Normal file
@@ -0,0 +1,296 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Datastructures for a tokenized query.
|
||||
"""
|
||||
from typing import List, Tuple, Optional, Iterator
|
||||
from abc import ABC, abstractmethod
|
||||
import dataclasses
|
||||
import enum
|
||||
|
||||
class BreakType(enum.Enum):
    """ Type of break between tokens.

        The enum value is the single character used to represent the
        break when dumping a tokenized query.
    """
    START = '<'
    """ Begin of the query. """
    END = '>'
    """ End of the query. """
    PHRASE = ','
    """ Break between two phrases. """
    WORD = ' '
    """ Break between words. """
    PART = '-'
    """ Break inside a word, for example a hyphen or apostrophe. """
    TOKEN = '`'
    """ Break created as a result of tokenization.
        This may happen in languages without spaces between words.
    """
|
||||
|
||||
|
||||
class TokenType(enum.Enum):
    """ Type of token.

        Determines how a token may be used in a search interpretation.
    """
    WORD = enum.auto()
    """ Full name of a place. """
    PARTIAL = enum.auto()
    """ Word term without breaks, does not necessarily represent a full name. """
    HOUSENUMBER = enum.auto()
    """ Housenumber term. """
    POSTCODE = enum.auto()
    """ Postal code term. """
    COUNTRY = enum.auto()
    """ Country name or reference. """
    QUALIFIER = enum.auto()
    """ Special term used together with name (e.g. _Hotel_ Bellevue). """
    NEAR_ITEM = enum.auto()
    """ Special term used as searchable object(e.g. supermarket in ...). """
|
||||
|
||||
|
||||
class PhraseType(enum.Enum):
    """ Designation of a phrase.
    """
    NONE = 0
    """ No specific designation (i.e. source is free-form query). """
    AMENITY = enum.auto()
    """ Contains name or type of a POI. """
    STREET = enum.auto()
    """ Contains a street name optionally with a housenumber. """
    CITY = enum.auto()
    """ Contains the postal city. """
    COUNTY = enum.auto()
    """ Contains the equivalent of a county. """
    STATE = enum.auto()
    """ Contains a state or province. """
    POSTCODE = enum.auto()
    """ Contains a postal code. """
    COUNTRY = enum.auto()
    """ Contains the country name or code. """

    def compatible_with(self, ttype: TokenType,
                        is_full_phrase: bool) -> bool:
        """ Check if the given token type may be used inside a phrase of
            this type. 'is_full_phrase' marks tokens that would cover the
            phrase completely on their own.
        """
        if self == PhraseType.NONE:
            # Free-form phrases accept everything except a qualifier
            # standing in for the whole phrase.
            return ttype != TokenType.QUALIFIER or not is_full_phrase
        if self == PhraseType.AMENITY:
            if ttype in (TokenType.WORD, TokenType.PARTIAL):
                return True
            # A near-item must span the whole phrase, a qualifier must not.
            return ttype == (TokenType.NEAR_ITEM if is_full_phrase
                             else TokenType.QUALIFIER)
        if self == PhraseType.STREET:
            return ttype in (TokenType.WORD, TokenType.PARTIAL,
                             TokenType.HOUSENUMBER)
        if self == PhraseType.POSTCODE:
            return ttype == TokenType.POSTCODE
        if self == PhraseType.COUNTRY:
            return ttype == TokenType.COUNTRY

        # Remaining phrase types (city, county, state) take name terms only.
        return ttype in (TokenType.WORD, TokenType.PARTIAL)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Token(ABC):
    """ Base type for tokens.
        Specific query analyzers must implement the concrete token class.
    """

    penalty: float      # extra penalty for using this token (increased via TokenList.add_penalty)
    token: int          # internal token ID (matched against in QueryStruct.find_lookup_word_by_id)
    count: int          # presumably the number of occurrences in the database — confirm with analyzer
    lookup_word: str    # source word the token was derived from (used for debug output)
    is_indexed: bool    # NOTE(review): not used within this module — semantics defined by the analyzer


    @abstractmethod
    def get_category(self) -> Tuple[str, str]:
        """ Return the category restriction for qualifier terms and
            category objects.
        """
|
||||
|
||||
@dataclasses.dataclass
class TokenRange:
    """ Half-open span [start, end) of query nodes covered by a token.
    """
    start: int
    end: int

    def __lt__(self, other: 'TokenRange') -> bool:
        # A range counts as smaller only when it ends before the other begins.
        return self.end <= other.start


    def __le__(self, other: 'TokenRange') -> bool:
        # Overlapping ranges have no meaningful <= ordering.
        return NotImplemented


    def __gt__(self, other: 'TokenRange') -> bool:
        # Mirror of __lt__: this range starts after the other has ended.
        return other.end <= self.start


    def __ge__(self, other: 'TokenRange') -> bool:
        return NotImplemented


    def replace_start(self, new_start: int) -> 'TokenRange':
        """ Return a copy of this range beginning at 'new_start'.
        """
        return TokenRange(new_start, self.end)


    def replace_end(self, new_end: int) -> 'TokenRange':
        """ Return a copy of this range ending at 'new_end'.
        """
        return TokenRange(self.start, new_end)


    def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
        """ Cut the range in two at 'index'.
            The index must lie inside the range.
        """
        return TokenRange(self.start, index), TokenRange(index, self.end)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class TokenList:
    """ Collection of all tokens of one particular type that span from
        one breakpoint to another.
    """
    end: int
    ttype: TokenType
    tokens: List[Token]


    def add_penalty(self, penalty: float) -> None:
        """ Increase the penalty of every token in this list by 'penalty'.
        """
        for tok in self.tokens:
            tok.penalty += penalty
|
||||
|
||||
|
||||
@dataclasses.dataclass
class QueryNode:
    """ A node of the query representing a break between terms.
    """
    btype: BreakType
    ptype: PhraseType
    starting: List[TokenList] = dataclasses.field(default_factory=list)

    def has_tokens(self, end: int, *ttypes: TokenType) -> bool:
        """ Check whether a token list of one of the given types runs
            from this node to node 'end'.
        """
        return any(tl.ttype in ttypes and tl.end == end for tl in self.starting)


    def get_tokens(self, end: int, ttype: TokenType) -> Optional[List[Token]]:
        """ Return the tokens of type 'ttype' spanning from this node to
            node 'end'. Returns 'None' if no such tokens exist.
        """
        return next((tl.tokens for tl in self.starting
                     if tl.end == end and tl.ttype == ttype), None)
|
||||
|
||||
|
||||
@dataclasses.dataclass
class Phrase:
    """ A normalized query part. Phrases may be typed which means that
        they then represent a specific part of the address.
    """
    ptype: PhraseType   # designation of the phrase (NONE for free-form input)
    text: str           # normalized text of the phrase
|
||||
|
||||
|
||||
class QueryStruct:
    """ A tokenized search query together with the normalized source
        from which the tokens have been parsed.

        The query contains a list of nodes that represent the breaks
        between words. Tokens span between nodes, which don't necessarily
        need to be direct neighbours. Thus the query is represented as a
        directed acyclic graph.

        When created, a query contains a single node: the start of the
        query. Further nodes can be added by appending to 'nodes'.
    """

    def __init__(self, source: List[Phrase]) -> None:
        self.source = source
        first_ptype = source[0].ptype if source else PhraseType.NONE
        self.nodes: List[QueryNode] = [QueryNode(BreakType.START, first_ptype)]


    def num_token_slots(self) -> int:
        """ Return the length of the query in vertice steps.
        """
        return len(self.nodes) - 1


    def add_node(self, btype: BreakType, ptype: PhraseType) -> None:
        """ Append a new break node with the given break type.
            The phrase type denotes the type for any tokens starting
            at the node.
        """
        self.nodes.append(QueryNode(btype, ptype))


    def add_token(self, trange: TokenRange, ttype: TokenType, token: Token) -> None:
        """ Add a token to the query. 'trange' names the nodes from which
            to which the token spans. The indexes must exist and are
            expected to be in the same phrase. 'ttype' denotes the type
            of the token and 'token' the token to be inserted.

            If the token type is not compatible with the phrase it should
            be added to, then the token is silently dropped.
        """
        snode = self.nodes[trange.start]
        enode = self.nodes[trange.end]
        # A token covers a full phrase when both its endpoints sit on
        # phrase boundaries.
        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE) \
                      and enode.btype in (BreakType.PHRASE, BreakType.END)
        if not snode.ptype.compatible_with(ttype, full_phrase):
            return

        tlist = snode.get_tokens(trange.end, ttype)
        if tlist is not None:
            tlist.append(token)
        else:
            snode.starting.append(TokenList(trange.end, ttype, [token]))


    def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
        """ Get the list of tokens of a given type, spanning the given
            nodes. The nodes must exist. If no tokens exist, an
            empty list is returned.
        """
        return self.nodes[trange.start].get_tokens(trange.end, ttype) or []


    def get_partials_list(self, trange: TokenRange) -> List[Token]:
        """ Create a list of partial tokens between the given nodes.
            The list is composed of the first token of type PARTIAL
            going to the subsequent node. Such PARTIAL tokens are
            assumed to exist.
        """
        result: List[Token] = []
        for i in range(trange.start, trange.end):
            partials = self.get_tokens(TokenRange(i, i + 1), TokenType.PARTIAL)
            result.append(next(iter(partials)))
        return result


    def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
        """ Iterator over all token lists in the query.
        """
        for pos, node in enumerate(self.nodes):
            for tlist in node.starting:
                yield pos, node, tlist


    def find_lookup_word_by_id(self, token: int) -> str:
        """ Find the first token with the given token ID and return
            its lookup word. Returns the string 'None' if no such
            token exists. The function is very slow and must only be
            used for debugging.
        """
        for _, _, tlist in self.iter_token_lists():
            for candidate in tlist.tokens:
                if candidate.token == token:
                    return f"[{tlist.ttype.name[0]}]{candidate.lookup_word}"
        return 'None'
|
||||
54
nominatim/api/search/query_analyzer_factory.py
Normal file
54
nominatim/api/search/query_analyzer_factory.py
Normal file
@@ -0,0 +1,54 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Factory for creating a query analyzer for the configured tokenizer.
|
||||
"""
|
||||
from typing import List, cast, TYPE_CHECKING
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
import importlib
|
||||
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.connection import SearchConnection
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nominatim.api.search.query import Phrase, QueryStruct
|
||||
|
||||
class AbstractQueryAnalyzer(ABC):
    """ Class for analysing incoming queries.

        Query analyzers are tied to the tokenizer used on import.
    """

    @abstractmethod
    async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
        """ Analyze the given phrases and return the tokenized query.
        """


    @abstractmethod
    def normalize_text(self, text: str) -> str:
        """ Bring the given text into a normalized form. That is the
            standardized form search will work with. All information removed
            at this stage is inevitably lost.
        """
|
||||
|
||||
|
||||
|
||||
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Create a query analyzer for the tokenizer used by the database.

        Raises a RuntimeError when no analyzer module matching the
        database's 'tokenizer' property can be found next to this file.
    """
    name = await conn.get_property('tokenizer')

    # The analyzer implementation must be shipped as a sibling module
    # named '<tokenizer>_tokenizer.py'.
    if not (Path(__file__).parent / f'{name}_tokenizer.py').is_file():
        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
        raise RuntimeError('Tokenizer not found')

    module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
    return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
|
||||
421
nominatim/api/search/token_assignment.py
Normal file
421
nominatim/api/search/token_assignment.py
Normal file
@@ -0,0 +1,421 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Create query interpretations where each vertice in the query is assigned
|
||||
a specific function (expressed as a token type).
|
||||
"""
|
||||
from typing import Optional, List, Iterator
|
||||
import dataclasses
|
||||
|
||||
import nominatim.api.search.query as qmod
|
||||
from nominatim.api.logging import log
|
||||
|
||||
# pylint: disable=too-many-return-statements,too-many-branches
|
||||
|
||||
@dataclasses.dataclass
class TypedRange:
    """ A token range for a specific type of tokens.
    """
    ttype: qmod.TokenType     # token type assigned to the span
    trange: qmod.TokenRange   # node span the type applies to
|
||||
|
||||
|
||||
# Penalty applied when an assignment changes token type at a break of the
# given kind. Breaks inside words (PART, TOKEN) are penalized most.
PENALTY_TOKENCHANGE = {
    qmod.BreakType.START: 0.0,
    qmod.BreakType.END: 0.0,
    qmod.BreakType.PHRASE: 0.0,
    qmod.BreakType.WORD: 0.1,
    qmod.BreakType.PART: 0.2,
    qmod.BreakType.TOKEN: 0.4
}

# One complete reading of the query as a sequence of typed spans.
TypedRangeSeq = List[TypedRange]
|
||||
|
||||
@dataclasses.dataclass
class TokenAssignment: # pylint: disable=too-many-instance-attributes
    """ Representation of a possible assignment of token types
        to the tokens in a tokenized query.
    """
    penalty: float = 0.0
    name: Optional[qmod.TokenRange] = None
    address: List[qmod.TokenRange] = dataclasses.field(default_factory=list)
    housenumber: Optional[qmod.TokenRange] = None
    postcode: Optional[qmod.TokenRange] = None
    country: Optional[qmod.TokenRange] = None
    near_item: Optional[qmod.TokenRange] = None
    qualifier: Optional[qmod.TokenRange] = None


    @staticmethod
    def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
        """ Create a new token assignment from a sequence of typed spans.
        """
        # Token types that map to a single-valued attribute. PARTIAL
        # ranges accumulate in 'address'; any other type is ignored.
        single_fields = {qmod.TokenType.HOUSENUMBER: 'housenumber',
                         qmod.TokenType.POSTCODE: 'postcode',
                         qmod.TokenType.COUNTRY: 'country',
                         qmod.TokenType.NEAR_ITEM: 'near_item',
                         qmod.TokenType.QUALIFIER: 'qualifier'}

        out = TokenAssignment()
        for item in ranges:
            if item.ttype == qmod.TokenType.PARTIAL:
                out.address.append(item.trange)
            else:
                attr = single_fields.get(item.ttype)
                if attr is not None:
                    setattr(out, attr, item.trange)
        return out
|
||||
|
||||
|
||||
class _TokenSequence:
    """ Working state used to put together the token assignments.

        Represents an intermediate state while traversing the tokenized
        query.

        'direction' encodes the reading order decided so far:
        1 = left-to-right (name before address), -1 = right-to-left,
        0 = still undecided.
    """
    def __init__(self, seq: TypedRangeSeq,
                 direction: int = 0, penalty: float = 0.0) -> None:
        self.seq = seq
        self.direction = direction
        self.penalty = penalty


    def __str__(self) -> str:
        seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq)
        return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'


    @property
    def end_pos(self) -> int:
        """ Return the index of the global end of the current sequence.
        """
        return self.seq[-1].trange.end if self.seq else 0


    def has_types(self, *ttypes: qmod.TokenType) -> bool:
        """ Check if the current sequence contains any typed ranges of
            the given types.
        """
        return any(s.ttype in ttypes for s in self.seq)


    def is_final(self) -> bool:
        """ Return true when the sequence cannot be extended by any
            form of token anymore.
        """
        # Country and category must be the final term for left-to-right
        return len(self.seq) > 1 and \
               self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)


    def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
        """ Check if the give token type is appendable to the existing sequence.

            Returns None if the token type is not appendable, otherwise the
            new direction of the sequence after adding such a type. The
            token is not added.
        """
        # Full WORD tokens are never part of an assignment; only their
        # PARTIAL counterparts are considered here.
        if ttype == qmod.TokenType.WORD:
            return None

        if not self.seq:
            # Append unconditionally to the empty list
            if ttype == qmod.TokenType.COUNTRY:
                return -1
            if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        # Name tokens are always acceptable and don't change direction
        if ttype == qmod.TokenType.PARTIAL:
            # qualifiers cannot appear in the middle of the query. They need
            # to be near the next phrase.
            if self.direction == -1 \
               and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
                return None
            return self.direction

        # Other tokens may only appear once
        if self.has_types(ttype):
            return None

        if ttype == qmod.TokenType.HOUSENUMBER:
            if self.direction == 1:
                if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
                    return None
                if len(self.seq) > 2 \
                   or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                    return None # direction left-to-right: housenumber must come before anything
            elif self.direction == -1 \
                 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return -1 # force direction right-to-left if after other terms

            return self.direction

        if ttype == qmod.TokenType.POSTCODE:
            if self.direction == -1:
                if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                    return None
                return -1
            if self.direction == 1:
                return None if self.has_types(qmod.TokenType.COUNTRY) else 1
            if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
                return 1
            return self.direction

        if ttype == qmod.TokenType.COUNTRY:
            # Countries may only terminate a left-to-right reading.
            return None if self.direction == -1 else 1

        if ttype == qmod.TokenType.NEAR_ITEM:
            return self.direction

        if ttype == qmod.TokenType.QUALIFIER:
            if self.direction == 1:
                # Qualifier may only follow directly after the (possible
                # near-item-prefixed) name part.
                if (len(self.seq) == 1
                    and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
                   or (len(self.seq) == 2
                       and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
                       and self.seq[1].ttype == qmod.TokenType.PARTIAL):
                    return 1
                return None
            if self.direction == -1:
                return -1

            # Direction undecided: a leading near-item does not count
            # towards the position of the qualifier.
            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
            if len(tempseq) == 0:
                return 1
            if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
                return None
            if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
                return -1
            return 0

        return None


    def advance(self, ttype: qmod.TokenType, end_pos: int,
                btype: qmod.BreakType) -> Optional['_TokenSequence']:
        """ Return a new token sequence state with the given token type
            extended. Returns None when the token type cannot be appended.
        """
        newdir = self.appendable(ttype)
        if newdir is None:
            return None

        if not self.seq:
            newseq = [TypedRange(ttype, qmod.TokenRange(0, end_pos))]
            new_penalty = 0.0
        else:
            last = self.seq[-1]
            if btype != qmod.BreakType.PHRASE and last.ttype == ttype:
                # extend the existing range
                newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
                new_penalty = 0.0
            else:
                # start a new range
                newseq = list(self.seq) + [TypedRange(ttype,
                                                      qmod.TokenRange(last.trange.end, end_pos))]
                new_penalty = PENALTY_TOKENCHANGE[btype]

        return _TokenSequence(newseq, newdir, self.penalty + new_penalty)


    def _adapt_penalty_from_priors(self, priors: int, new_dir: int) -> bool:
        # Two partials before/after the housenumber are tolerated with a
        # penalty; more than two force the reading direction or, if the
        # direction is already fixed, invalidate the sequence.
        if priors == 2:
            self.penalty += 1.0
        elif priors > 2:
            if self.direction == 0:
                self.direction = new_dir
            else:
                return False

        return True


    def recheck_sequence(self) -> bool:
        """ Check that the sequence is a fully valid token assignment
            and adapt direction and penalties further if necessary.

            This function catches some impossible assignments that need
            forward context and can therefore not be excluded when building
            the assignment.
        """
        # housenumbers may not be further than 2 words from the beginning.
        # If there are two words in front, give it a penalty.
        hnrpos = next((i for i, tr in enumerate(self.seq)
                       if tr.ttype == qmod.TokenType.HOUSENUMBER),
                      None)
        if hnrpos is not None:
            if self.direction != -1:
                priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, -1):
                    return False
            if self.direction != 1:
                priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
                if not self._adapt_penalty_from_priors(priors, 1):
                    return False
            # Housenumber together with a near-item is unusual; penalize.
            if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
                self.penalty += 1.0

        return True


    def _get_assignments_postcode(self, base: TokenAssignment,
                                  query_len: int) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of Postcode searches with an
            address component.
        """
        assert base.postcode is not None

        # The postcode must sit at one end of the query.
        if (base.postcode.start == 0 and self.direction != -1)\
           or (base.postcode.end == query_len and self.direction != 1):
            log().comment('postcode search')
            # <address>,<postcode> should give preference to address search
            if base.postcode.start == 0:
                penalty = self.penalty
                self.direction = -1 # name searches are only possible backwards
            else:
                penalty = self.penalty + 0.1
                self.direction = 1 # name searches are only possible forwards
            yield dataclasses.replace(base, penalty=penalty)


    def _get_assignments_address_forward(self, base: TokenAssignment,
                                         query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of address searches with
            left-to-right reading.
        """
        first = base.address[0]

        log().comment('first word = name')
        yield dataclasses.replace(base, penalty=self.penalty,
                                  name=first, address=base.address[1:])

        # To paraphrase:
        #  * if another name term comes after the first one and before the
        #    housenumber
        #  * a qualifier comes after the name
        #  * the containing phrase is strictly typed
        if (base.housenumber and first.end < base.housenumber.start)\
           or (base.qualifier and base.qualifier > first)\
           or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty

        # Penalty for:
        #  * <name>, <street>, <housenumber> , ...
        #  * queries that are comma-separated
        if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
            penalty += 0.25

        for i in range(first.start + 1, first.end):
            name, addr = first.split(i)
            log().comment(f'split first word = name ({i - first.start})')
            yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])


    def _get_assignments_address_backward(self, base: TokenAssignment,
                                          query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments of address searches with
            right-to-left reading.
        """
        last = base.address[-1]

        if self.direction == -1 or len(base.address) > 1:
            log().comment('last word = name')
            yield dataclasses.replace(base, penalty=self.penalty,
                                      name=last, address=base.address[:-1])

        # To paraphrase:
        #  * if another name term comes before the last one and after the
        #    housenumber
        #  * a qualifier comes before the name
        #  * the containing phrase is strictly typed
        if (base.housenumber and last.start > base.housenumber.end)\
           or (base.qualifier and base.qualifier < last)\
           or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
            return

        penalty = self.penalty
        if base.housenumber and base.housenumber < last:
            penalty += 0.4
        if len(query.source) > 1:
            penalty += 0.25

        for i in range(last.start + 1, last.end):
            addr, name = last.split(i)
            log().comment(f'split last word = name ({i - last.start})')
            yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])


    def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
        """ Yield possible assignments for the current sequence.

            This function splits up general name assignments into name
            and address and yields all possible variants of that.
        """
        base = TokenAssignment.from_ranges(self.seq)

        # Guard against combinatorial explosion on very long addresses.
        num_addr_tokens = sum(t.end - t.start for t in base.address)
        if num_addr_tokens > 50:
            return

        # Postcode search (postcode-only search is covered in next case)
        if base.postcode is not None and base.address:
            yield from self._get_assignments_postcode(base, query.num_token_slots())

        # Postcode or country-only search
        if not base.address:
            if not base.housenumber and (base.postcode or base.country or base.near_item):
                log().comment('postcode/country search')
                yield dataclasses.replace(base, penalty=self.penalty)
        else:
            # <postcode>,<address> should give preference to postcode search
            if base.postcode and base.postcode.start == 0:
                self.penalty += 0.1

            # Right-to-left reading of the address
            if self.direction != -1:
                yield from self._get_assignments_address_forward(base, query)

            # Left-to-right reading of the address
            if self.direction != 1:
                yield from self._get_assignments_address_backward(base, query)

            # variant for special housenumber searches
            if base.housenumber and not base.qualifier:
                yield dataclasses.replace(base, penalty=self.penalty)
|
||||
|
||||
|
||||
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
    """ Return possible word type assignments to word positions.

        The assignments are computed from the concrete tokens listed
        in the tokenized query.

        The result includes the penalty for transitions from one word type to
        another. It does not include penalties for transitions within a
        type.
    """
    # A typed first phrase fixes left-to-right reading from the start.
    initial_dir = 0 if query.source[0].ptype == qmod.PhraseType.NONE else 1
    todo = [_TokenSequence([], direction=initial_dir)]

    # Depth-first exploration of all extendable token sequences.
    while todo:
        state = todo.pop()
        node = query.nodes[state.end_pos]

        for tlist in node.starting:
            newstate = state.advance(tlist.ttype, tlist.end, node.btype)
            if newstate is None:
                continue

            if newstate.end_pos == query.num_token_slots():
                # The sequence covers the full query: emit assignments.
                if newstate.recheck_sequence():
                    log().var_dump('Assignment', newstate)
                    yield from newstate.get_assignments(query)
            elif not newstate.is_final():
                todo.append(newstate)
|
||||
52
nominatim/api/status.py
Normal file
52
nominatim/api/status.py
Normal file
@@ -0,0 +1,52 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Classes and function related to status call.
|
||||
"""
|
||||
from typing import Optional
|
||||
import datetime as dt
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim import version
|
||||
|
||||
@dataclasses.dataclass
class StatusResult:
    """ Result of a call to the status API.
    """
    status: int     # status code (0 when everything is OK)
    message: str    # human-readable status message
    # NOTE: intentionally unannotated, so this is a class attribute shared
    # by all instances rather than a dataclass field.
    software_version = version.NOMINATIM_VERSION
    data_updated: Optional[dt.datetime] = None                  # date of the last data import (UTC)
    database_version: Optional[version.NominatimVersion] = None # version recorded in the database
|
||||
|
||||
|
||||
async def get_status(conn: SearchConnection) -> StatusResult:
    """ Execute a status API call.

        Returns a StatusResult with the last import date (normalized to
        an aware UTC datetime) and the database version, when available.
    """
    status = StatusResult(0, 'OK')

    # Last update date
    status.data_updated = await conn.scalar(
        sa.select(conn.t.import_status.c.lastimportdate).limit(1))

    if status.data_updated is not None:
        # Normalize to an aware UTC timestamp.
        if status.data_updated.tzinfo is None:
            status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
        else:
            status.data_updated = status.data_updated.astimezone(dt.timezone.utc)

    # Database version; an unparsable or missing property is ignored.
    try:
        status.database_version = \
            version.parse_version(await conn.get_property('database_version'))
    except ValueError:
        pass

    return status
|
||||
550
nominatim/api/types.py
Normal file
550
nominatim/api/types.py
Normal file
@@ -0,0 +1,550 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Complex datatypes used by the Nominatim API.
|
||||
"""
|
||||
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
|
||||
Any, List, Sequence
|
||||
from collections import abc
|
||||
import dataclasses
|
||||
import enum
|
||||
import math
|
||||
from struct import unpack
|
||||
from binascii import unhexlify
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.api.localization import Locales
|
||||
|
||||
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
|
||||
|
||||
@dataclasses.dataclass
class PlaceID:
    """ Reference a place by Nominatim's internal ID.

        A PlaceID may reference a place from the main table placex, from
        the interpolation tables or the postcode tables. Place IDs are not
        stable between installations. You may therefore use this type only
        with place IDs obtained from the same database.
    """
    place_id: int
    """
    The internal ID of the place to reference.
    """
|
||||
|
||||
|
||||
@dataclasses.dataclass
class OsmID:
    """ Reference a place by its OSM ID and potentially the basic category.

        The OSM ID may refer to places in the main table placex and OSM
        interpolation lines.
    """
    osm_type: str
    """ OSM type of the object. Must be one of `N`(node), `W`(way) or
        `R`(relation).
    """
    osm_id: int
    """ The OSM ID of the object.
    """
    osm_class: Optional[str] = None
    """ The same OSM object may appear multiple times in the database under
        different categories. The optional class parameter allows to distinguish
        the different categories and corresponds to the key part of the category.
        If there are multiple objects in the database and `osm_class` is
        left out, then one of the objects is returned at random.
    """

    def __post_init__(self) -> None:
        # Validate eagerly so malformed references fail at construction time.
        if self.osm_type not in ('N', 'W', 'R'):
            raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.")
|
||||
|
||||
|
||||
PlaceRef = Union[PlaceID, OsmID]
|
||||
|
||||
|
||||
class Point(NamedTuple):
    """ A geographic point in WGS84 projection.
    """
    x: float
    y: float


    @property
    def lat(self) -> float:
        """ Return the latitude of the point.
        """
        return self.y


    @property
    def lon(self) -> float:
        """ Return the longitude of the point.
        """
        return self.x


    def to_geojson(self) -> str:
        """ Return the point in GeoJSON format.
        """
        return f'{{"type": "Point","coordinates": [{self.x}, {self.y}]}}'


    @staticmethod
    def from_wkb(wkb: Union[str, bytes]) -> 'Point':
        """ Create a point from EWKB as returned from the database.

            Accepts hex-encoded strings as well as raw bytes. Raises a
            ValueError when the data is not a WGS84 point geometry.
        """
        if isinstance(wkb, str):
            wkb = unhexlify(wkb)
        if len(wkb) != 25:
            raise ValueError(f"Point wkb has unexpected length {len(wkb)}")
        # The first byte encodes the endianness of the remaining data.
        if wkb[0] == 0:
            gtype, srid, x, y = unpack('>iidd', wkb[1:])
        elif wkb[0] == 1:
            gtype, srid, x, y = unpack('<iidd', wkb[1:])
        else:
            raise ValueError("WKB has unknown endian value.")

        if gtype != 0x20000001:
            raise ValueError("WKB must be a point geometry.")
        if srid != 4326:
            raise ValueError("Only WGS84 WKB supported.")

        return Point(x, y)


    @staticmethod
    def from_param(inp: Any) -> 'Point':
        """ Create a point from an input parameter. The parameter
            may be given as a point, a string or a sequence of
            strings or floats. Raises a UsageError if the format is
            not correct.
        """
        if isinstance(inp, Point):
            return inp

        seq: Sequence[str]
        if isinstance(inp, str):
            seq = inp.split(',')
        elif isinstance(inp, abc.Sequence):
            seq = inp
        else:
            # BUGFIX: previously any other input type fell through with
            # 'seq' unbound, raising UnboundLocalError instead of a
            # proper usage error.
            raise UsageError('Point parameter needs to be a coordinate pair.')

        if len(seq) != 2:
            raise UsageError('Point parameter needs 2 coordinates.')
        try:
            # filter() drops non-finite values, so NaN/inf inputs fail
            # the unpacking with a ValueError and are reported as errors.
            x, y = filter(math.isfinite, map(float, seq))
        except ValueError as exc:
            raise UsageError('Point parameter needs to be numbers.') from exc

        if x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0:
            raise UsageError('Point coordinates invalid.')

        return Point(x, y)


    def to_wkt(self) -> str:
        """ Return the WKT representation of the point.
        """
        return f'POINT({self.x} {self.y})'
|
||||
|
||||
|
||||
|
||||
# A point argument may be given as a Point or as a plain (x, y) tuple.
AnyPoint = Union[Point, Tuple[float, float]]

# EWKB headers of a WGS84 (SRID 4326) polygon with a single ring of five
# points, as produced for bounding boxes; one constant per byte order.
WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
|
||||
|
||||
class Bbox:
    """ A bounding box in WGS84 projection.

        The coordinates are available as an array in the 'coord'
        property in the order (minx, miny, maxx, maxy).
    """
    def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
        """ Create a new bounding box with the given coordinates in WGS84
            projection.
        """
        self.coords = (minx, miny, maxx, maxy)

    @property
    def minlat(self) -> float:
        """ Southern-most latitude, corresponding to the minimum y coordinate.
        """
        return self.coords[1]

    @property
    def maxlat(self) -> float:
        """ Northern-most latitude, corresponding to the maximum y coordinate.
        """
        return self.coords[3]

    @property
    def minlon(self) -> float:
        """ Western-most longitude, corresponding to the minimum x coordinate.
        """
        return self.coords[0]

    @property
    def maxlon(self) -> float:
        """ Eastern-most longitude, corresponding to the maximum x coordinate.
        """
        return self.coords[2]

    @property
    def area(self) -> float:
        """ Return the area of the box in WGS84.
        """
        return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1])

    def contains(self, pt: Point) -> bool:
        """ Check if the point is inside or on the boundary of the box.
        """
        return self.coords[0] <= pt[0] and self.coords[1] <= pt[1]\
               and self.coords[2] >= pt[0] and self.coords[3] >= pt[1]

    def to_wkt(self) -> str:
        """ Return the WKT representation of the Bbox. This
            is a simple polygon with four points.
        """
        return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\
                   .format(*self.coords) # pylint: disable=consider-using-f-string

    @staticmethod
    def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
        """ Create a Bbox from a bounding box polygon as returned by
            the database. Returns `None` if the input value is None.

            Raises a ValueError when the data is not a 5-point polygon
            in WGS84 projection.
        """
        if wkb is None:
            return None

        if isinstance(wkb, str):
            wkb = unhexlify(wkb)

        # 17 header bytes + 5 ring points of two doubles each.
        if len(wkb) != 97:
            raise ValueError("WKB must be a bounding box polygon")
        if wkb.startswith(WKB_BBOX_HEADER_LE):
            x1, y1, _, _, x2, y2 = unpack('<dddddd', wkb[17:65])
        elif wkb.startswith(WKB_BBOX_HEADER_BE):
            x1, y1, _, _, x2, y2 = unpack('>dddddd', wkb[17:65])
        else:
            raise ValueError("WKB has wrong header")

        return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))

    @staticmethod
    def from_point(pt: Point, buffer: float) -> 'Bbox':
        """ Return a Bbox around the point with the buffer added to all sides.
        """
        return Bbox(pt[0] - buffer, pt[1] - buffer,
                    pt[0] + buffer, pt[1] + buffer)

    @staticmethod
    def from_param(inp: Any) -> 'Bbox':
        """ Return a Bbox from an input parameter. The box may be
            given as a Bbox, a string or a list or strings or integer.
            Raises a UsageError if the format is incorrect.
        """
        if isinstance(inp, Bbox):
            return inp

        seq: Sequence[str]
        if isinstance(inp, str):
            seq = inp.split(',')
        elif isinstance(inp, abc.Sequence):
            seq = inp
        else:
            # Previously 'seq' was left unbound here, so unsupported input
            # types raised an UnboundLocalError instead of a UsageError.
            raise UsageError('Bounding box parameter needs to be a comma-separated '
                             'string or a sequence of numbers.')

        if len(seq) != 4:
            raise UsageError('Bounding box parameter needs 4 coordinates.')
        try:
            # TypeError covers non-numeric sequence elements (e.g. None).
            x1, y1, x2, y2 = filter(math.isfinite, map(float, seq))
        except (ValueError, TypeError) as exc:
            raise UsageError('Bounding box parameter needs to be numbers.') from exc

        # Clamp coordinates to the valid WGS84 ranges.
        x1 = min(180, max(-180, x1))
        x2 = min(180, max(-180, x2))
        y1 = min(90, max(-90, y1))
        y2 = min(90, max(-90, y2))

        if x1 == x2 or y1 == y2:
            raise UsageError('Bounding box with invalid parameters.')

        return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
|
||||
|
||||
|
||||
class GeometryFormat(enum.Flag):
    """ All search functions support returning the full geometry of a place in
        various formats. The internal geometry is converted by PostGIS to
        the desired format and then returned as a string. It is possible to
        request multiple formats at the same time.
    """
    NONE = 0
    """ No geometry requested. Alias for an empty flag.
    """
    GEOJSON = enum.auto()
    """
    [GeoJSON](https://geojson.org/) format
    """
    KML = enum.auto()
    """
    [KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
    """
    SVG = enum.auto()
    """
    [SVG](http://www.w3.org/TR/SVG/paths.html) format
    """
    TEXT = enum.auto()
    """
    [WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
    """
|
||||
|
||||
|
||||
class DataLayer(enum.Flag):
    """ The `DataLayer` flag type defines the layers that can be selected
        for reverse and forward search. Being a flag enum, multiple layers
        may be combined with the bitwise or operator, e.g.
        `DataLayer.ADDRESS | DataLayer.POI`.
    """
    ADDRESS = enum.auto()
    """ The address layer contains all places relevant for addresses:
        fully qualified addresses with a house number (or a house name equivalent,
        for some addresses) and places that can be part of an address like
        roads, cities, states.
    """
    POI = enum.auto()
    """ Layer for points of interest like shops, restaurants but also
        recycling bins or postboxes.
    """
    RAILWAY = enum.auto()
    """ Layer with railway features including tracks and other infrastructure.
        Note that in Nominatim's standard configuration, only very few railway
        features are imported into the database. Thus a custom configuration
        is required to make full use of this layer.
    """
    NATURAL = enum.auto()
    """ Layer with natural features like rivers, lakes and mountains.
    """
    MANMADE = enum.auto()
    """ Layer with other human-made features and boundaries. This layer is
        the catch-all and includes all features not covered by the other
        layers. A typical example for this layer are national park boundaries.
    """
|
||||
|
||||
|
||||
def format_country(cc: Any) -> List[str]:
    """ Extract a list of country codes from the input which may be either
        a string or list of strings. Filters out all values that are not
        a two-letter string.
    """
    if isinstance(cc, str):
        candidates: Sequence[str] = cc.split(',')
    elif isinstance(cc, abc.Sequence):
        candidates = cc
    else:
        raise UsageError("Parameter 'country' needs to be a comma-separated list "
                         "or a Python list of strings.")

    # Keep only well-formed two-letter codes and normalise them to lower case.
    return [code.lower() for code in candidates
            if isinstance(code, str) and len(code) == 2]
|
||||
|
||||
|
||||
def format_excluded(ids: Any) -> List[int]:
    """ Extract a list of place ids from the input which may be either
        a string or a list of strings or ints. Ignores empty value but
        throws a UserError on anything that cannot be converted to int.
    """
    if isinstance(ids, str):
        parts: Sequence[str] = [part.strip() for part in ids.split(',')]
    elif isinstance(ids, abc.Sequence):
        parts = ids
    else:
        raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
                         "or a Python list of numbers.")

    # Every entry must be an int or a (possibly empty) digit string.
    for part in parts:
        if not (isinstance(part, int)
                or (isinstance(part, str) and (not part or part.isdigit()))):
            raise UsageError("Parameter 'excluded' only takes place IDs.")

    # Drop empty entries; an empty result becomes [0] so that the filter
    # never matches a real place.
    return [int(part) for part in parts if part] or [0]
|
||||
|
||||
|
||||
def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
    """ Extract a list of categories. Currently a noop.

        The input list of (class, type) tuples is returned unchanged,
        without copying.
    """
    return categories
|
||||
|
||||
# Type variable bound to LookupDetails so that from_kwargs() returns the
# concrete subclass it is invoked on (LookupDetails, ReverseDetails, ...).
TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
|
||||
|
||||
@dataclasses.dataclass
class LookupDetails:
    """ Collection of parameters that define which kind of details are
        returned with a lookup or details result.
    """
    geometry_output: GeometryFormat = GeometryFormat.NONE
    """ Add the full geometry of the place to the result. Multiple
        formats may be selected. Note that geometries can become quite large.
    """
    address_details: bool = False
    """ Get detailed information on the places that make up the address
        for the result.
    """
    linked_places: bool = False
    """ Get detailed information on the places that link to the result.
    """
    parented_places: bool = False
    """ Get detailed information on all places that this place is a parent
        for, i.e. all places for which it provides the address details.
        Only POI places can have parents.
    """
    keywords: bool = False
    """ Add information about the search terms used for this place.
    """
    geometry_simplification: float = 0.0
    """ Simplification factor for a geometry in degrees WGS. A factor of
        0.0 means the original geometry is kept. The higher the value, the
        more the geometry gets simplified.
    """
    # NOTE(review): this single Locales instance is shared as the default by
    # all LookupDetails objects — assumed to be immutable; confirm.
    locales: Locales = Locales()
    """ Preferred languages for localization of results.
    """

    @classmethod
    def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
        """ Load the data fields of the class from a dictionary.
            Unknown entries in the dictionary are ignored, missing ones
            get the default setting.

            The function supports type checking and throws a UsageError
            when the value does not fit.
        """
        def _check_field(v: Any, field: 'dataclasses.Field[Any]') -> Any:
            # None falls back to the field's default (or default_factory).
            if v is None:
                return field.default_factory() \
                       if field.default_factory != dataclasses.MISSING \
                       else field.default
            # A 'transform' entry in the field metadata takes precedence
            # over the plain isinstance check and may convert the value.
            if field.metadata and 'transform' in field.metadata:
                return field.metadata['transform'](v)
            if not isinstance(v, field.type):
                raise UsageError(f"Parameter '{field.name}' needs to be of {field.type!s}.")
            return v

        # Only fields actually present in kwargs are passed on; the
        # dataclass supplies the defaults for all others.
        return cls(**{f.name: _check_field(kwargs[f.name], f)
                      for f in dataclasses.fields(cls) if f.name in kwargs})
|
||||
|
||||
|
||||
@dataclasses.dataclass
class ReverseDetails(LookupDetails):
    """ Collection of parameters for the reverse call.
    """
    # The transform clamps user-supplied values into the valid range [0, 30].
    max_rank: int = dataclasses.field(default=30,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                      )
    """ Highest address rank to return.
    """
    layers: DataLayer = DataLayer.ADDRESS | DataLayer.POI
    """ Filter which kind of data to include.
    """
|
||||
|
||||
@dataclasses.dataclass
class SearchDetails(LookupDetails):
    """ Collection of parameters for the search call.
    """
    max_results: int = 10
    """ Maximum number of results to be returned. The actual number of results
        may be less.
    """
    # The rank transforms clamp user-supplied values into the valid
    # range [0, 30].
    min_rank: int = dataclasses.field(default=0,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                      )
    """ Lowest address rank to return.
    """
    max_rank: int = dataclasses.field(default=30,
                                      metadata={'transform': lambda v: max(0, min(v, 30))}
                                      )
    """ Highest address rank to return.
    """
    layers: Optional[DataLayer] = dataclasses.field(default=None,
                                                    metadata={'transform': lambda r : r})
    """ Filter which kind of data to include. When 'None' (the default) then
        filtering by layers is disabled.
    """
    countries: List[str] = dataclasses.field(default_factory=list,
                                             metadata={'transform': format_country})
    """ Restrict search results to the given countries. An empty list (the
        default) will disable this filter.
    """
    excluded: List[int] = dataclasses.field(default_factory=list,
                                            metadata={'transform': format_excluded})
    """ List of OSM objects to exclude from the results. Currently only
        works when the internal place ID is given.
        An empty list (the default) will disable this filter.
    """
    viewbox: Optional[Bbox] = dataclasses.field(default=None,
                                                metadata={'transform': Bbox.from_param})
    """ Focus the search on a given map area.
    """
    bounded_viewbox: bool = False
    """ Use 'viewbox' as a filter and restrict results to places within the
        given area.
    """
    near: Optional[Point] = dataclasses.field(default=None,
                                              metadata={'transform': Point.from_param})
    """ Order results by distance to the given point.
    """
    near_radius: Optional[float] = dataclasses.field(default=None,
                                                     metadata={'transform': lambda r : r})
    """ Use near point as a filter and drop results outside the given
        radius. Radius is given in degrees WSG84.
    """
    categories: List[Tuple[str, str]] = dataclasses.field(default_factory=list,
                                                          metadata={'transform': format_categories})
    """ Restrict search to places with one of the given class/type categories.
        An empty list (the default) will disable this filter.
    """
    # Derived field: the viewbox grown to twice its width and height around
    # the same center; computed in __post_init__ when a viewbox is set.
    viewbox_x2: Optional[Bbox] = None

    def __post_init__(self) -> None:
        if self.viewbox is not None:
            # Extend the viewbox by half of its extent on every side,
            # doubling its dimensions around the same center.
            xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
            yext = (self.viewbox.maxlat - self.viewbox.minlat)/2
            self.viewbox_x2 = Bbox(self.viewbox.minlon - xext, self.viewbox.minlat - yext,
                                   self.viewbox.maxlon + xext, self.viewbox.maxlat + yext)

    def restrict_min_max_rank(self, new_min: int, new_max: int) -> None:
        """ Change the min_rank and max_rank fields to respect the
            given boundaries.

            Ranks are only narrowed, never widened: min_rank can only
            grow and max_rank can only shrink.
        """
        assert new_min <= new_max
        self.min_rank = max(self.min_rank, new_min)
        self.max_rank = min(self.max_rank, new_max)

    def is_impossible(self) -> bool:
        """ Check if the parameter configuration is contradictory and
            cannot yield any results.
        """
        # NOTE(review): the second clause declares the search impossible when
        # the 'near' point lies *inside* the bounded viewbox. That condition
        # looks inverted (a near point outside the restricting viewbox would
        # be the contradiction) — verify against callers before relying on it.
        return (self.min_rank > self.max_rank
                or (self.bounded_viewbox
                    and self.viewbox is not None and self.near is not None
                    and self.viewbox.contains(self.near))
                or (self.layers is not None and not self.layers)
                or (self.max_rank <= 4 and
                    self.layers is not None and not self.layers & DataLayer.ADDRESS))

    def layer_enabled(self, layer: DataLayer) -> bool:
        """ Check if the given layer has been chosen. Also returns
            true when layer restriction has been disabled completely.
        """
        return self.layers is None or bool(self.layers & layer)
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user