forked from hans/Nominatim
Compare commits
234 Commits
docs-4.3.x
...
4.4.x
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
896a40b7d5 | ||
|
|
1e71085004 | ||
|
|
3c05d98b80 | ||
|
|
7bbdf57b08 | ||
|
|
5e91b78ff4 | ||
|
|
ad72641424 | ||
|
|
75130e4332 | ||
|
|
47b41ed510 | ||
|
|
b4e2e7de16 | ||
|
|
1c3ed66ca1 | ||
|
|
e5a5f02666 | ||
|
|
11ced26025 | ||
|
|
edb1eec46d | ||
|
|
63eacc5589 | ||
|
|
e929693cae | ||
|
|
ae7c584e28 | ||
|
|
4d5faf9423 | ||
|
|
b7eea4d53a | ||
|
|
dd2c794de5 | ||
|
|
3b6d35fc12 | ||
|
|
9fa73cfb15 | ||
|
|
62b7670e0c | ||
|
|
d7bb449e74 | ||
|
|
247065ff6f | ||
|
|
9a84adef59 | ||
|
|
1879cf902c | ||
|
|
019a68a4bb | ||
|
|
110491011f | ||
|
|
36b1660121 | ||
|
|
56201feb28 | ||
|
|
c6d40d4bf4 | ||
|
|
a4f2e6a893 | ||
|
|
b427fc7965 | ||
|
|
e264604894 | ||
|
|
3a5d9f0377 | ||
|
|
8be27015b2 | ||
|
|
100391fb8e | ||
|
|
dc1baaa0af | ||
|
|
7205491b84 | ||
|
|
918fec73c6 | ||
|
|
b6df486525 | ||
|
|
8bd8a040e0 | ||
|
|
781e83ddc3 | ||
|
|
5afd96d210 | ||
|
|
cf49a070fd | ||
|
|
4aba36c5ac | ||
|
|
ca6e65fff1 | ||
|
|
1e0025b095 | ||
|
|
173e85c9e6 | ||
|
|
ffb467028e | ||
|
|
05fad607ff | ||
|
|
19360a9552 | ||
|
|
b087f3ab7b | ||
|
|
2c8fb31381 | ||
|
|
b2d3f0a8b3 | ||
|
|
bd8025feab | ||
|
|
4c19762e33 | ||
|
|
1015ac40ae | ||
|
|
4ce13f5c1f | ||
|
|
2833362cf6 | ||
|
|
bc51378aee | ||
|
|
39039e2a55 | ||
|
|
f523c01571 | ||
|
|
81eed0680c | ||
|
|
33c0f249b1 | ||
|
|
76eadc562c | ||
|
|
3cc3e3b2e3 | ||
|
|
f07f8530a8 | ||
|
|
103800a732 | ||
|
|
f9ba7a465a | ||
|
|
fed46240d5 | ||
|
|
2703442fd2 | ||
|
|
2813bf18e6 | ||
|
|
dcebea376d | ||
|
|
b3a2b3d484 | ||
|
|
7321e66d08 | ||
|
|
9627352ee4 | ||
|
|
bfc7acbb18 | ||
|
|
e0ca2ce6ec | ||
|
|
b969c5a62f | ||
|
|
28f7e51279 | ||
|
|
d35eb4105e | ||
|
|
b2afe3ce3e | ||
|
|
7337898b84 | ||
|
|
4305160c91 | ||
|
|
dc52d0954e | ||
|
|
d3a575319f | ||
|
|
2592bf1954 | ||
|
|
88d7ffa274 | ||
|
|
474d4230b8 | ||
|
|
10a5424a71 | ||
|
|
7eb04f67e2 | ||
|
|
1d7e078a2c | ||
|
|
f03ec3ea12 | ||
|
|
8e90fa3395 | ||
|
|
02af0a2c87 | ||
|
|
fa4e5513d1 | ||
|
|
93afe5a7c3 | ||
|
|
af85ad390f | ||
|
|
ab45db5360 | ||
|
|
89094cf92e | ||
|
|
3f5484f48f | ||
|
|
ff06b64329 | ||
|
|
6d39563b87 | ||
|
|
0d840c8d4e | ||
|
|
381bd0b576 | ||
|
|
b5c61e0b5b | ||
|
|
df6eddebcd | ||
|
|
b6c8c0e72b | ||
|
|
b06f5fddcb | ||
|
|
8791c6cb69 | ||
|
|
615b166c68 | ||
|
|
c41f2fed21 | ||
|
|
05e47fbb28 | ||
|
|
1b7c8240ba | ||
|
|
c4fd3ab97f | ||
|
|
8c7140d92b | ||
|
|
3969ce0f55 | ||
|
|
4f5f5ea8fc | ||
|
|
5f7cc91cf9 | ||
|
|
424c1f0d41 | ||
|
|
cff05394a1 | ||
|
|
638b40c3ec | ||
|
|
53d2050dc5 | ||
|
|
97ac036df5 | ||
|
|
482f7fe3ba | ||
|
|
567c31ab6a | ||
|
|
7d28fc35d1 | ||
|
|
c06f902398 | ||
|
|
59ae63e6f5 | ||
|
|
9c7d947fd1 | ||
|
|
58db0ad6d8 | ||
|
|
3b09c39dbf | ||
|
|
db917cb0d4 | ||
|
|
ba6cdd875d | ||
|
|
d231ff60ed | ||
|
|
c74904d075 | ||
|
|
22204050f2 | ||
|
|
667197a47e | ||
|
|
e8b866aa88 | ||
|
|
e7b8e1a2c2 | ||
|
|
279b4fd6d2 | ||
|
|
b7c83d3580 | ||
|
|
d4018f2e3b | ||
|
|
38369ca3cf | ||
|
|
cc0bdd34e9 | ||
|
|
8e71ff329c | ||
|
|
b4e3d0ea44 | ||
|
|
992703b15e | ||
|
|
ba5ec80611 | ||
|
|
1c1447e709 | ||
|
|
3c32c0354a | ||
|
|
8a2c6067a2 | ||
|
|
d60a45715a | ||
|
|
3c7a28dab0 | ||
|
|
0c72a434e0 | ||
|
|
32e7b59b1f | ||
|
|
f448423727 | ||
|
|
b2319e52ff | ||
|
|
25279d009a | ||
|
|
3f72ca4bca | ||
|
|
70dc4957dc | ||
|
|
d8ed565bce | ||
|
|
a7f5c6c8f5 | ||
|
|
a8b023e57e | ||
|
|
47ca56f21b | ||
|
|
580a7b032f | ||
|
|
8fcc2bb7f5 | ||
|
|
d6fe58f84e | ||
|
|
2d54de09bb | ||
|
|
4e4d29f653 | ||
|
|
195c13ee8a | ||
|
|
ac5ef64701 | ||
|
|
e7dc24c026 | ||
|
|
155f26060d | ||
|
|
a87fe8d8bf | ||
|
|
158df6b2e8 | ||
|
|
b8db76c925 | ||
|
|
fffdfc9b88 | ||
|
|
6478409b05 | ||
|
|
ee556fd42e | ||
|
|
9a1b8a67d6 | ||
|
|
383e3ccd25 | ||
|
|
b4ce1fb599 | ||
|
|
2bf8e62580 | ||
|
|
afb439b089 | ||
|
|
78a87ad16b | ||
|
|
5bf55a69a5 | ||
|
|
ca782e2f20 | ||
|
|
308de35802 | ||
|
|
a9ac68a729 | ||
|
|
84d6b481ae | ||
|
|
613c8635a8 | ||
|
|
899a04ad26 | ||
|
|
d8dca2a3a9 | ||
|
|
8216899a9a | ||
|
|
b1d419f458 | ||
|
|
0417946153 | ||
|
|
1149578e8f | ||
|
|
37488ee82b | ||
|
|
06bbd501fd | ||
|
|
07e6c5cf69 | ||
|
|
d0c91e4acf | ||
|
|
114cdafe7e | ||
|
|
837bdecde8 | ||
|
|
d9d0e70e5b | ||
|
|
1255efba7f | ||
|
|
6ad397d4a9 | ||
|
|
570ca22d71 | ||
|
|
418f381b49 | ||
|
|
2cae37ccde | ||
|
|
650fbc2563 | ||
|
|
9ec26c60ff | ||
|
|
06204dfcd8 | ||
|
|
fbe40e005d | ||
|
|
e9efef9095 | ||
|
|
95c3181a35 | ||
|
|
12dbfb0777 | ||
|
|
b62dbd1f92 | ||
|
|
5011fde176 | ||
|
|
54cb9a33b1 | ||
|
|
f1fbcd863d | ||
|
|
b00b16aa3a | ||
|
|
0f19695225 | ||
|
|
7fcbe13669 | ||
|
|
87c91ec5c4 | ||
|
|
0e10916b07 | ||
|
|
21df87dedc | ||
|
|
fd26310d6a | ||
|
|
5762a5bc80 | ||
|
|
8106e67f14 | ||
|
|
f029fb3c65 | ||
|
|
44da684d1d | ||
|
|
f59a072aa6 |
7
.codespellrc
Normal file
7
.codespellrc
Normal file
@@ -0,0 +1,7 @@
|
||||
# https://github.com/codespell-project/codespell
|
||||
|
||||
[codespell]
|
||||
skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
|
||||
# Need to be lowercase in the list
|
||||
# Unter = Unter den Linden (an example address)
|
||||
ignore-words-list = inout,unter
|
||||
6
.github/actions/build-nominatim/action.yml
vendored
6
.github/actions/build-nominatim/action.yml
vendored
@@ -25,12 +25,12 @@ runs:
|
||||
shell: bash
|
||||
- name: Install${{ matrix.flavour }} prerequisites
|
||||
run: |
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev
|
||||
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
|
||||
if [ "$FLAVOUR" == "oldstuff" ]; then
|
||||
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg
|
||||
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
|
||||
else
|
||||
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
|
||||
pip3 install sqlalchemy psycopg
|
||||
pip3 install sqlalchemy psycopg aiosqlite
|
||||
fi
|
||||
shell: bash
|
||||
env:
|
||||
|
||||
129
.github/workflows/ci-tests.yml
vendored
129
.github/workflows/ci-tests.yml
vendored
@@ -11,7 +11,7 @@ jobs:
|
||||
with:
|
||||
submodules: true
|
||||
|
||||
- uses: actions/cache@v3
|
||||
- uses: actions/cache@v4
|
||||
with:
|
||||
path: |
|
||||
data/country_osm_grid.sql.gz
|
||||
@@ -27,7 +27,7 @@ jobs:
|
||||
mv nominatim-src.tar.bz2 Nominatim
|
||||
|
||||
- name: 'Upload Artifact'
|
||||
uses: actions/upload-artifact@v3
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
path: nominatim-src.tar.bz2
|
||||
@@ -43,41 +43,29 @@ jobs:
|
||||
ubuntu: 20
|
||||
postgresql: '9.6'
|
||||
postgis: '2.5'
|
||||
php: '7.3'
|
||||
lua: '5.1'
|
||||
- flavour: ubuntu-20
|
||||
ubuntu: 20
|
||||
postgresql: 13
|
||||
postgis: 3
|
||||
php: '7.4'
|
||||
lua: '5.3'
|
||||
- flavour: ubuntu-22
|
||||
ubuntu: 22
|
||||
postgresql: 15
|
||||
postgis: 3
|
||||
php: '8.1'
|
||||
lua: '5.3'
|
||||
|
||||
runs-on: ubuntu-${{ matrix.ubuntu }}.04
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
- name: Unpack Nominatim
|
||||
run: tar xf nominatim-src.tar.bz2
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: ${{ matrix.php }}
|
||||
tools: phpunit:9, phpcs, composer
|
||||
ini-values: opcache.jit=disable
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- uses: actions/setup-python@v4
|
||||
- uses: actions/setup-python@v5
|
||||
with:
|
||||
python-version: 3.7
|
||||
if: matrix.flavour == 'oldstuff'
|
||||
@@ -113,22 +101,16 @@ jobs:
|
||||
if: matrix.flavour == 'oldstuff'
|
||||
|
||||
- name: Install Python webservers
|
||||
run: pip3 install falcon starlette
|
||||
run: pip3 install falcon starlette asgi_lifespan
|
||||
|
||||
- name: Install latest pylint
|
||||
run: pip3 install -U pylint asgi_lifespan
|
||||
|
||||
- name: PHP linting
|
||||
run: phpcs --report-width=120 .
|
||||
working-directory: Nominatim
|
||||
run: pip3 install -U pylint
|
||||
if: matrix.flavour == 'ubuntu-22'
|
||||
|
||||
- name: Python linting
|
||||
run: python3 -m pylint nominatim
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
working-directory: Nominatim/test/php
|
||||
if: matrix.flavour == 'ubuntu-22'
|
||||
|
||||
- name: Python unit tests
|
||||
run: python3 -m pytest test/python
|
||||
@@ -152,8 +134,12 @@ jobs:
|
||||
needs: create-archive
|
||||
runs-on: ubuntu-20.04
|
||||
|
||||
strategy:
|
||||
matrix:
|
||||
postgresql: ["13", "16"]
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
@@ -167,11 +153,13 @@ jobs:
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 13
|
||||
postgresql-version: ${{ matrix.postgresql }}
|
||||
postgis-version: 3
|
||||
|
||||
- name: Install Postgresql server dev
|
||||
run: sudo apt-get install postgresql-server-dev-13
|
||||
run: sudo apt-get install postgresql-server-dev-$PGVER
|
||||
env:
|
||||
PGVER: ${{ matrix.postgresql }}
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
@@ -182,16 +170,16 @@ jobs:
|
||||
|
||||
- name: BDD tests (legacy tokenizer)
|
||||
run: |
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php -DTOKENIZER=legacy --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
|
||||
python-api-test:
|
||||
php-test:
|
||||
needs: create-archive
|
||||
runs-on: ubuntu-22.04
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
@@ -203,6 +191,23 @@ jobs:
|
||||
postgresql-version: 15
|
||||
postgis-version: 3
|
||||
|
||||
- name: Setup PHP
|
||||
uses: shivammathur/setup-php@v2
|
||||
with:
|
||||
php-version: 8.1
|
||||
tools: phpunit:9, phpcs, composer
|
||||
ini-values: opcache.jit=disable
|
||||
env:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: PHP linting
|
||||
run: phpcs --report-width=120 .
|
||||
working-directory: Nominatim
|
||||
|
||||
- name: PHP unit tests
|
||||
run: phpunit ./
|
||||
working-directory: Nominatim/test/php
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
flavour: 'ubuntu-22'
|
||||
@@ -210,12 +215,9 @@ jobs:
|
||||
- name: Install test prerequsites
|
||||
run: sudo apt-get install -y -qq python3-behave
|
||||
|
||||
- name: Install Python webservers
|
||||
run: pip3 install starlette asgi_lifespan httpx
|
||||
|
||||
- name: BDD tests (starlette)
|
||||
- name: BDD tests (php)
|
||||
run: |
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=starlette --format=progress3
|
||||
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php --format=progress3
|
||||
working-directory: Nominatim/test/bdd
|
||||
|
||||
|
||||
@@ -265,7 +267,7 @@ jobs:
|
||||
OS: ${{ matrix.name }}
|
||||
INSTALL_MODE: ${{ matrix.install_mode }}
|
||||
|
||||
- uses: actions/download-artifact@v3
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
path: /home/nominatim
|
||||
@@ -346,3 +348,54 @@ jobs:
|
||||
- name: Clean up database (reverse-only import)
|
||||
run: nominatim refresh --postcodes --word-tokens
|
||||
working-directory: /home/nominatim/nominatim-project
|
||||
|
||||
install-no-superuser:
|
||||
runs-on: ubuntu-latest
|
||||
needs: create-archive
|
||||
|
||||
steps:
|
||||
- uses: actions/download-artifact@v4
|
||||
with:
|
||||
name: full-source
|
||||
|
||||
- name: Unpack Nominatim
|
||||
run: tar xf nominatim-src.tar.bz2
|
||||
|
||||
- uses: ./Nominatim/.github/actions/setup-postgresql
|
||||
with:
|
||||
postgresql-version: 16
|
||||
postgis-version: 3
|
||||
|
||||
- uses: ./Nominatim/.github/actions/build-nominatim
|
||||
with:
|
||||
flavour: ubuntu-22
|
||||
lua: 5.3
|
||||
|
||||
- name: Prepare import environment
|
||||
run: |
|
||||
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
|
||||
rm -rf Nominatim
|
||||
|
||||
- name: Prepare Database
|
||||
run: |
|
||||
nominatim import --prepare-database
|
||||
|
||||
- name: Create import user
|
||||
run: |
|
||||
sudo -u postgres createuser osm-import
|
||||
psql -d nominatim -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import'"
|
||||
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "osm-import"'
|
||||
|
||||
- name: Run import
|
||||
run: |
|
||||
NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file test.pbf
|
||||
|
||||
- name: Check full import
|
||||
run: nominatim admin --check-database
|
||||
|
||||
codespell:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: codespell-project/actions-codespell@v2
|
||||
with:
|
||||
only_warn: 1
|
||||
|
||||
@@ -19,8 +19,8 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
|
||||
project(nominatim)
|
||||
|
||||
set(NOMINATIM_VERSION_MAJOR 4)
|
||||
set(NOMINATIM_VERSION_MINOR 3)
|
||||
set(NOMINATIM_VERSION_PATCH 0)
|
||||
set(NOMINATIM_VERSION_MINOR 4)
|
||||
set(NOMINATIM_VERSION_PATCH 1)
|
||||
|
||||
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
|
||||
|
||||
@@ -82,13 +82,14 @@ endif()
|
||||
|
||||
# Setting PHP binary variable as to command line (prevailing) or auto detect
|
||||
|
||||
if (BUILD_API OR BUILD_IMPORTER)
|
||||
if (BUILD_API)
|
||||
if (NOT PHP_BIN)
|
||||
find_program (PHP_BIN php)
|
||||
endif()
|
||||
# sanity check if PHP binary exists
|
||||
if (NOT EXISTS ${PHP_BIN})
|
||||
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
|
||||
message(WARNING "PHP binary not found. Only Python frontend can be used.")
|
||||
set(PHP_BIN "")
|
||||
else()
|
||||
message (STATUS "Using PHP binary " ${PHP_BIN})
|
||||
endif()
|
||||
@@ -226,7 +227,11 @@ if (BUILD_IMPORTER)
|
||||
PATTERN "paths.py" EXCLUDE
|
||||
PATTERN __pycache__ EXCLUDE)
|
||||
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
|
||||
if (EXISTS ${PHP_BIN})
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
|
||||
else()
|
||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py-no-php.tmpl paths-py.installed)
|
||||
endif()
|
||||
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
|
||||
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
|
||||
RENAME paths.py)
|
||||
@@ -254,7 +259,7 @@ if (BUILD_MODULE)
|
||||
DESTINATION ${NOMINATIM_LIBDIR}/module)
|
||||
endif()
|
||||
|
||||
if (BUILD_API)
|
||||
if (BUILD_API AND EXISTS ${PHP_BIN})
|
||||
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
|
||||
endif()
|
||||
|
||||
|
||||
@@ -69,7 +69,7 @@ Before submitting a pull request make sure that the tests pass:
|
||||
|
||||
Nominatim follows semantic versioning. Major releases are done for large changes
|
||||
that require (or at least strongly recommend) a reimport of the databases.
|
||||
Minor releases can usually be applied to exisiting databases. Patch releases
|
||||
Minor releases can usually be applied to existing databases. Patch releases
|
||||
contain bug fixes only and are released from a separate branch where the
|
||||
relevant changes are cherry-picked from the master branch.
|
||||
|
||||
|
||||
63
ChangeLog
63
ChangeLog
@@ -1,3 +1,54 @@
|
||||
4.4.1
|
||||
* fix geocodejson output: admin level output should only print boundaries
|
||||
* updating: restrict invalidation of child objects on large street features
|
||||
* restrict valid interpolation house numbers to 0-999999
|
||||
* fix import error when SQLAlchemy 1.4 and psycopg3 are installed
|
||||
* various typo fixes in the documentation
|
||||
|
||||
4.4.0
|
||||
* add export to SQLite database and SQLite support for the frontend
|
||||
* switch to Python frontend as the default frontend
|
||||
* update to osm2pgsql 1.11.0
|
||||
* add support for new osm2pgsql middle table format
|
||||
* simplify geometry for large polygon objects not used in addresses
|
||||
* various performance tweaks for search in Python frontend
|
||||
* fix regression in search with categories where it was confused with near
|
||||
search
|
||||
* partially roll back use of SQLAlchemy lambda statements due to bugs
|
||||
in SQLAchemy
|
||||
* fix handling of timezones for timestamps from the database
|
||||
* fix handling of full address searches in connection with a viewbox
|
||||
* fix postcode computation of highway areas
|
||||
* fix handling of timeout errors for Python <= 3.10
|
||||
* fix address computation for postcode areas
|
||||
* fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
|
||||
* make sure extratags are always null when empty
|
||||
* reduce importance of places without wikipedia reference
|
||||
* improve performance of word count computations
|
||||
* drop support for wikipedia tags with full URLs
|
||||
* replace get_addressdata() SQL implementation with a Python function
|
||||
* improve display name for non-address features
|
||||
* fix postcode validation for postcodes with country code
|
||||
(thanks @pawel-wroniszewski)
|
||||
* add possibility to run imports without superuser database rights
|
||||
(thanks @robbe-haesendonck)
|
||||
* new CLI command for cleaning deleted relations (thanks @lujoh)
|
||||
* add check for database version in the CLI check command
|
||||
* updates to import styles ignoring more unused objects
|
||||
* various typo fixes (thanks @kumarUjjawal)
|
||||
|
||||
4.3.2
|
||||
* fix potential SQL injection issue for 'nominatim admin --collect-os-info'
|
||||
* PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
|
||||
* Python frontend: improve handling of viewbox
|
||||
* Python frontend: correct deployment instructions
|
||||
|
||||
4.3.1
|
||||
* reintroduce result rematching
|
||||
* improve search of multi-part names
|
||||
* fix accidentally switched meaning of --reverse-only and --search-only in
|
||||
warm command
|
||||
|
||||
4.3.0
|
||||
* fix failing importance recalculation command
|
||||
* fix merging of linked names into unnamed boundaries
|
||||
@@ -23,7 +74,7 @@
|
||||
* new documentation section for library
|
||||
* various smaller fixes to existing documentation
|
||||
(thanks @woodpeck, @bloom256, @biswajit-k)
|
||||
* updates to vagrant install scripts, drop support for Ubunut 18
|
||||
* updates to vagrant install scripts, drop support for Ubuntu 18
|
||||
(thanks @n-timofeev)
|
||||
* removed obsolete configuration variables from env.defaults
|
||||
* add script for generating a taginfo description (thanks @biswajit-k)
|
||||
@@ -240,7 +291,7 @@
|
||||
* increase splitting for large geometries to improve indexing speed
|
||||
* remove deprecated get_magic_quotes_gpc() function
|
||||
* make sure that all postcodes have an entry in word and are thus searchable
|
||||
* remove use of ST_Covers in conjunction woth ST_Intersects,
|
||||
* remove use of ST_Covers in conjunction with ST_Intersects,
|
||||
causes bad query planning and slow updates in Postgis3
|
||||
* update osm2pgsql
|
||||
|
||||
@@ -297,7 +348,7 @@
|
||||
* exclude postcode ranges separated by colon from centre point calculation
|
||||
* update osm2pgsql, better handling of imports without flatnode file
|
||||
* switch to more efficient algorithm for word set computation
|
||||
* use only boundries for country and state parts of addresses
|
||||
* use only boundaries for country and state parts of addresses
|
||||
* improve updates of addresses with housenumbers and interpolations
|
||||
* remove country from place_addressline table and use country_code instead
|
||||
* optimise indexes on search_name partition tables
|
||||
@@ -336,7 +387,7 @@
|
||||
|
||||
* complete rewrite of reverse search algorithm
|
||||
* add new geojson and geocodejson output formats
|
||||
* add simple export script to exprot addresses to CSV
|
||||
* add simple export script to export addresses to CSV
|
||||
* remove is_in terms from address computation
|
||||
* remove unused search_name_country tables
|
||||
* various smaller fixes to query parsing
|
||||
@@ -401,7 +452,7 @@
|
||||
* move installation documentation into this repo
|
||||
* add self-documenting vagrant scripts
|
||||
* remove --create-website, recommend to use website directory in build
|
||||
* add accessor functions for URL parameters and improve erro checking
|
||||
* add accessor functions for URL parameters and improve error checking
|
||||
* remove IP blocking and rate-limiting code
|
||||
* enable CI via travis
|
||||
* reformatting for more consistent coding style
|
||||
@@ -412,7 +463,7 @@
|
||||
* update to refactored osm2pgsql which use libosmium based types
|
||||
* switch from osmosis to pyosmium for updates
|
||||
* be more strict when matching against special search terms
|
||||
* handle postcode entries with mutliple values correctly
|
||||
* handle postcode entries with multiple values correctly
|
||||
|
||||
2.5
|
||||
|
||||
|
||||
@@ -9,10 +9,10 @@ versions.
|
||||
|
||||
| Version | End of support for security updates |
|
||||
| ------- | ----------------------------------- |
|
||||
| 4.4.x | 2026-03-07 |
|
||||
| 4.3.x | 2025-09-07 |
|
||||
| 4.2.x | 2024-11-24 |
|
||||
| 4.1.x | 2024-08-05 |
|
||||
| 4.0.x | 2023-11-02 |
|
||||
|
||||
## Reporting a Vulnerability
|
||||
|
||||
@@ -36,5 +36,6 @@ incident. Announcements will also be published at the
|
||||
|
||||
## List of Previous Incidents
|
||||
|
||||
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
|
||||
* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
|
||||
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
|
||||
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
|
||||
|
||||
2
Vagrantfile
vendored
2
Vagrantfile
vendored
@@ -38,7 +38,7 @@ Vagrant.configure("2") do |config|
|
||||
lv.memory = 2048
|
||||
lv.nested = true
|
||||
if ENV['CHECKOUT'] != 'y' then
|
||||
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
|
||||
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs', nfs_udp: false
|
||||
end
|
||||
end
|
||||
|
||||
|
||||
15
cmake/paths-py-no-php.tmpl
Normal file
15
cmake/paths-py-no-php.tmpl
Normal file
@@ -0,0 +1,15 @@
|
||||
# SPDX-License-Identifier: GPL-2.0-only
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2022 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Path settings for extra data used by Nominatim (installed version).
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
PHPLIB_DIR = None
|
||||
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
|
||||
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
|
||||
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()
|
||||
@@ -5,6 +5,35 @@ your Nominatim database. It is assumed that you have already successfully
|
||||
installed the Nominatim software itself, if not return to the
|
||||
[installation page](Installation.md).
|
||||
|
||||
## Importing with a database user without superuser rights
|
||||
|
||||
Nominatim usually creates its own PostgreSQL database at the beginning of the
|
||||
import process. This makes usage easier for the user but means that the
|
||||
database user doing the import needs the appropriate rights.
|
||||
|
||||
If you prefer to run the import with a database user with limited rights,
|
||||
you can do so by changing the import process as follows:
|
||||
|
||||
1. Run the command for database preparation with a database user with
|
||||
superuser rights. For example, to use a db user 'dbadmin' for a
|
||||
database 'nominatim', execute:
|
||||
|
||||
```
|
||||
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=dbadmin" nominatim import --prepare-database
|
||||
```
|
||||
|
||||
2. Grant the import user the right to create tables. For example, foe user 'import-user':
|
||||
|
||||
```
|
||||
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "import-user"'
|
||||
```
|
||||
|
||||
3. Now run the reminder of the import with the import user:
|
||||
|
||||
```
|
||||
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=import-user" nominatim import --continue import-from-file --osm-file file.pbf
|
||||
```
|
||||
|
||||
## Importing multiple regions (without updates)
|
||||
|
||||
To import multiple regions in your database you can simply give multiple
|
||||
|
||||
@@ -43,6 +43,22 @@ virtualenv /srv/nominatim-venv
|
||||
Next you need to set up the service that runs the Nominatim frontend. This is
|
||||
easiest done with a systemd job.
|
||||
|
||||
First you need to tell systemd to create a socket file to be used by
|
||||
hunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
|
||||
|
||||
``` systemd
|
||||
[Unit]
|
||||
Description=Gunicorn socket for Nominatim
|
||||
|
||||
[Socket]
|
||||
ListenStream=/run/nominatim.sock
|
||||
SocketUser=www-data
|
||||
|
||||
[Install]
|
||||
WantedBy=multi-user.target
|
||||
```
|
||||
|
||||
Now you can add the systemd service for Nominatim itself.
|
||||
Create the following file `/etc/systemd/system/nominatim.service`:
|
||||
|
||||
``` systemd
|
||||
@@ -74,12 +90,14 @@ its own Python process using
|
||||
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
|
||||
connections to the database to serve requests in parallel.
|
||||
|
||||
Make the new service known to systemd and start it:
|
||||
Make the new services known to systemd and start it:
|
||||
|
||||
``` sh
|
||||
sudo systemctl daemon-reload
|
||||
sudo systemctl enable nominatim
|
||||
sudo systemctl start nominatim
|
||||
sudo systemctl enable nominatim.socket
|
||||
sudo systemctl start nominatim.socket
|
||||
sudo systemctl enable nominatim.service
|
||||
sudo systemctl start nominatim.service
|
||||
```
|
||||
|
||||
This sets the service up, so that Nominatim is automatically started
|
||||
|
||||
@@ -37,40 +37,6 @@ nominatim import --continue indexing
|
||||
Otherwise it's best to start the full setup from the beginning.
|
||||
|
||||
|
||||
### PHP "open_basedir restriction in effect" warnings
|
||||
|
||||
PHP Warning: file_get_contents(): open_basedir restriction in effect.
|
||||
|
||||
You need to adjust the
|
||||
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
|
||||
setting in your PHP configuration (`php.ini` file). By default this setting may
|
||||
look like this:
|
||||
|
||||
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
|
||||
|
||||
Either add reported directories to the list or disable this setting temporarily
|
||||
by adding ";" at the beginning of the line. Don't forget to enable this setting
|
||||
again once you are done with the PHP command line operations.
|
||||
|
||||
|
||||
### PHP timezeone warnings
|
||||
|
||||
The Apache log may contain lots of PHP warnings like this:
|
||||
`PHP Warning: date_default_timezone_set() function.`
|
||||
|
||||
You should set the default time zone as instructed in the warning in
|
||||
your `php.ini` file. Find the entry about timezone and set it to
|
||||
something like this:
|
||||
|
||||
; Defines the default timezone used by the date functions
|
||||
; https://php.net/date.timezone
|
||||
date.timezone = 'America/Denver'
|
||||
|
||||
Or
|
||||
|
||||
```
|
||||
echo "date.timezone = 'America/Denver'" > /etc/php.d/timezone.ini
|
||||
```
|
||||
|
||||
### nominatim.so version mismatch
|
||||
|
||||
@@ -170,7 +136,7 @@ recreate `nominatim.so`. Try
|
||||
cmake $main_Nominatim_path && make
|
||||
```
|
||||
|
||||
### Setup.php fails with "DB Error: extension not found"
|
||||
### Setup fails with "DB Error: extension not found"
|
||||
|
||||
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
|
||||
See the installation instructions for a full list of required packages.
|
||||
|
||||
@@ -14,15 +14,15 @@ to a single Nominatim setup: configuration, extra data, etc. Create a project
|
||||
directory apart from the Nominatim software and change into the directory:
|
||||
|
||||
```
|
||||
mkdir ~/nominatim-planet
|
||||
cd ~/nominatim-planet
|
||||
mkdir ~/nominatim-project
|
||||
cd ~/nominatim-project
|
||||
```
|
||||
|
||||
In the following, we refer to the project directory as `$PROJECT_DIR`. To be
|
||||
able to copy&paste instructions, you can export the appropriate variable:
|
||||
|
||||
```
|
||||
export PROJECT_DIR=~/nominatim-planet
|
||||
export PROJECT_DIR=~/nominatim-project
|
||||
```
|
||||
|
||||
The Nominatim tool assumes per default that the current working directory is
|
||||
@@ -153,7 +153,7 @@ if you plan to use the installation only for exports to a
|
||||
[photon](https://photon.komoot.io/) database, then you can set up a database
|
||||
without search indexes. Add `--reverse-only` to your setup command above.
|
||||
|
||||
This saves about 5% of disk space.
|
||||
This saves about 5% of disk space, import time won't be significant faster.
|
||||
|
||||
### Filtering Imported Data
|
||||
|
||||
@@ -228,7 +228,7 @@ to load the OSM data into the PostgreSQL database. This step is very demanding
|
||||
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
|
||||
this point. PostgreSQL blocks at least the part of RAM that has been configured
|
||||
with the `shared_buffers` parameter during
|
||||
[PostgreSQL tuning](Installation.md#postgresql-tuning)
|
||||
[PostgreSQL tuning](Installation.md#tuning-the-postgresql-database)
|
||||
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
|
||||
its internal data structures, potentially more when it has to process very large
|
||||
relations. In addition it needs to maintain a cache for node locations. The size
|
||||
@@ -268,27 +268,10 @@ nominatim reverse --lat 51 --lon 45
|
||||
```
|
||||
|
||||
If you want to run Nominatim as a service, you need to make a choice between
|
||||
running the traditional PHP frontend or the new experimental Python frontend.
|
||||
running the modern Python frontend and the legacy PHP frontend.
|
||||
Make sure you have installed the right packages as per
|
||||
[Installation](Installation.md#software).
|
||||
|
||||
#### Testing the PHP frontend
|
||||
|
||||
You can run a small test server with the PHP frontend like this:
|
||||
|
||||
```sh
|
||||
nominatim serve
|
||||
```
|
||||
|
||||
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
|
||||
or, for reverse-only installations a reverse query,
|
||||
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
|
||||
Do not use this test server in production.
|
||||
To run Nominatim via webservers like Apache or nginx, please continue reading
|
||||
[Deploy the PHP frontend](Deployment-PHP.md).
|
||||
|
||||
#### Testing the Python frontend
|
||||
|
||||
To run the test server against the Python frontend, you must choose a
|
||||
@@ -296,10 +279,10 @@ web framework to use, either starlette or falcon. Make sure the appropriate
|
||||
packages are installed. Then run
|
||||
|
||||
``` sh
|
||||
nominatim serve --engine falcon
|
||||
nominatim serve
|
||||
```
|
||||
|
||||
or
|
||||
or, if you prefer to use Starlette instead of Falcon as webserver,
|
||||
|
||||
``` sh
|
||||
nominatim serve --engine starlette
|
||||
@@ -314,6 +297,24 @@ Do not use this test server in production.
|
||||
To run Nominatim via webservers like Apache or nginx, please continue reading
|
||||
[Deploy the Python frontend](Deployment-Python.md).
|
||||
|
||||
#### Testing the PHP frontend
|
||||
|
||||
You can run a small test server with the PHP frontend like this:
|
||||
|
||||
```sh
|
||||
nominatim serve --engine php
|
||||
```
|
||||
|
||||
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
|
||||
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
|
||||
or, for reverse-only installations a reverse query,
|
||||
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
|
||||
|
||||
Do not use this test server in production.
|
||||
To run Nominatim via webservers like Apache or nginx, please continue reading
|
||||
[Deploy the PHP frontend](Deployment-PHP.md).
|
||||
|
||||
|
||||
|
||||
## Enabling search by category phrases
|
||||
|
||||
|
||||
@@ -55,23 +55,24 @@ For running Nominatim:
|
||||
* [PyYaml](https://pyyaml.org/) (5.1+)
|
||||
* [datrie](https://github.com/pytries/datrie)
|
||||
|
||||
When running the PHP frontend:
|
||||
|
||||
* [PHP](https://php.net) (7.3+)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
|
||||
For running continuous updates:
|
||||
|
||||
* [pyosmium](https://osmcode.org/pyosmium/)
|
||||
|
||||
For running the experimental Python frontend:
|
||||
For running the Python frontend:
|
||||
|
||||
* one of the following web frameworks:
|
||||
* [falcon](https://falconframework.org/) (3.0+)
|
||||
* [starlette](https://www.starlette.io/)
|
||||
* [uvicorn](https://www.uvicorn.org/)
|
||||
|
||||
For running the legacy PHP frontend:
|
||||
|
||||
* [PHP](https://php.net) (7.3+)
|
||||
* PHP-pgsql
|
||||
* PHP-intl (bundled with PHP)
|
||||
|
||||
|
||||
For dependencies for running tests and building documentation, see
|
||||
the [Development section](../develop/Development-Environment.md).
|
||||
|
||||
|
||||
@@ -60,16 +60,13 @@ to finish the recomputation.
|
||||
|
||||
## Removing large deleted objects
|
||||
|
||||
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
|
||||
|
||||
Nominatim refuses to delete very large areas because often these deletions are
|
||||
accidental and are reverted within hours. Instead the deletions are logged in
|
||||
the `import_polygon_delete` table and left to the administrator to clean up.
|
||||
|
||||
There is currently no command to do that. You can use the following SQL
|
||||
query to force a deletion on all objects that have been deleted more than
|
||||
a certain timespan ago (here: 1 month):
|
||||
To run this command you will need to pass a PostgreSQL time interval. For example to
|
||||
delete any objects that have been deleted more than a month ago you would run:
|
||||
`nominatim admin --clean-deleted '1 month'`
|
||||
|
||||
```sql
|
||||
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
|
||||
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
|
||||
and age(p.indexed_date) > '1 month'::interval
|
||||
```
|
||||
|
||||
@@ -149,6 +149,8 @@ In terms of address details the zoom levels are as follows:
|
||||
|-----------| ----- | ------- |
|
||||
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
|
||||
|
||||
**`[Python-only]`**
|
||||
|
||||
The layer filter allows to select places by themes.
|
||||
|
||||
The `address` layer contains all places that make up an address:
|
||||
@@ -163,7 +165,7 @@ The `railway` layer includes railway infrastructure like tracks.
|
||||
Note that in Nominatim's standard configuration, only very few railway
|
||||
features are imported into the database.
|
||||
|
||||
The `natural` layer collects feautures like rivers, lakes and mountains while
|
||||
The `natural` layer collects features like rivers, lakes and mountains while
|
||||
the `manmade` layer functions as a catch-all for features not covered by the
|
||||
other layers.
|
||||
|
||||
|
||||
@@ -165,7 +165,7 @@ results and thus give a preference to some results over others.
|
||||
|-----------| ----- | ------- |
|
||||
| countrycodes | comma-separated list of country codes | _unset_ |
|
||||
|
||||
Filer that limits the search results to one or more countries.
|
||||
Filter that limits the search results to one or more countries.
|
||||
The country code must be the
|
||||
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
|
||||
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
|
||||
@@ -179,7 +179,7 @@ also excluded when the filter is set.
|
||||
This parameter should not be confused with the 'country' parameter of
|
||||
the structured query. The 'country' parameter contains a search term
|
||||
and will be handled with some fuzziness. The `countrycodes` parameter
|
||||
is a hard filter and as such should be prefered. Having both parameters
|
||||
is a hard filter and as such should be preferred. Having both parameters
|
||||
in the same query will work. If the parameters contradict each other,
|
||||
the search will come up empty.
|
||||
|
||||
@@ -187,6 +187,8 @@ also excluded when the filter is set.
|
||||
|-----------| ----- | ------- |
|
||||
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
|
||||
|
||||
**`[Python-only]`**
|
||||
|
||||
The layer filter allows to select places by themes.
|
||||
|
||||
The `address` layer contains all places that make up an address:
|
||||
@@ -201,7 +203,7 @@ The `railway` layer includes railway infrastructure like tracks.
|
||||
Note that in Nominatim's standard configuration, only very few railway
|
||||
features are imported into the database.
|
||||
|
||||
The `natural` layer collects feautures like rivers, lakes and mountains while
|
||||
The `natural` layer collects features like rivers, lakes and mountains while
|
||||
the `manmade` layer functions as a catch-all for features not covered by the
|
||||
other layers.
|
||||
|
||||
@@ -215,7 +217,7 @@ the 'state', 'country' or 'city' part of an address. A featureType of
|
||||
settlement selects any human inhabited feature from 'state' down to
|
||||
'neighbourhood'.
|
||||
|
||||
When featureType ist set, then results are automatically restricted
|
||||
When featureType is set, then results are automatically restricted
|
||||
to the address layer (see above).
|
||||
|
||||
!!! tip
|
||||
@@ -225,7 +227,7 @@ to the address layer (see above).
|
||||
|
||||
| Parameter | Value | Default |
|
||||
|-----------| ----- | ------- |
|
||||
| exclude_place_ids | comma-separeted list of place ids |
|
||||
| exclude_place_ids | comma-separated list of place ids |
|
||||
|
||||
If you do not want certain OSM objects to appear in the search
|
||||
result, give a comma separated list of the `place_id`s you want to skip.
|
||||
@@ -246,7 +248,7 @@ box. `x` is longitude, `y` is latitude.
|
||||
| bounded | 0 or 1 | 0 |
|
||||
|
||||
When set to 1, then it turns the 'viewbox' parameter (see above) into
|
||||
a filter paramter, excluding any results outside the viewbox.
|
||||
a filter parameter, excluding any results outside the viewbox.
|
||||
|
||||
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
|
||||
search is allowed. Give the special keyword for the amenity in square
|
||||
|
||||
@@ -280,7 +280,7 @@ kinds of geometries can be used:
|
||||
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
|
||||
the relation. If the ways do not form a valid area, then the object is
|
||||
silently discarded.
|
||||
* __relation_as_multiline__ creates a (Mutli)LineString from the ways in
|
||||
* __relation_as_multiline__ creates a (Multi)LineString from the ways in
|
||||
the relations. Ways are combined as much as possible without any regards
|
||||
to their order in the relation.
|
||||
|
||||
|
||||
55
docs/customize/SQLite.md
Normal file
55
docs/customize/SQLite.md
Normal file
@@ -0,0 +1,55 @@
|
||||
A Nominatim database can be converted into an SQLite database and used as
|
||||
a read-only source for geocoding queries. This sections describes how to
|
||||
create and use an SQLite database.
|
||||
|
||||
!!! danger
|
||||
This feature is in an experimental state at the moment. Use at your own
|
||||
risk.
|
||||
|
||||
## Installing prerequisites
|
||||
|
||||
To use a SQLite database, you need to install:
|
||||
|
||||
* SQLite (>= 3.30)
|
||||
* Spatialite (> 5.0.0)
|
||||
|
||||
On Ubuntu/Debian, you can run:
|
||||
|
||||
sudo apt install sqlite3 libsqlite3-mod-spatialite libspatialite7
|
||||
|
||||
## Creating a new SQLite database
|
||||
|
||||
Nominatim cannot import directly into SQLite database. Instead you have to
|
||||
first create a geocoding database in PostgreSQL by running a
|
||||
[regular Nominatim import](../admin/Import.md).
|
||||
|
||||
Once this is done, the database can be converted to SQLite with
|
||||
|
||||
nominatim convert -o mydb.sqlite
|
||||
|
||||
This will create a database where all geocoding functions are available.
|
||||
Depending on what functions you need, the database can be made smaller:
|
||||
|
||||
* `--without-reverse` omits indexes only needed for reverse geocoding
|
||||
* `--without-search` omit tables and indexes used for forward search
|
||||
* `--without-details` leaves out extra information only available in the
|
||||
details API
|
||||
|
||||
## Using an SQLite database
|
||||
|
||||
Once you have created the database, you can use it by simply pointing the
|
||||
database DSN to the SQLite file:
|
||||
|
||||
NOMINATIM_DATABASE_DSN=sqlite:dbname=mydb.sqlite
|
||||
|
||||
Please note that SQLite support is only available for the Python frontend. To
|
||||
use the test server with an SQLite database, you therefore need to switch
|
||||
the frontend engine:
|
||||
|
||||
nominatim serve --engine falcon
|
||||
|
||||
You need to install falcon or starlette for this, depending on which engine
|
||||
you choose.
|
||||
|
||||
The CLI query commands and the library interface already use the new Python
|
||||
frontend and therefore work right out of the box.
|
||||
@@ -394,7 +394,7 @@ The analyzer cannot be customized.
|
||||
##### Postcode token analyzer
|
||||
|
||||
The analyzer `postcodes` is pupose-made to analyze postcodes. It supports
|
||||
a 'lookup' varaint of the token, which produces variants with optional
|
||||
a 'lookup' variant of the token, which produces variants with optional
|
||||
spaces. Use together with the clean-postcodes sanitizer.
|
||||
|
||||
The analyzer cannot be customized.
|
||||
|
||||
@@ -129,7 +129,7 @@ sanitizers:
|
||||
!!! warning
|
||||
This example is just a simplified show case on how to create a sanitizer.
|
||||
It is not really read for real-world use: while the sanitizer would
|
||||
correcly transform `West 5th Street` into `5th Street`. it would also
|
||||
correctly transform `West 5th Street` into `5th Street`. it would also
|
||||
shorten a simple `North Street` to `Street`.
|
||||
|
||||
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
|
||||
|
||||
@@ -10,7 +10,7 @@ There are two kind of tests in this test suite. There are functional tests
|
||||
which test the API interface using a BDD test framework and there are unit
|
||||
tests for specific PHP functions.
|
||||
|
||||
This test directory is sturctured as follows:
|
||||
This test directory is structured as follows:
|
||||
|
||||
```
|
||||
-+- bdd Functional API tests
|
||||
|
||||
@@ -18,7 +18,7 @@ elseif (has 'addr:place'?) then (yes)
|
||||
**with same name**;
|
||||
kill
|
||||
else (no)
|
||||
:add addr:place to adress;
|
||||
:add addr:place to address;
|
||||
:**Use closest place**\n**rank 16 to 25**;
|
||||
kill
|
||||
endif
|
||||
|
||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 9.8 KiB After Width: | Height: | Size: 9.8 KiB |
@@ -14,7 +14,7 @@ in the database.
|
||||
|
||||
The library also misses a proper installation routine, so some manipulation
|
||||
of the PYTHONPATH is required. At the moment, use is only recommended for
|
||||
developers wit some experience in Python.
|
||||
developers with some experience in Python.
|
||||
|
||||
## Installation
|
||||
|
||||
|
||||
@@ -22,8 +22,8 @@ nav:
|
||||
- 'Basic Installation': 'admin/Installation.md'
|
||||
- 'Import' : 'admin/Import.md'
|
||||
- 'Update' : 'admin/Update.md'
|
||||
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
|
||||
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
|
||||
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
|
||||
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
|
||||
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
|
||||
- 'Maintenance' : 'admin/Maintenance.md'
|
||||
@@ -40,6 +40,7 @@ nav:
|
||||
- 'Special Phrases': 'customize/Special-Phrases.md'
|
||||
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
|
||||
- 'External data: Postcodes': 'customize/Postcodes.md'
|
||||
- 'Conversion to SQLite': 'customize/SQLite.md'
|
||||
- 'Library Guide':
|
||||
- 'Getting Started': 'library/Getting-Started.md'
|
||||
- 'Nominatim API class': 'library/NominatimAPI.md'
|
||||
|
||||
@@ -187,6 +187,7 @@ BEGIN
|
||||
|
||||
-- --- Return the record for the base entry.
|
||||
|
||||
current_rank_address := 1000;
|
||||
FOR location IN
|
||||
SELECT placex.place_id, osm_type, osm_id, name,
|
||||
coalesce(extratags->'linked_place', extratags->'place') as place_type,
|
||||
@@ -261,7 +262,7 @@ BEGIN
|
||||
-- If the place had a postcode assigned, take this one only
|
||||
-- into consideration when it is an area and the place does not have
|
||||
-- a postcode itself.
|
||||
IF location.fromarea AND location.isaddress
|
||||
IF location.fromarea AND location_isaddress
|
||||
AND (place.address is null or not place.address ? 'postcode')
|
||||
THEN
|
||||
place.postcode := null; -- remove the less exact postcode
|
||||
|
||||
@@ -62,10 +62,6 @@ BEGIN
|
||||
WHILE langs[i] IS NOT NULL LOOP
|
||||
wiki_article := extratags->(case when langs[i] in ('english','country') THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
|
||||
IF wiki_article is not null THEN
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/wiki/',E'\\2:');
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/w/index.php\\?title=',E'\\2:');
|
||||
wiki_article := regexp_replace(wiki_article,E'^(.*?)/([a-z]{2,3})/wiki/',E'\\2:');
|
||||
--wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3})[=:]',E'\\2:');
|
||||
wiki_article := replace(wiki_article,' ','_');
|
||||
IF strpos(wiki_article, ':') IN (3,4) THEN
|
||||
wiki_article_language := lower(trim(split_part(wiki_article, ':', 1)));
|
||||
@@ -134,7 +130,7 @@ BEGIN
|
||||
|
||||
-- Still nothing? Fall back to a default.
|
||||
IF result.importance is null THEN
|
||||
result.importance := 0.75001 - (rank_search::float / 40);
|
||||
result.importance := 0.40001 - (rank_search::float / 75);
|
||||
END IF;
|
||||
|
||||
{% if 'secondary_importance' in db.tables %}
|
||||
|
||||
@@ -219,10 +219,11 @@ BEGIN
|
||||
-- formatted postcode and therefore 'postcode' contains a derived
|
||||
-- variant.
|
||||
CASE WHEN address ? 'postcode' THEN placex.postcode ELSE NULL::text END as postcode,
|
||||
substring(address->'housenumber','[0-9]+')::integer as hnr
|
||||
(address->'housenumber')::integer as hnr
|
||||
FROM placex, generate_series(1, array_upper(waynodes, 1)) nodeidpos
|
||||
WHERE osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT
|
||||
and address is not NULL and address ? 'housenumber'
|
||||
and address->'housenumber' ~ '^[0-9]{1,6}$'
|
||||
and ST_Distance(NEW.linegeo, geometry) < 0.0005
|
||||
ORDER BY nodeidpos
|
||||
LOOP
|
||||
|
||||
@@ -296,7 +296,9 @@ BEGIN
|
||||
extratags = NEW.extratags,
|
||||
admin_level = NEW.admin_level,
|
||||
indexed_status = 2,
|
||||
geometry = NEW.geometry
|
||||
geometry = CASE WHEN existingplacex.rank_address = 0
|
||||
THEN simplify_large_polygons(NEW.geometry)
|
||||
ELSE NEW.geometry END
|
||||
WHERE place_id = existingplacex.place_id;
|
||||
|
||||
-- Invalidate linked places: they potentially get a new name and addresses.
|
||||
@@ -363,57 +365,3 @@ BEGIN
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
CREATE OR REPLACE FUNCTION flush_deleted_places()
|
||||
RETURNS INTEGER
|
||||
AS $$
|
||||
BEGIN
|
||||
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
|
||||
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
|
||||
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
|
||||
|
||||
-- delete from place table
|
||||
ALTER TABLE place DISABLE TRIGGER place_before_delete;
|
||||
DELETE FROM place USING place_to_be_deleted
|
||||
WHERE place.osm_type = place_to_be_deleted.osm_type
|
||||
and place.osm_id = place_to_be_deleted.osm_id
|
||||
and place.class = place_to_be_deleted.class
|
||||
and place.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
ALTER TABLE place ENABLE TRIGGER place_before_delete;
|
||||
|
||||
-- Mark for delete in the placex table
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
|
||||
and placex.osm_id = place_to_be_deleted.osm_id
|
||||
and placex.class = place_to_be_deleted.class
|
||||
and placex.type = place_to_be_deleted.type
|
||||
and not deferred;
|
||||
|
||||
-- Mark for delete in interpolations
|
||||
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
|
||||
WHERE place_to_be_deleted.osm_type = 'W'
|
||||
and place_to_be_deleted.class = 'place'
|
||||
and place_to_be_deleted.type = 'houses'
|
||||
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
|
||||
and not deferred;
|
||||
|
||||
-- Clear todo list.
|
||||
TRUNCATE TABLE place_to_be_deleted;
|
||||
|
||||
RETURN NULL;
|
||||
END;
|
||||
$$ LANGUAGE plpgsql;
|
||||
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
--
|
||||
-- This file is part of Nominatim. (https://nominatim.org)
|
||||
--
|
||||
-- Copyright (C) 2022 by the Nominatim developer community.
|
||||
-- Copyright (C) 2024 by the Nominatim developer community.
|
||||
-- For a full list of authors see the git log.
|
||||
|
||||
-- Trigger functions for the placex table.
|
||||
@@ -119,12 +119,14 @@ CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
|
||||
AS $$
|
||||
DECLARE
|
||||
location RECORD;
|
||||
member JSONB;
|
||||
parent RECORD;
|
||||
result BIGINT;
|
||||
distance FLOAT;
|
||||
new_distance FLOAT;
|
||||
waygeom GEOMETRY;
|
||||
BEGIN
|
||||
{% if db.middle_db_format == '1' %}
|
||||
FOR location IN
|
||||
SELECT members FROM planet_osm_rels
|
||||
WHERE parts @> ARRAY[poi_osm_id]
|
||||
@@ -161,6 +163,40 @@ BEGIN
|
||||
END LOOP;
|
||||
END LOOP;
|
||||
|
||||
{% else %}
|
||||
FOR member IN
|
||||
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(members)
|
||||
WHERE planet_osm_member_ids(members, poi_osm_type::char(1)) && ARRAY[poi_osm_id]
|
||||
and tags->>'type' = 'associatedStreet'
|
||||
and value->>'role' = 'street'
|
||||
LOOP
|
||||
FOR parent IN
|
||||
SELECT place_id, geometry
|
||||
FROM placex
|
||||
WHERE osm_type = (member->>'type')::char(1)
|
||||
and osm_id = (member->>'ref')::bigint
|
||||
and name is not null
|
||||
and rank_search between 26 and 27
|
||||
LOOP
|
||||
-- Find the closest 'street' member.
|
||||
-- Avoid distance computation for the frequent case where there is
-- only one street member.
IF waygeom is null THEN
result := parent.place_id;
waygeom := parent.geometry;
ELSE
distance := coalesce(distance, ST_Distance(waygeom, bbox));
new_distance := ST_Distance(parent.geometry, bbox);
IF new_distance < distance THEN
distance := new_distance;
result := parent.place_id;
waygeom := parent.geometry;
END IF;
END IF;
END LOOP;
END LOOP;
{% endif %}

RETURN result;
END;
$$
@@ -257,7 +293,11 @@ CREATE OR REPLACE FUNCTION find_linked_place(bnd placex)
RETURNS placex
AS $$
DECLARE
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_members JSONB;
{% endif %}
rel_member RECORD;
linked_placex placex%ROWTYPE;
bnd_name TEXT;
@@ -678,6 +718,12 @@ BEGIN
NEW.country_code := NULL;
END IF;

-- Simplify polygons with a very large memory footprint when they
-- do not take part in address computation.
IF NEW.rank_address = 0 THEN
NEW.geometry := simplify_large_polygons(NEW.geometry);
END IF;

END IF;

{% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}
@@ -685,10 +731,12 @@ BEGIN
{% if not disable_diff_updates %}
-- The following is not needed until doing diff updates, and slows the main index process down

IF NEW.rank_address > 0 THEN
IF NEW.rank_address between 2 and 27 THEN
IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN
-- Performance: We just can't handle re-indexing for country level changes
IF st_area(NEW.geometry) < 1 THEN
IF (NEW.rank_address < 26 and st_area(NEW.geometry) < 1)
OR (NEW.rank_address >= 26 and st_area(NEW.geometry) < 0.01)
THEN
-- mark items within the geometry for re-indexing
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;

@@ -703,9 +751,11 @@ BEGIN
or name is not null
or (NEW.rank_address >= 16 and address ? 'place'));
END IF;
ELSE
ELSEIF ST_GeometryType(NEW.geometry) not in ('ST_LineString', 'ST_MultiLineString')
OR ST_Length(NEW.geometry) < 0.5
THEN
-- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :(
diameter := update_place_diameter(NEW.rank_search);
diameter := update_place_diameter(NEW.rank_address);
IF diameter > 0 THEN
-- RAISE WARNING 'placex point insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter;
IF NEW.rank_search >= 26 THEN
@@ -749,7 +799,11 @@ CREATE OR REPLACE FUNCTION placex_update()
DECLARE
i INTEGER;
location RECORD;
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_member JSONB;
{% endif %}

geom GEOMETRY;
parent_address_level SMALLINT;
@@ -794,6 +848,9 @@ BEGIN
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search);

NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
IF NEW.extratags = ''::hstore THEN
NEW.extratags := NULL;
END IF;

-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
-- the previous link status.
@@ -968,6 +1025,7 @@ BEGIN

-- waterway ways are linked when they are part of a relation and have the same class/type
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
{% if db.middle_db_format == '1' %}
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
LOOP
FOR i IN 1..array_upper(relation_members, 1) BY 2 LOOP
@@ -986,6 +1044,29 @@ BEGIN
END IF;
END LOOP;
END LOOP;
{% else %}
FOR relation_member IN
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(r.members)
WHERE r.id = NEW.osm_id
LOOP
IF relation_member->>'role' IN ('', 'main_stream', 'side_stream')
and relation_member->>'type' = 'W'
THEN
{% if debug %}RAISE WARNING 'waterway parent %, child %', NEW.osm_id, relation_member;{% endif %}
FOR linked_node_id IN
SELECT place_id FROM placex
WHERE osm_type = 'W' and osm_id = (relation_member->>'ref')::bigint
and class = NEW.class and type in ('river', 'stream', 'canal', 'drain', 'ditch')
and (relation_member->>'role' != 'side_stream' or NEW.name->'name' = name->'name')
LOOP
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
{% if 'search_name' in db.tables %}
DELETE FROM search_name WHERE place_id = linked_node_id;
{% endif %}
END LOOP;
END IF;
END LOOP;
{% endif %}
{% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
END IF;

@@ -1188,6 +1269,8 @@ BEGIN
END IF;
ELSEIF NEW.rank_address > 25 THEN
max_rank := 25;
ELSEIF NEW.class in ('place','boundary') and NEW.type in ('postcode','postal_code') THEN
max_rank := NEW.rank_search;
ELSE
max_rank := NEW.rank_address;
END IF;
@@ -1241,7 +1324,9 @@ BEGIN
OR ST_GeometryType(NEW.geometry) not in ('ST_LineString','ST_MultiLineString')
OR ST_Length(NEW.geometry) < 0.02)
THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
NEW.postcode := get_nearest_postcode(NEW.country_code,
CASE WHEN NEW.rank_address > 25
THEN NEW.centroid ELSE NEW.geometry END);
END IF;

{% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %}

@@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE;


CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
term_vectors TEXT[],
weight_vectors FLOAT[],
rankings TEXT,
def_weight FLOAT)
RETURNS FLOAT
AS $$
DECLARE
pos INT := 1;
terms TEXT;
rank JSON;
BEGIN
FOREACH terms IN ARRAY term_vectors
FOR rank IN
SELECT * FROM json_array_elements(rankings::JSON)
LOOP
IF search_vector @> terms::INTEGER[] THEN
RETURN weight_vectors[pos];
IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
RETURN (rank->>0)::float;
END IF;
pos := pos + 1;
END LOOP;
RETURN def_weight;
END;

@@ -73,6 +73,26 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;


CREATE OR REPLACE FUNCTION get_rel_node_members(members JSONB, memberLabels TEXT[])
RETURNS SETOF BIGINT
AS $$
DECLARE
member JSONB;
BEGIN
FOR member IN SELECT * FROM jsonb_array_elements(members)
LOOP
IF member->>'type' = 'N' and member->>'role' = ANY(memberLabels) THEN
RETURN NEXT (member->>'ref')::bigint;
END IF;
END LOOP;

RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
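A quick illustration of the new helper above; this is a sketch only and not part of the diff, and the relation id and role labels are placeholders chosen for the example:

-- Hypothetical usage sketch only: list the node members of one relation
-- that carry one of the given roles, using the JSONB member format.
SELECT get_rel_node_members(r.members, ARRAY['label', 'admin_centre'])
  FROM planet_osm_rels r
 WHERE r.id = 1234567;  -- placeholder relation id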
-- Copy 'name' to or from the default language.
--
-- \param country_code Country code of the object being named.
@@ -416,6 +436,20 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;

CREATE OR REPLACE FUNCTION simplify_large_polygons(geometry GEOMETRY)
RETURNS GEOMETRY
AS $$
BEGIN
IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
and ST_MemSize(geometry) > 3000000
THEN
geometry := ST_SimplifyPreserveTopology(geometry, 0.0001);
END IF;
RETURN geometry;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
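The simplification helper above can also be exercised on its own; a minimal sketch, not part of the diff, with the target table and filter chosen only to mirror the rank_address = 0 case handled in placex_insert:

-- Hypothetical usage sketch only: shrink oversized polygons that do not
-- take part in address computation.
UPDATE placex
   SET geometry = simplify_large_polygons(geometry)
 WHERE rank_address = 0
   AND ST_MemSize(geometry) > 3000000;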
CREATE OR REPLACE FUNCTION place_force_delete(placeid BIGINT)
RETURNS BOOLEAN
@@ -487,3 +521,56 @@ BEGIN
END;
$$
LANGUAGE plpgsql;

CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;

-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;

-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;

-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;

-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;

RETURN NULL;
END;
$$ LANGUAGE plpgsql;

@@ -23,6 +23,10 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
USING GIST (geometry) {{db.tablespace.search_index}};
-- Index is needed during import but can be dropped as soon as a full
-- geometry index is in place. The partial index is almost as big as the full
-- index.
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {{db.tablespace.search_index}}

@@ -298,7 +298,15 @@ CREATE TABLE IF NOT EXISTS wikipedia_redirect (

-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet'];
{% if db.middle_db_format == '1' %}
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts)
{{db.tablespace.address_index}}
WHERE tags @> ARRAY['associatedStreet'];
{% else %}
CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(planet_osm_member_ids(members, 'R'::character(1)))
WITH (fastupdate=off)
{{db.tablespace.address_index}};
{% endif %}

-- Needed for lookups if a node is part of an interpolation.
CREATE INDEX IF NOT EXISTS idx_place_interpolations

@@ -1,40 +0,0 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS word;
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,
word text,
info jsonb
) {{db.tablespace.search_data}};

CREATE INDEX idx_word_word_token ON word
USING BTREE (word_token) {{db.tablespace.search_index}};
-- Used when updating country names from the boundary relation.
CREATE INDEX idx_word_country_names ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'C';
-- Used when inserting new postcodes on updates.
CREATE INDEX idx_word_postcodes ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'P';
-- Used when inserting full words.
CREATE INDEX idx_word_full_word ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'W';
-- Used when inserting analyzed housenumbers (exclude old-style entries).
CREATE INDEX idx_word_housenumbers ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'H' and word is not null;

GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
@@ -347,7 +347,7 @@ BEGIN
END LOOP;
END IF;

-- consider parts before an opening braket a full word as well
-- consider parts before an opening bracket a full word as well
words := regexp_split_to_array(value, E'[(]');
IF array_upper(words, 1) > 1 THEN
s := make_standard_name(words[1]);

@@ -1,6 +1,6 @@
# just use the pgxs makefile

foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
list(APPEND PG_CONFIG_HINTS
"/usr/pgsql-${suffix}/bin")
endforeach()

@@ -11,10 +11,12 @@
#include "mb/pg_wchar.h"
#include <utfasciitable.h>

#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#if PG_MAJORVERSION_NUM > 15
#include "varatt.h"
#endif

PG_MODULE_MAGIC;

Datum transliteration( PG_FUNCTION_ARGS );
Datum gettokenstring( PG_FUNCTION_ARGS );
void str_replace(char* buffer, int* len, int* changes, char* from, int fromlen, char* to, int tolen, int);
@@ -19,6 +19,7 @@ import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim.errors import UsageError
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
import nominatim.db.sqlite_functions
from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
@@ -81,41 +82,78 @@ class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
if self._engine:
return

dsn = self.config.get_database_params()
pool_size = self.config.get_int('API_POOL_SIZE')
extra_args: Dict[str, Any] = {'future': True,
'echo': self.config.get_bool('DEBUG_SQL')}

query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}
if self.config.get_int('API_POOL_SIZE') == 0:
extra_args['poolclass'] = sa.pool.NullPool
else:
extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')

dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'), password=dsn.get('password'),
host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, future=True,
max_overflow=0, pool_size=pool_size,
echo=self.config.get_bool('DEBUG_SQL'))

try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
except (PGCORE_ERROR, sa.exc.OperationalError):
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')

if is_sqlite:
params = dict((p.split('=', 1)
for p in self.config.DATABASE_DSN[7:].split(';')))
dburl = sa.engine.URL.create('sqlite+aiosqlite',
database=params.get('dbname'))

if not ('NOMINATIM_DATABASE_RW' in self.config.environ
and self.config.get_bool('DATABASE_RW')) \
and not Path(params.get('dbname', '')).is_file():
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}

dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'),
password=dsn.get('password'),
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
query=query)

engine = sa_asyncio.create_async_engine(dburl, **extra_args)

if is_sqlite:
server_version = 0

if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
# Make sure that all connections get the new settings
await self.close()
cursor.execute("SELECT load_extension('mod_spatialite')")
cursor.execute('SELECT SetDecimalPrecision(7)')
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
else:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0

if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")

self._property_cache['DB:server_version'] = server_version

self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
self._engine = engine
@@ -336,7 +374,7 @@ class NominatimAPI:
""" Close all active connections to the database.

This function also closes the asynchronous worker loop making
the NominatimAPI object unusuable.
the NominatimAPI object unusable.
"""
self._loop.run_until_complete(self._async_api.close())
self._loop.close()
@@ -409,7 +447,7 @@ class NominatimAPI:
place. Only meaning full for POI-like objects (places with a
rank_address of 30).
linked_place_id (Optional[int]): Internal ID of the place this object
linkes to. When this ID is set then there is no guarantee that
links to. When this ID is set then there is no guarantee that
the rest of the result information is complete.
admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
for administrative boundary objects.

@@ -13,6 +13,7 @@ import datetime as dt
import textwrap
import io
import re
import html

import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -83,33 +84,49 @@ class BaseLogger:
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
extra_params: Union[Mapping[str, Any],
Sequence[Mapping[str, Any]], None]) -> str:
""" Return the comiled version of the statement.
""" Return the compiled version of the statement.
"""
compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)

params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
params[k] = str(v)
if hasattr(v, 'to_wkt'):
params[k] = v.to_wkt()
elif isinstance(v, (int, float)):
params[k] = v
else:
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'

sqlstr = str(compiled)

if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
if conn.dialect.name == 'postgresql':
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr

# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params

assert conn.dialect.name == 'sqlite'

# params in positional order
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore

sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)

return sqlstr
class HTMLLogger(BaseLogger):
|
||||
""" Logger that formats messages in HTML.
|
||||
@@ -211,7 +228,7 @@ class HTMLLogger(BaseLogger):
|
||||
HtmlFormatter(nowrap=True, lineseparator='<br />'))
|
||||
self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
|
||||
else:
|
||||
self._write(f'<code class="lang-sql">{sqlstr}</code>')
|
||||
self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')
|
||||
|
||||
|
||||
def _python_var(self, var: Any) -> str:
|
||||
@@ -219,7 +236,7 @@ class HTMLLogger(BaseLogger):
|
||||
fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
|
||||
return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
|
||||
|
||||
return f'<code class="lang-python">{str(var)}</code>'
|
||||
return f'<code class="lang-python">{html.escape(str(var))}</code>'
|
||||
|
||||
|
||||
def _write(self, text: str) -> None:
|
||||
@@ -235,6 +252,10 @@ class TextLogger(BaseLogger):
|
||||
self.buffer = io.StringIO()
|
||||
|
||||
|
||||
def _timestamp(self) -> None:
|
||||
self._write(f'[{dt.datetime.now()}]\n')
|
||||
|
||||
|
||||
def get_buffer(self) -> str:
|
||||
return self.buffer.getvalue()
|
||||
|
||||
@@ -247,6 +268,7 @@ class TextLogger(BaseLogger):
|
||||
|
||||
|
||||
def section(self, heading: str) -> None:
|
||||
self._timestamp()
|
||||
self._write(f"\n# {heading}\n\n")
|
||||
|
||||
|
||||
@@ -283,6 +305,7 @@ class TextLogger(BaseLogger):
|
||||
|
||||
|
||||
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
|
||||
self._timestamp()
|
||||
self._write(f'{heading}:\n')
|
||||
total = 0
|
||||
for rank, res in results:
|
||||
@@ -298,6 +321,7 @@ class TextLogger(BaseLogger):
|
||||
|
||||
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
|
||||
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
|
||||
self._timestamp()
|
||||
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
|
||||
self._write(f"| {sqlstr}\n\n")
|
||||
|
||||
|
||||
@@ -77,8 +77,8 @@ async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
|
||||
sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
|
||||
if place.osm_class and place.osm_class.isdigit():
|
||||
sql = sql.order_by(sa.func.greatest(0,
|
||||
sa.func.least(int(place.osm_class) - t.c.endnumber),
|
||||
t.c.startnumber - int(place.osm_class)))
|
||||
int(place.osm_class) - t.c.endnumber,
|
||||
t.c.startnumber - int(place.osm_class)))
|
||||
else:
|
||||
return None
|
||||
|
||||
@@ -163,11 +163,10 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
|
||||
|
||||
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
|
||||
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
|
||||
return sql.add_columns(sa.literal_column(f"""
|
||||
ST_AsGeoJSON(CASE WHEN ST_NPoints({column.name}) > 5000
|
||||
THEN ST_SimplifyPreserveTopology({column.name}, 0.0001)
|
||||
ELSE {column.name} END)
|
||||
""").label('geometry_geojson'))
|
||||
return sql.add_columns(sa.func.ST_AsGeoJSON(
|
||||
sa.case((sa.func.ST_NPoints(column) > 5000,
|
||||
sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
|
||||
else_=column), 7).label('geometry_geojson'))
|
||||
else:
|
||||
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
|
||||
return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
|
||||
@@ -183,9 +182,9 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
|
||||
|
||||
# add missing details
|
||||
assert result is not None
|
||||
result.parent_place_id = row.parent_place_id
|
||||
result.linked_place_id = getattr(row, 'linked_place_id', None)
|
||||
result.admin_level = getattr(row, 'admin_level', 15)
|
||||
if 'type' in result.geometry:
|
||||
result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
|
||||
result.geometry['type'])
|
||||
indexed_date = getattr(row, 'indexed_date', None)
|
||||
if indexed_date is not None:
|
||||
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
|
||||
@@ -211,13 +210,13 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
|
||||
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
|
||||
|
||||
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
|
||||
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
|
||||
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
|
||||
if details.geometry_output & ntyp.GeometryFormat.TEXT:
|
||||
out.append(sa.func.ST_AsText(col).label('geometry_text'))
|
||||
if details.geometry_output & ntyp.GeometryFormat.KML:
|
||||
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
|
||||
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
|
||||
if details.geometry_output & ntyp.GeometryFormat.SVG:
|
||||
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
|
||||
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
|
||||
|
||||
return sql.add_columns(*out)
|
||||
|
||||
@@ -239,3 +238,14 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
|
||||
await nres.add_result_details(conn, [result], details)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
GEOMETRY_TYPE_MAP = {
|
||||
'POINT': 'ST_Point',
|
||||
'MULTIPOINT': 'ST_MultiPoint',
|
||||
'LINESTRING': 'ST_LineString',
|
||||
'MULTILINESTRING': 'ST_MultiLineString',
|
||||
'POLYGON': 'ST_Polygon',
|
||||
'MULTIPOLYGON': 'ST_MultiPolygon',
|
||||
'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Helper classes and functions for formating results into API responses.
|
||||
Helper classes and functions for formatting results into API responses.
|
||||
"""
|
||||
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
|
||||
from collections import defaultdict
|
||||
|
||||
@@ -11,14 +11,15 @@ Data classes are part of the public API while the functions are for
|
||||
internal use only. That's why they are implemented as free-standing functions
|
||||
instead of member functions.
|
||||
"""
|
||||
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any, Union
|
||||
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
|
||||
import enum
|
||||
import dataclasses
|
||||
import datetime as dt
|
||||
|
||||
import sqlalchemy as sa
|
||||
|
||||
from nominatim.typing import SaSelect, SaRow, SaColumn
|
||||
from nominatim.typing import SaSelect, SaRow
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
from nominatim.api.types import Point, Bbox, LookupDetails
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
@@ -81,12 +82,6 @@ class AddressLine:
|
||||
and its function as an address object. Most fields are optional.
|
||||
Their presence depends on the kind and function of the address part.
|
||||
"""
|
||||
place_id: Optional[int]
|
||||
""" Internal ID of the place.
|
||||
"""
|
||||
osm_object: Optional[Tuple[str, int]]
|
||||
""" OSM type and ID of the place, if such an object exists.
|
||||
"""
|
||||
category: Tuple[str, str]
|
||||
""" Main category of the place, described by a key-value pair.
|
||||
"""
|
||||
@@ -94,16 +89,6 @@ class AddressLine:
|
||||
""" All available names for the place including references, alternative
|
||||
names and translations.
|
||||
"""
|
||||
extratags: Optional[Dict[str, str]]
|
||||
""" Any extra information available about the place. This is a dictionary
|
||||
that usually contains OSM tag key-value pairs.
|
||||
"""
|
||||
|
||||
admin_level: Optional[int]
|
||||
""" The administrative level of a boundary as tagged in the input data.
|
||||
This field is only meaningful for places of the category
|
||||
(boundary, administrative).
|
||||
"""
|
||||
fromarea: bool
|
||||
""" If true, then the exact area of the place is known. Without area
|
||||
information, Nominatim has to make an educated guess if an address
|
||||
@@ -123,6 +108,22 @@ class AddressLine:
|
||||
distance: float
|
||||
""" Distance in degrees between the result place and this address part.
|
||||
"""
|
||||
place_id: Optional[int] = None
|
||||
""" Internal ID of the place.
|
||||
"""
|
||||
osm_object: Optional[Tuple[str, int]] = None
|
||||
""" OSM type and ID of the place, if such an object exists.
|
||||
"""
|
||||
extratags: Optional[Dict[str, str]] = None
|
||||
""" Any extra information available about the place. This is a dictionary
|
||||
that usually contains OSM tag key-value pairs.
|
||||
"""
|
||||
|
||||
admin_level: Optional[int] = None
|
||||
""" The administrative level of a boundary as tagged in the input data.
|
||||
This field is only meaningful for places of the category
|
||||
(boundary, administrative).
|
||||
"""
|
||||
|
||||
local_name: Optional[str] = None
|
||||
""" Place holder for localization of this address part. See
|
||||
@@ -184,6 +185,9 @@ class BaseResult:
|
||||
|
||||
place_id : Optional[int] = None
|
||||
osm_object: Optional[Tuple[str, int]] = None
|
||||
parent_place_id: Optional[int] = None
|
||||
linked_place_id: Optional[int] = None
|
||||
admin_level: int = 15
|
||||
|
||||
locale_name: Optional[str] = None
|
||||
display_name: Optional[str] = None
|
||||
@@ -229,7 +233,7 @@ class BaseResult:
|
||||
of the value or an artificial value computed from the place's
|
||||
search rank.
|
||||
"""
|
||||
return self.importance or (0.7500001 - (self.rank_search/40.0))
|
||||
return self.importance or (0.40001 - (self.rank_search/75.0))
|
||||
|
||||
|
||||
def localize(self, locales: Locales) -> None:
|
||||
@@ -251,9 +255,6 @@ class DetailedResult(BaseResult):
|
||||
""" A search result with more internal information from the database
|
||||
added.
|
||||
"""
|
||||
parent_place_id: Optional[int] = None
|
||||
linked_place_id: Optional[int] = None
|
||||
admin_level: int = 15
|
||||
indexed_date: Optional[dt.datetime] = None
|
||||
|
||||
|
||||
@@ -292,12 +293,6 @@ class SearchResults(List[SearchResult]):
|
||||
May be empty when no result was found.
|
||||
"""
|
||||
|
||||
def localize(self, locales: Locales) -> None:
|
||||
""" Apply the given locales to all results.
|
||||
"""
|
||||
for result in self:
|
||||
result.localize(locales)
|
||||
|
||||
|
||||
def _filter_geometries(row: SaRow) -> Dict[str, str]:
|
||||
return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
|
||||
@@ -317,6 +312,9 @@ def create_from_placex_row(row: Optional[SaRow],
|
||||
place_id=row.place_id,
|
||||
osm_object=(row.osm_type, row.osm_id),
|
||||
category=(row.class_, row.type),
|
||||
parent_place_id = row.parent_place_id,
|
||||
linked_place_id = getattr(row, 'linked_place_id', None),
|
||||
admin_level = getattr(row, 'admin_level', 15),
|
||||
names=_mingle_name_tags(row.name),
|
||||
address=row.address,
|
||||
extratags=row.extratags,
|
||||
@@ -347,6 +345,7 @@ def create_from_osmline_row(row: Optional[SaRow],
|
||||
|
||||
res = class_type(source_table=SourceTable.OSMLINE,
|
||||
place_id=row.place_id,
|
||||
parent_place_id = row.parent_place_id,
|
||||
osm_object=('W', row.osm_id),
|
||||
category=('place', 'houses' if hnr is None else 'house'),
|
||||
address=row.address,
|
||||
@@ -383,6 +382,7 @@ def create_from_tiger_row(row: Optional[SaRow],
|
||||
|
||||
res = class_type(source_table=SourceTable.TIGER,
|
||||
place_id=row.place_id,
|
||||
parent_place_id = row.parent_place_id,
|
||||
osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
|
||||
category=('place', 'houses' if hnr is None else 'house'),
|
||||
postcode=row.postcode,
|
||||
@@ -411,6 +411,7 @@ def create_from_postcode_row(row: Optional[SaRow],
|
||||
|
||||
return class_type(source_table=SourceTable.POSTCODE,
|
||||
place_id=row.place_id,
|
||||
parent_place_id = row.parent_place_id,
|
||||
category=('place', 'postcode'),
|
||||
names={'ref': row.postcode},
|
||||
rank_search=row.rank_search,
|
||||
@@ -434,7 +435,8 @@ def create_from_country_row(row: Optional[SaRow],
|
||||
centroid=Point.from_wkb(row.centroid),
|
||||
names=row.name,
|
||||
rank_address=4, rank_search=4,
|
||||
country_code=row.country_code)
|
||||
country_code=row.country_code,
|
||||
geometry=_filter_geometries(row))
|
||||
|
||||
|
||||
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
|
||||
@@ -459,19 +461,24 @@ async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
|
||||
log().comment('Query keywords')
|
||||
for result in results:
|
||||
await complete_keywords(conn, result)
|
||||
for result in results:
|
||||
result.localize(details.locales)
|
||||
|
||||
|
||||
def _result_row_to_address_row(row: SaRow) -> AddressLine:
|
||||
""" Create a new AddressLine from the results of a datbase query.
|
||||
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
|
||||
""" Create a new AddressLine from the results of a database query.
|
||||
"""
|
||||
extratags: Dict[str, str] = getattr(row, 'extratags', {})
|
||||
if hasattr(row, 'place_type') and row.place_type:
|
||||
extratags['place'] = row.place_type
|
||||
extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
|
||||
if 'linked_place' in extratags:
|
||||
extratags['place'] = extratags['linked_place']
|
||||
|
||||
names = _mingle_name_tags(row.name) or {}
|
||||
if getattr(row, 'housenumber', None) is not None:
|
||||
names['housenumber'] = row.housenumber
|
||||
|
||||
if isaddress is None:
|
||||
isaddress = getattr(row, 'isaddress', True)
|
||||
|
||||
return AddressLine(place_id=row.place_id,
|
||||
osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
|
||||
category=(getattr(row, 'class'), row.type),
|
||||
@@ -479,109 +486,216 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
|
||||
extratags=extratags,
|
||||
admin_level=row.admin_level,
|
||||
fromarea=row.fromarea,
|
||||
isaddress=getattr(row, 'isaddress', True),
|
||||
isaddress=isaddress,
|
||||
rank_address=row.rank_address,
|
||||
distance=row.distance)
|
||||
|
||||
|
||||
def _get_housenumber_details(results: List[BaseResultT]) -> Tuple[List[int], List[int]]:
|
||||
places = []
|
||||
hnrs = []
|
||||
for result in results:
|
||||
if result.place_id:
|
||||
housenumber = -1
|
||||
if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
|
||||
if result.housenumber is not None:
|
||||
housenumber = int(result.housenumber)
|
||||
elif result.extratags is not None and 'startnumber' in result.extratags:
|
||||
# details requests do not come with a specific house number
|
||||
housenumber = int(result.extratags['startnumber'])
|
||||
places.append(result.place_id)
|
||||
hnrs.append(housenumber)
|
||||
def _get_address_lookup_id(result: BaseResultT) -> int:
|
||||
assert result.place_id
|
||||
if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
|
||||
return result.parent_place_id or result.place_id
|
||||
|
||||
return places, hnrs
|
||||
return result.linked_place_id or result.place_id
|
||||
|
||||
|
||||
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
|
||||
assert result.address_rows is not None
|
||||
if result.category[0] not in ('boundary', 'place')\
|
||||
or result.category[1] not in ('postal_code', 'postcode'):
|
||||
postcode = result.postcode
|
||||
if not postcode and result.address:
|
||||
postcode = result.address.get('postcode')
|
||||
if postcode and ',' not in postcode and ';' not in postcode:
|
||||
result.address_rows.append(AddressLine(
|
||||
category=('place', 'postcode'),
|
||||
names={'ref': postcode},
|
||||
fromarea=False, isaddress=True, rank_address=5,
|
||||
distance=0.0))
|
||||
if result.country_code:
|
||||
async def _get_country_names() -> Optional[Dict[str, str]]:
|
||||
t = conn.t.country_name
|
||||
sql = sa.select(t.c.name, t.c.derived_name)\
|
||||
.where(t.c.country_code == result.country_code)
|
||||
for cres in await conn.execute(sql):
|
||||
names = cast(Dict[str, str], cres[0])
|
||||
if cres[1]:
|
||||
names.update(cast(Dict[str, str], cres[1]))
|
||||
return names
|
||||
return None
|
||||
|
||||
country_names = await conn.get_cached_value('COUNTRY_NAME',
|
||||
result.country_code,
|
||||
_get_country_names)
|
||||
if country_names:
|
||||
result.address_rows.append(AddressLine(
|
||||
category=('place', 'country'),
|
||||
names=country_names,
|
||||
fromarea=False, isaddress=True, rank_address=4,
|
||||
distance=0.0))
|
||||
result.address_rows.append(AddressLine(
|
||||
category=('place', 'country_code'),
|
||||
names={'ref': result.country_code}, extratags = {},
|
||||
fromarea=True, isaddress=False, rank_address=4,
|
||||
distance=0.0))
|
||||
|
||||
|
||||
def _setup_address_details(result: BaseResultT) -> None:
|
||||
""" Retrieve information about places that make up the address of the result.
|
||||
"""
|
||||
result.address_rows = AddressLines()
|
||||
if result.names:
|
||||
result.address_rows.append(AddressLine(
|
||||
place_id=result.place_id,
|
||||
osm_object=result.osm_object,
|
||||
category=result.category,
|
||||
names=result.names,
|
||||
extratags=result.extratags or {},
|
||||
admin_level=result.admin_level,
|
||||
fromarea=True, isaddress=True,
|
||||
rank_address=result.rank_address, distance=0.0))
|
||||
if result.source_table == SourceTable.PLACEX and result.address:
|
||||
housenumber = result.address.get('housenumber')\
|
||||
or result.address.get('streetnumber')\
|
||||
or result.address.get('conscriptionnumber')
|
||||
elif result.housenumber:
|
||||
housenumber = result.housenumber
|
||||
else:
|
||||
housenumber = None
|
||||
if housenumber:
|
||||
result.address_rows.append(AddressLine(
|
||||
category=('place', 'house_number'),
|
||||
names={'ref': housenumber},
|
||||
fromarea=True, isaddress=True, rank_address=28, distance=0))
|
||||
if result.address and '_unlisted_place' in result.address:
|
||||
result.address_rows.append(AddressLine(
|
||||
category=('place', 'locality'),
|
||||
names={'name': result.address['_unlisted_place']},
|
||||
fromarea=False, isaddress=True, rank_address=25, distance=0))
|
||||
|
||||
|
||||
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
|
||||
""" Retrieve information about places that make up the address of the result.
|
||||
"""
|
||||
places, hnrs = _get_housenumber_details(results)
|
||||
for result in results:
|
||||
_setup_address_details(result)
|
||||
|
||||
if not places:
|
||||
### Lookup entries from place_address line
|
||||
|
||||
lookup_ids = [{'pid': r.place_id,
|
||||
'lid': _get_address_lookup_id(r),
|
||||
'names': list(r.address.values()) if r.address else [],
|
||||
'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
|
||||
for r in results if r.place_id]
|
||||
|
||||
if not lookup_ids:
|
||||
return
|
||||
|
||||
def _get_addressdata(place_id: Union[int, SaColumn], hnr: Union[int, SaColumn]) -> Any:
|
||||
return sa.func.get_addressdata(place_id, hnr)\
|
||||
.table_valued( # type: ignore[no-untyped-call]
|
||||
sa.column('place_id', type_=sa.Integer),
|
||||
'osm_type',
|
||||
sa.column('osm_id', type_=sa.BigInteger),
|
||||
sa.column('name', type_=conn.t.types.Composite),
|
||||
'class', 'type', 'place_type',
|
||||
sa.column('admin_level', type_=sa.Integer),
|
||||
sa.column('fromarea', type_=sa.Boolean),
|
||||
sa.column('isaddress', type_=sa.Boolean),
|
||||
sa.column('rank_address', type_=sa.SmallInteger),
|
||||
sa.column('distance', type_=sa.Float),
|
||||
joins_implicitly=True)
|
||||
ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
|
||||
.table_valued(sa.column('value', type_=sa.JSON))
|
||||
|
||||
t = conn.t.placex
|
||||
taddr = conn.t.addressline
|
||||
|
||||
if len(places) == 1:
|
||||
# Optimized case for exactly one result (reverse)
|
||||
sql = sa.select(_get_addressdata(places[0], hnrs[0]))\
|
||||
.order_by(sa.column('rank_address').desc(),
|
||||
sa.column('isaddress').desc())
|
||||
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
|
||||
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type, t.c.extratags,
|
||||
t.c.admin_level, taddr.c.fromarea,
|
||||
sa.case((t.c.rank_address == 11, 5),
|
||||
else_=t.c.rank_address).label('rank_address'),
|
||||
taddr.c.distance, t.c.country_code, t.c.postcode)\
|
||||
.join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
|
||||
taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
|
||||
.join(t, taddr.c.address_place_id == t.c.place_id)\
|
||||
.order_by('src_place_id')\
|
||||
.order_by(sa.column('rank_address').desc())\
|
||||
.order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
|
||||
.order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
|
||||
(taddr.c.isaddress, 0),
|
||||
(sa.and_(taddr.c.fromarea,
|
||||
t.c.geometry.ST_Contains(
|
||||
sa.func.ST_GeomFromEWKT(
|
||||
ltab.c.value['c'].as_string()))), 1),
|
||||
else_=-1).desc())\
|
||||
.order_by(taddr.c.fromarea.desc())\
|
||||
.order_by(taddr.c.distance.desc())\
|
||||
.order_by(t.c.rank_search.desc())
|
||||
|
||||
alines = AddressLines()
|
||||
for row in await conn.execute(sql):
|
||||
alines.append(_result_row_to_address_row(row))
|
||||
|
||||
for result in results:
|
||||
if result.place_id == places[0]:
|
||||
result.address_rows = alines
|
||||
return
|
||||
|
||||
|
||||
darray = sa.func.unnest(conn.t.types.to_array(places), conn.t.types.to_array(hnrs))\
|
||||
.table_valued( # type: ignore[no-untyped-call]
|
||||
sa.column('place_id', type_= sa.Integer),
|
||||
sa.column('housenumber', type_= sa.Integer)
|
||||
).render_derived()
|
||||
|
||||
sfn = _get_addressdata(darray.c.place_id, darray.c.housenumber)
|
||||
|
||||
sql = sa.select(darray.c.place_id.label('result_place_id'), sfn)\
|
||||
.order_by(darray.c.place_id,
|
||||
sa.column('rank_address').desc(),
|
||||
sa.column('isaddress').desc())
|
||||
|
||||
current_result = None
|
||||
current_rank_address = -1
|
||||
for row in await conn.execute(sql):
|
||||
if current_result is None or row.result_place_id != current_result.place_id:
|
||||
for result in results:
|
||||
if result.place_id == row.result_place_id:
|
||||
current_result = result
|
||||
break
|
||||
if current_result is None or row.src_place_id != current_result.place_id:
|
||||
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
|
||||
assert current_result is not None
|
||||
current_rank_address = -1
|
||||
|
||||
location_isaddress = row.rank_address != current_rank_address
|
||||
|
||||
if current_result.country_code is None and row.country_code:
|
||||
current_result.country_code = row.country_code
|
||||
|
||||
if row.type in ('postcode', 'postal_code') and location_isaddress:
|
||||
if not row.fromarea or \
|
||||
(current_result.address and 'postcode' in current_result.address):
|
||||
location_isaddress = False
|
||||
else:
|
||||
assert False
|
||||
current_result.address_rows = AddressLines()
|
||||
current_result.address_rows.append(_result_row_to_address_row(row))
|
||||
current_result.postcode = None
|
||||
|
||||
assert current_result.address_rows is not None
|
||||
current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
|
||||
current_rank_address = row.rank_address
|
||||
|
||||
for result in results:
|
||||
await _finalize_entry(conn, result)
|
||||
|
||||
|
||||
### Finally add the record for the parent entry where necessary.
|
||||
|
||||
parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
|
||||
if parent_lookup_ids:
|
||||
ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
|
||||
.table_valued(sa.column('value', type_=sa.JSON))
|
||||
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
|
||||
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type, t.c.extratags,
|
||||
t.c.admin_level,
|
||||
t.c.rank_address)\
|
||||
.where(t.c.place_id == ltab.c.value['lid'].as_integer())
|
||||
|
||||
for row in await conn.execute(sql):
|
||||
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
|
||||
assert current_result is not None
|
||||
assert current_result.address_rows is not None
|
||||
|
||||
current_result.address_rows.append(AddressLine(
|
||||
place_id=row.place_id,
|
||||
osm_object=(row.osm_type, row.osm_id),
|
||||
category=(row.class_, row.type),
|
||||
names=row.name, extratags=row.extratags or {},
|
||||
admin_level=row.admin_level,
|
||||
fromarea=True, isaddress=True,
|
||||
rank_address=row.rank_address, distance=0.0))
|
||||
|
||||
### Now sort everything
|
||||
def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
|
||||
return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
|
||||
|
||||
for result in results:
|
||||
assert result.address_rows is not None
|
||||
result.address_rows.sort(key=mk_sort_key(result.place_id))
|
||||
|
||||
|
||||
# pylint: disable=consider-using-f-string
|
||||
def _placex_select_address_row(conn: SearchConnection,
|
||||
centroid: Point) -> SaSelect:
|
||||
t = conn.t.placex
|
||||
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_.label('class'), t.c.type,
|
||||
t.c.admin_level, t.c.housenumber,
|
||||
sa.literal_column("""ST_GeometryType(geometry) in
|
||||
('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
|
||||
t.c.geometry.is_area().label('fromarea'),
|
||||
t.c.rank_address,
|
||||
sa.literal_column(
|
||||
"""ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
|
||||
'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
|
||||
""" % centroid).label('distance'))
|
||||
t.c.geometry.distance_spheroid(
|
||||
sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
|
||||
|
||||
|
||||
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
|
||||
@@ -615,10 +729,10 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
|
||||
sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
|
||||
|
||||
for name_tokens, address_tokens in await conn.execute(sql):
|
||||
for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
|
||||
for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
|
||||
result.name_keywords.append(WordInfo(*row))
|
||||
|
||||
for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))):
|
||||
for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
|
||||
result.address_keywords.append(WordInfo(*row))
|
||||
|
||||
|
||||
|
||||
@@ -19,7 +19,6 @@ import nominatim.api.results as nres
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
import nominatim.db.sqlalchemy_functions as snfn
|
||||
|
||||
# In SQLAlchemy expression which compare with NULL need to be expressed with
|
||||
# the equal sign.
|
||||
@@ -57,6 +56,7 @@ def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
|
||||
t.c.importance, t.c.wikipedia,
|
||||
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
|
||||
centroid,
|
||||
t.c.linked_place_id, t.c.admin_level,
|
||||
distance.label('distance'),
|
||||
t.c.geometry.ST_Expand(0).label('bbox'))
|
||||
|
||||
@@ -84,12 +84,6 @@ def _locate_interpolation(table: SaFromClause) -> SaLabel:
|
||||
else_=0).label('position')
|
||||
|
||||
|
||||
def _is_address_point(table: SaFromClause) -> SaColumn:
|
||||
return sa.and_(table.c.rank_address == 30,
|
||||
sa.or_(table.c.housenumber != None,
|
||||
table.c.name.has_key('addr:housename')))
|
||||
|
||||
|
||||
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
|
||||
return min(rows, key=lambda row: 1000 if row is None else row.distance)
|
||||
|
||||
@@ -146,13 +140,13 @@ class ReverseGeocoder:
|
||||
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
|
||||
|
||||
if self.params.geometry_output & GeometryFormat.GEOJSON:
|
||||
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
|
||||
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
|
||||
if self.params.geometry_output & GeometryFormat.TEXT:
|
||||
out.append(sa.func.ST_AsText(col).label('geometry_text'))
|
||||
if self.params.geometry_output & GeometryFormat.KML:
|
||||
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
|
||||
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
|
||||
if self.params.geometry_output & GeometryFormat.SVG:
|
||||
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
|
||||
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
|
||||
|
||||
return sql.add_columns(*out)
|
||||
|
||||
@@ -181,12 +175,12 @@ class ReverseGeocoder:
|
||||
t = self.conn.t.placex
|
||||
|
||||
# PostgreSQL must not get the distance as a parameter because
|
||||
# there is a danger it won't be able to proberly estimate index use
|
||||
# there is a danger it won't be able to properly estimate index use
|
||||
# when used with prepared statements
|
||||
diststr = sa.text(f"{distance}")
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
|
||||
.where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
|
||||
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
|
||||
.where(t.c.indexed_status == 0)
|
||||
.where(t.c.linked_place_id == None)
|
||||
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
|
||||
@@ -203,7 +197,7 @@ class ReverseGeocoder:
|
||||
max_rank = min(29, self.max_rank)
|
||||
restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
|
||||
if self.max_rank == 30:
|
||||
restrict.append(lambda: _is_address_point(t))
|
||||
restrict.append(lambda: sa.func.IsAddressPoint(t))
|
||||
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
|
||||
restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
|
||||
t.c.class_.not_in(('place', 'building')),
|
||||
@@ -224,17 +218,21 @@ class ReverseGeocoder:
|
||||
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
|
||||
t = self.conn.t.placex
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
|
||||
.where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
|
||||
.where(t.c.parent_place_id == parent_place_id)
|
||||
.where(_is_address_point(t))
|
||||
.where(t.c.indexed_status == 0)
|
||||
.where(t.c.linked_place_id == None)
|
||||
.order_by('distance')
|
||||
.limit(1))
|
||||
def _base_query() -> SaSelect:
|
||||
return _select_from_placex(t)\
|
||||
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
|
||||
.where(t.c.parent_place_id == parent_place_id)\
|
||||
.where(sa.func.IsAddressPoint(t))\
|
||||
.where(t.c.indexed_status == 0)\
|
||||
.where(t.c.linked_place_id == None)\
|
||||
.order_by('distance')\
|
||||
.limit(1)
|
||||
|
||||
sql: SaLambdaSelect
|
||||
if self.has_geometries():
|
||||
sql = self._add_geometry_columns(sql, t.c.geometry)
|
||||
sql = self._add_geometry_columns(_base_query(), t.c.geometry)
|
||||
else:
|
||||
sql = sa.lambda_stmt(_base_query)
|
||||
|
||||
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
|
||||
@@ -243,30 +241,26 @@ class ReverseGeocoder:
|
||||
distance: float) -> Optional[SaRow]:
|
||||
t = self.conn.t.osmline
|
||||
|
||||
sql: Any = sa.lambda_stmt(lambda:
|
||||
sa.select(t,
|
||||
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
|
||||
_locate_interpolation(t))
|
||||
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
|
||||
.where(t.c.startnumber != None)
|
||||
.order_by('distance')
|
||||
.limit(1))
|
||||
sql = sa.select(t,
|
||||
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
|
||||
_locate_interpolation(t))\
|
||||
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
|
||||
.where(t.c.startnumber != None)\
|
||||
.order_by('distance')\
|
||||
.limit(1)
|
||||
|
||||
if parent_place_id is not None:
|
||||
sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
|
||||
sql = sql.where(t.c.parent_place_id == parent_place_id)
|
||||
|
||||
def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
|
||||
inner = base_sql.subquery('ipol')
|
||||
inner = sql.subquery('ipol')
|
||||
|
||||
return sa.select(inner.c.place_id, inner.c.osm_id,
|
||||
sql = sa.select(inner.c.place_id, inner.c.osm_id,
|
||||
inner.c.parent_place_id, inner.c.address,
|
||||
_interpolated_housenumber(inner),
|
||||
_interpolated_position(inner),
|
||||
inner.c.postcode, inner.c.country_code,
|
||||
inner.c.distance)
|
||||
|
||||
sql += _wrap_query
|
||||
|
||||
if self.has_geometries():
|
||||
sub = sql.subquery('geom')
|
||||
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
|
||||
@@ -281,7 +275,7 @@ class ReverseGeocoder:
|
||||
inner = sa.select(t,
|
||||
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
|
||||
_locate_interpolation(t))\
|
||||
.where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
|
||||
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
|
||||
.where(t.c.parent_place_id == parent_place_id)\
|
||||
.order_by('distance')\
|
||||
.limit(1)\
|
||||
@@ -294,11 +288,12 @@ class ReverseGeocoder:
|
||||
inner.c.postcode,
|
||||
inner.c.distance)
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
|
||||
|
||||
sql: SaLambdaSelect
|
||||
if self.has_geometries():
|
||||
sub = sql.subquery('geom')
|
||||
sub = _base_query().subquery('geom')
|
||||
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
|
||||
else:
|
||||
sql = sa.lambda_stmt(_base_query)
|
||||
|
||||
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
|
||||
@@ -370,7 +365,7 @@ class ReverseGeocoder:
|
||||
inner = sa.select(t, sa.literal(0.0).label('distance'))\
|
||||
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
|
||||
.where(t.c.geometry.intersects(WKT_PARAM))\
|
||||
.where(snfn.select_index_placex_geometry_reverse_lookuppolygon('placex'))\
|
||||
.where(sa.func.PlacexGeometryReverseLookuppolygon())\
|
||||
.order_by(sa.desc(t.c.rank_search))\
|
||||
.limit(50)\
|
||||
.subquery('area')
|
||||
@@ -400,10 +395,7 @@ class ReverseGeocoder:
|
||||
.where(t.c.rank_search > address_rank)\
|
||||
.where(t.c.rank_search <= MAX_RANK_PARAM)\
|
||||
.where(t.c.indexed_status == 0)\
|
||||
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
|
||||
.where(t.c.geometry
|
||||
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
|
||||
.intersects(WKT_PARAM))\
|
||||
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
|
||||
.order_by(sa.desc(t.c.rank_search))\
|
||||
.limit(50)\
|
||||
.subquery('places')
|
||||
@@ -412,13 +404,15 @@ class ReverseGeocoder:
|
||||
return _select_from_placex(inner, False)\
|
||||
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
|
||||
.where(touter.c.place_id == address_id)\
|
||||
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
|
||||
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
|
||||
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
|
||||
.limit(1)
|
||||
|
||||
sql = sa.lambda_stmt(_place_inside_area_query)
|
||||
if self.has_geometries():
|
||||
sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
|
||||
sql = self._add_geometry_columns(_place_inside_area_query(),
|
||||
sa.literal_column('places.geometry'))
|
||||
else:
|
||||
sql = sa.lambda_stmt(_place_inside_area_query)
|
||||
|
||||
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
log().var_dump('Result (place node)', place_address_row)
|
||||
@@ -439,10 +433,9 @@ class ReverseGeocoder:
|
||||
.where(t.c.indexed_status == 0)\
|
||||
.where(t.c.linked_place_id == None)\
|
||||
.where(self._filter_by_layer(t))\
|
||||
.where(t.c.geometry
|
||||
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
|
||||
.intersects(WKT_PARAM))\
|
||||
.where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
|
||||
.order_by(sa.desc(t.c.rank_search))\
|
||||
.order_by('distance')\
|
||||
.limit(50)\
|
||||
.subquery()
|
||||
|
||||
@@ -513,22 +506,22 @@ class ReverseGeocoder:
|
||||
.where(t.c.rank_search <= MAX_RANK_PARAM)\
|
||||
.where(t.c.indexed_status == 0)\
|
||||
.where(t.c.country_code.in_(ccodes))\
|
||||
.where(snfn.select_index_placex_geometry_reverse_lookupplacenode('placex'))\
|
||||
.where(t.c.geometry
|
||||
.ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
|
||||
.intersects(WKT_PARAM))\
|
||||
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
|
||||
.order_by(sa.desc(t.c.rank_search))\
|
||||
.limit(50)\
|
||||
.subquery('area')
|
||||
|
||||
return _select_from_placex(inner, False)\
|
||||
.where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
|
||||
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
|
||||
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
|
||||
.limit(1)
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
|
||||
sql: SaLambdaSelect
|
||||
if self.has_geometries():
|
||||
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
|
||||
sql = self._add_geometry_columns(_base_query(),
|
||||
sa.literal_column('area.geometry'))
|
||||
else:
|
||||
sql = sa.lambda_stmt(_base_query)
|
||||
|
||||
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
log().var_dump('Result (addressable place node)', address_row)
|
||||
@@ -537,16 +530,19 @@ class ReverseGeocoder:
|
||||
|
||||
if address_row is None:
|
||||
# Still nothing, then return a country with the appropriate country code.
|
||||
sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
|
||||
.where(t.c.country_code.in_(ccodes))\
|
||||
.where(t.c.rank_address == 4)\
|
||||
.where(t.c.rank_search == 4)\
|
||||
.where(t.c.linked_place_id == None)\
|
||||
.order_by('distance')\
|
||||
.limit(1))
|
||||
def _country_base_query() -> SaSelect:
|
||||
return _select_from_placex(t)\
|
||||
.where(t.c.country_code.in_(ccodes))\
|
||||
.where(t.c.rank_address == 4)\
|
||||
.where(t.c.rank_search == 4)\
|
||||
.where(t.c.linked_place_id == None)\
|
||||
.order_by('distance')\
|
||||
.limit(1)
|
||||
|
||||
if self.has_geometries():
|
||||
sql = self._add_geometry_columns(sql, t.c.geometry)
|
||||
sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
|
||||
else:
|
||||
sql = sa.lambda_stmt(_country_base_query)
|
||||
|
||||
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
|
||||
|
||||
|
||||
@@ -5,9 +5,9 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Convertion from token assignment to an abstract DB search.
|
||||
Conversion from token assignment to an abstract DB search.
|
||||
"""
|
||||
from typing import Optional, List, Tuple, Iterator
|
||||
from typing import Optional, List, Tuple, Iterator, Dict
|
||||
import heapq
|
||||
|
||||
from nominatim.api.types import SearchDetails, DataLayer
|
||||
@@ -15,6 +15,7 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
|
||||
from nominatim.api.search.token_assignment import TokenAssignment
|
||||
import nominatim.api.search.db_search_fields as dbf
|
||||
import nominatim.api.search.db_searches as dbs
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
|
||||
|
||||
def wrap_near_search(categories: List[Tuple[str, str]],
|
||||
@@ -89,12 +90,14 @@ class SearchBuilder:
|
||||
if sdata is None:
|
||||
return
|
||||
|
||||
categories = self.get_search_categories(assignment)
|
||||
near_items = self.get_near_items(assignment)
|
||||
if near_items is not None and not near_items:
|
||||
return  # impossible combination of near items and category parameter
|
||||
|
||||
if assignment.name is None:
|
||||
if categories and not sdata.postcodes:
|
||||
sdata.qualifiers = categories
|
||||
categories = None
|
||||
if near_items and not sdata.postcodes:
|
||||
sdata.qualifiers = near_items
|
||||
near_items = None
|
||||
builder = self.build_poi_search(sdata)
|
||||
elif assignment.housenumber:
|
||||
hnr_tokens = self.query.get_tokens(assignment.housenumber,
|
||||
@@ -102,16 +105,19 @@ class SearchBuilder:
|
||||
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
|
||||
else:
|
||||
builder = self.build_special_search(sdata, assignment.address,
|
||||
bool(categories))
|
||||
bool(near_items))
|
||||
else:
|
||||
builder = self.build_name_search(sdata, assignment.name, assignment.address,
|
||||
bool(categories))
|
||||
bool(near_items))
|
||||
|
||||
if categories:
|
||||
penalty = min(categories.penalties)
|
||||
categories.penalties = [p - penalty for p in categories.penalties]
|
||||
if near_items:
|
||||
penalty = min(near_items.penalties)
|
||||
near_items.penalties = [p - penalty for p in near_items.penalties]
|
||||
for search in builder:
|
||||
yield dbs.NearSearch(penalty + assignment.penalty, categories, search)
|
||||
search_penalty = search.penalty
|
||||
search.penalty = 0.0
|
||||
yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
|
||||
near_items, search)
|
||||
else:
|
||||
for search in builder:
|
||||
search.penalty += assignment.penalty
|
||||
@@ -147,7 +153,7 @@ class SearchBuilder:
|
||||
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for r in address
|
||||
for t in self.query.get_partials_list(r)],
|
||||
'restrict')]
|
||||
lookups.Restrict)]
|
||||
penalty += 0.2
|
||||
yield dbs.PostcodeSearch(penalty, sdata)
|
||||
|
||||
@@ -157,23 +163,28 @@ class SearchBuilder:
|
||||
""" Build a simple address search for special entries where the
|
||||
housenumber is the main name token.
|
||||
"""
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
|
||||
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
|
||||
expected_count = sum(t.count for t in hnrs)
|
||||
|
||||
partials = [t for trange in address
|
||||
for t in self.query.get_partials_list(trange)]
|
||||
partials = {t.token: t.count for trange in address
|
||||
for t in self.query.get_partials_list(trange)}
|
||||
|
||||
if len(partials) != 1 or partials[0].count < 10000:
|
||||
if expected_count < 8000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t in partials], 'lookup_all'))
|
||||
list(partials), lookups.Restrict))
|
||||
elif len(partials) != 1 or list(partials.values())[0] < 10000:
|
||||
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
|
||||
list(partials), lookups.LookupAll))
|
||||
else:
|
||||
addr_fulls = [t.token for t
|
||||
in self.query.get_tokens(address[0], TokenType.WORD)]
|
||||
if len(addr_fulls) > 5:
|
||||
return
|
||||
sdata.lookups.append(
|
||||
dbf.FieldLookup('nameaddress_vector',
|
||||
[t.token for t
|
||||
in self.query.get_tokens(address[0], TokenType.WORD)],
|
||||
'lookup_any'))
|
||||
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))
|
||||
|
||||
sdata.housenumbers = dbf.WeightedStrings([], [])
|
||||
yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
|
||||
yield dbs.PlaceSearch(0.05, sdata, expected_count)
|
||||
|
||||
|
||||
def build_name_search(self, sdata: dbf.SearchData,
|
||||
@@ -198,45 +209,48 @@ class SearchBuilder:
|
||||
are and tries to find a lookup that optimizes index use.
|
||||
"""
|
||||
penalty = 0.0 # extra penalty
|
||||
name_partials = self.query.get_partials_list(name)
|
||||
name_tokens = [t.token for t in name_partials]
|
||||
name_partials = {t.token: t for t in self.query.get_partials_list(name)}
|
||||
|
||||
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
|
||||
addr_tokens = [t.token for t in addr_partials]
|
||||
addr_tokens = list({t.token for t in addr_partials})
|
||||
|
||||
partials_indexed = all(t.is_indexed for t in name_partials) \
|
||||
partials_indexed = all(t.is_indexed for t in name_partials.values()) \
|
||||
and all(t.is_indexed for t in addr_partials)
|
||||
exp_count = min(t.count for t in name_partials)
|
||||
exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
|
||||
|
||||
if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed:
|
||||
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
|
||||
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
|
||||
yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
|
||||
return
|
||||
|
||||
# Partial term too frequent. Try looking up by rare full names first.
|
||||
name_fulls = self.query.get_tokens(name, TokenType.WORD)
|
||||
fulls_count = sum(t.count for t in name_fulls)
|
||||
# At this point drop unindexed partials from the address.
|
||||
# This might yield wrong results, nothing we can do about that.
|
||||
if not partials_indexed:
|
||||
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
|
||||
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
|
||||
# Any of the full names applies with all of the partials from the address
|
||||
yield penalty, fulls_count / (2**len(addr_partials)),\
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
|
||||
'restrict' if fulls_count < 10000 else 'lookup_all')
|
||||
if name_fulls:
|
||||
fulls_count = sum(t.count for t in name_fulls)
|
||||
# At this point drop unindexed partials from the address.
|
||||
# This might yield wrong results, nothing we can do about that.
|
||||
if not partials_indexed:
|
||||
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
|
||||
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
|
||||
# Any of the full names applies with all of the partials from the address
|
||||
yield penalty, fulls_count / (2**len(addr_tokens)),\
|
||||
dbf.lookup_by_any_name([t.token for t in name_fulls],
|
||||
addr_tokens,
|
||||
fulls_count > 30000 / max(1, len(addr_tokens)))
|
||||
|
||||
# To catch remaining results, look up by name and address
|
||||
# We only do this if there is a reasonable number of results expected.
|
||||
exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
|
||||
if exp_count < 10000 and all(t.is_indexed for t in name_partials):
|
||||
lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
|
||||
if exp_count < 10000 and all(t.is_indexed for t in name_partials.values()):
|
||||
lookup = [dbf.FieldLookup('name_vector', list(name_partials.keys()), lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
|
||||
penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
|
||||
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
|
||||
penalty += 0.35 * max(1 if name_fulls else 0.1,
|
||||
5 - len(name_partials) - len(addr_tokens))
|
||||
yield penalty, exp_count, lookup
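Worked example for the exp_count estimate above (the counts are invented): the rarest partial term dominates the estimate, which is then halved once for every additional partial.

    counts = [50000, 12000, 9000]            # hypothetical per-partial frequencies
    exp_count = min(counts) / (2 ** (len(counts) - 1))
    assert exp_count == 2250.0               # below 8000, so lookup_by_names() is chosen
                                             # (assuming all partials are indexed)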
|
||||
|
||||
|
||||
def get_name_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
|
||||
def get_name_ranking(self, trange: TokenRange,
|
||||
db_field: str = 'name_vector') -> dbf.FieldRanking:
|
||||
""" Create a ranking expression for a name term in the given range.
|
||||
"""
|
||||
name_fulls = self.query.get_tokens(trange, TokenType.WORD)
|
||||
@@ -245,7 +259,7 @@ class SearchBuilder:
|
||||
# Fallback, sum of penalty for partials
|
||||
name_partials = self.query.get_partials_list(trange)
|
||||
default = sum(t.penalty for t in name_partials) + 0.2
|
||||
return dbf.FieldRanking('name_vector', default, ranks)
|
||||
return dbf.FieldRanking(db_field, default, ranks)
|
||||
|
||||
|
||||
def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
|
||||
@@ -303,11 +317,9 @@ class SearchBuilder:
|
||||
sdata = dbf.SearchData()
|
||||
sdata.penalty = assignment.penalty
|
||||
if assignment.country:
|
||||
tokens = self.query.get_tokens(assignment.country, TokenType.COUNTRY)
|
||||
if self.details.countries:
|
||||
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
|
||||
if not tokens:
|
||||
return None
|
||||
tokens = self.get_country_tokens(assignment.country)
|
||||
if not tokens:
|
||||
return None
|
||||
sdata.set_strings('countries', tokens)
|
||||
elif self.details.countries:
|
||||
sdata.countries = dbf.WeightedStrings(self.details.countries,
|
||||
@@ -321,34 +333,70 @@ class SearchBuilder:
|
||||
self.query.get_tokens(assignment.postcode,
|
||||
TokenType.POSTCODE))
|
||||
if assignment.qualifier:
|
||||
sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
|
||||
TokenType.QUALIFIER))
|
||||
tokens = self.get_qualifier_tokens(assignment.qualifier)
|
||||
if not tokens:
|
||||
return None
|
||||
sdata.set_qualifiers(tokens)
|
||||
elif self.details.categories:
|
||||
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
|
||||
[0.0] * len(self.details.categories))
|
||||
|
||||
if assignment.address:
|
||||
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
|
||||
if not assignment.name and assignment.housenumber:
|
||||
# housenumber search: the first item needs to be handled like
|
||||
# a name in ranking, or penalties are not comparable with
|
||||
# normal searches.
|
||||
sdata.set_ranking([self.get_name_ranking(assignment.address[0],
|
||||
db_field='nameaddress_vector')]
|
||||
+ [self.get_addr_ranking(r) for r in assignment.address[1:]])
|
||||
else:
|
||||
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
|
||||
else:
|
||||
sdata.rankings = []
|
||||
|
||||
return sdata
|
||||
|
||||
|
||||
def get_search_categories(self,
|
||||
assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
|
||||
""" Collect tokens for category search or use the categories
|
||||
def get_country_tokens(self, trange: TokenRange) -> List[Token]:
|
||||
""" Return the list of country tokens for the given range,
|
||||
optionally filtered by the country list from the details
|
||||
parameters.
|
||||
"""
|
||||
tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
|
||||
if self.details.countries:
|
||||
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
|
||||
|
||||
return tokens
|
||||
|
||||
|
||||
def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
|
||||
""" Return the list of qualifier tokens for the given range,
|
||||
optionally filtered by the qualifier list from the details
|
||||
parameters.
|
||||
"""
|
||||
tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
|
||||
if self.details.categories:
|
||||
tokens = [t for t in tokens if t.get_category() in self.details.categories]
|
||||
|
||||
return tokens
|
||||
|
||||
|
||||
def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
|
||||
""" Collect tokens for near items search or use the categories
|
||||
requested per parameter.
|
||||
Returns None if no category search is requested.
|
||||
"""
|
||||
if assignment.category:
|
||||
tokens = [t for t in self.query.get_tokens(assignment.category,
|
||||
TokenType.CATEGORY)
|
||||
if not self.details.categories
|
||||
or t.get_category() in self.details.categories]
|
||||
return dbf.WeightedCategories([t.get_category() for t in tokens],
|
||||
[t.penalty for t in tokens])
|
||||
|
||||
if self.details.categories:
|
||||
return dbf.WeightedCategories(self.details.categories,
|
||||
[0.0] * len(self.details.categories))
|
||||
if assignment.near_item:
|
||||
tokens: Dict[Tuple[str, str], float] = {}
|
||||
for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
|
||||
cat = t.get_category()
|
||||
# The category of a near search will be that of near_item.
|
||||
# Thus, if search is restricted to a category parameter,
|
||||
# the two sets must intersect.
|
||||
if (not self.details.categories or cat in self.details.categories)\
|
||||
and t.penalty < tokens.get(cat, 1000.0):
|
||||
tokens[cat] = t.penalty
|
||||
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
|
||||
|
||||
return None
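Illustration (invented tokens, not part of the diff): get_near_items() keeps only the cheapest penalty per (class, type) category, which the following standalone sketch mirrors.

    from typing import Dict, Tuple

    candidates = [(('amenity', 'cafe'), 0.3),
                  (('amenity', 'cafe'), 0.1),   # cheaper duplicate wins
                  (('shop', 'bakery'), 0.2)]

    best: Dict[Tuple[str, str], float] = {}
    for cat, penalty in candidates:
        if penalty < best.get(cat, 1000.0):
            best[cat] = penalty

    assert best == {('amenity', 'cafe'): 0.1, ('shop', 'bakery'): 0.2}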
|
||||
|
||||
|
||||
@@ -7,14 +7,16 @@
|
||||
"""
|
||||
Data structures for more complex fields in abstract search descriptions.
|
||||
"""
|
||||
from typing import List, Tuple, Iterator, cast
|
||||
from typing import List, Tuple, Iterator, Dict, Type
|
||||
import dataclasses
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
|
||||
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
||||
from nominatim.api.search.query import Token
|
||||
import nominatim.api.search.db_search_lookups as lookups
|
||||
from nominatim.utils.json_writer import JsonWriter
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
class WeightedStrings:
|
||||
@@ -92,7 +94,7 @@ class RankedTokens:
|
||||
|
||||
def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
|
||||
""" Create a new RankedTokens list with the given token appended.
|
||||
The tokens penalty as well as the given transision penalty
|
||||
The tokens penalty as well as the given transition penalty
|
||||
are added to the overall penalty.
|
||||
"""
|
||||
return RankedTokens(self.penalty + t.penalty + transition_penalty,
|
||||
@@ -129,11 +131,17 @@ class FieldRanking:
|
||||
"""
|
||||
assert self.rankings
|
||||
|
||||
return sa.func.weigh_search(table.c[self.column],
|
||||
[f"{{{','.join((str(s) for s in r.tokens))}}}"
|
||||
for r in self.rankings],
|
||||
[r.penalty for r in self.rankings],
|
||||
self.default)
|
||||
rout = JsonWriter().start_array()
|
||||
for rank in self.rankings:
|
||||
rout.start_array().value(rank.penalty).next()
|
||||
rout.start_array()
|
||||
for token in rank.tokens:
|
||||
rout.value(token).next()
|
||||
rout.end_array()
|
||||
rout.end_array().next()
|
||||
rout.end_array()
|
||||
|
||||
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
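Illustration (invented rankings; plain json is used here as an approximation of JsonWriter's output): the payload handed to weigh_search() is a nested array of the form [[penalty, [token, ...]], ...].

    import json

    rankings = [(0.0, [1, 2]), (0.3, [5])]   # hypothetical (penalty, token ids) pairs
    payload = json.dumps([[p, toks] for p, toks in rankings], separators=(',', ':'))
    assert payload == '[[0.0,[1,2]],[0.3,[5]]]'
    # weigh_search() receives a string of this shape as its second argument.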
|
||||
|
||||
|
||||
@dataclasses.dataclass
|
||||
@@ -146,19 +154,12 @@ class FieldLookup:
|
||||
"""
|
||||
column: str
|
||||
tokens: List[int]
|
||||
lookup_type: str
|
||||
lookup_type: Type[lookups.LookupType]
|
||||
|
||||
def sql_condition(self, table: SaFromClause) -> SaColumn:
|
||||
""" Create an SQL expression for the given match condition.
|
||||
"""
|
||||
col = table.c[self.column]
|
||||
if self.lookup_type == 'lookup_all':
|
||||
return col.contains(self.tokens)
|
||||
if self.lookup_type == 'lookup_any':
|
||||
return cast(SaColumn, col.overlap(self.tokens))
|
||||
|
||||
return sa.func.array_cat(col, sa.text('ARRAY[]::integer[]'),
|
||||
type_=ARRAY(sa.Integer())).contains(self.tokens)
|
||||
return self.lookup_type(table, self.column, self.tokens)
|
||||
|
||||
|
||||
class SearchData:
|
||||
@@ -195,10 +196,16 @@ class SearchData:
|
||||
""" Set the qulaifier field from the given tokens.
|
||||
"""
|
||||
if tokens:
|
||||
min_penalty = min(t.penalty for t in tokens)
|
||||
categories: Dict[Tuple[str, str], float] = {}
|
||||
min_penalty = 1000.0
|
||||
for t in tokens:
|
||||
min_penalty = min(min_penalty, t.penalty)
|
||||
cat = t.get_category()
|
||||
if t.penalty < categories.get(cat, 1000.0):
|
||||
categories[cat] = t.penalty
|
||||
self.penalty += min_penalty
|
||||
self.qualifiers = WeightedCategories([t.get_category() for t in tokens],
|
||||
[t.penalty - min_penalty for t in tokens])
|
||||
self.qualifiers = WeightedCategories(list(categories.keys()),
|
||||
list(categories.values()))
|
||||
|
||||
|
||||
def set_ranking(self, rankings: List[FieldRanking]) -> None:
|
||||
@@ -217,22 +224,23 @@ def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[Fiel
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
|
||||
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
|
||||
lookup_type: str) -> List[FieldLookup]:
|
||||
use_index_for_addr: bool) -> List[FieldLookup]:
|
||||
""" Create a lookup list where name tokens are looked up via index
|
||||
and only one of the name tokens must be present.
|
||||
Potential address tokens are used to restrict the search further.
|
||||
"""
|
||||
lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
|
||||
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
|
||||
if addr_tokens:
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
|
||||
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
|
||||
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
|
||||
|
||||
return lookup
|
||||
|
||||
@@ -241,5 +249,5 @@ def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[Field
|
||||
""" Create a lookup list where address tokens are looked up via index
|
||||
and the name tokens are only used to restrict the search further.
|
||||
"""
|
||||
return [FieldLookup('name_vector', name_tokens, 'restrict'),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
|
||||
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
|
||||
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
|
||||
|
||||
nominatim/api/search/db_search_lookups.py (new file, 114 lines)
@@ -0,0 +1,114 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of lookup functions for the search_name table.
|
||||
"""
|
||||
from typing import List, Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
from nominatim.typing import SaFromClause
|
||||
from nominatim.db.sqlalchemy_types import IntArray
|
||||
|
||||
# pylint: disable=consider-using-f-string
|
||||
|
||||
LookupType = sa.sql.expression.FunctionElement[Any]
|
||||
|
||||
class LookupAll(LookupType):
|
||||
""" Find all entries in search_name table that contain all of
|
||||
a given list of tokens using an index for the search.
|
||||
"""
|
||||
inherit_cache = True
|
||||
|
||||
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
|
||||
super().__init__(table.c.place_id, getattr(table.c, column), column,
|
||||
sa.type_coerce(tokens, IntArray))
|
||||
|
||||
|
||||
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
|
||||
def _default_lookup_all(element: LookupAll,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
_, col, _, tokens = list(element.clauses)
|
||||
return "(%s @> %s)" % (compiler.process(col, **kw),
|
||||
compiler.process(tokens, **kw))
|
||||
|
||||
|
||||
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_lookup_all(element: LookupAll,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
place, col, colname, tokens = list(element.clauses)
|
||||
return "(%s IN (SELECT CAST(value as bigint) FROM"\
|
||||
" (SELECT array_intersect_fuzzy(places) as p FROM"\
|
||||
" (SELECT places FROM reverse_search_name"\
|
||||
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
|
||||
" AND column = %s"\
|
||||
" ORDER BY length(places)) as x) as u,"\
|
||||
" json_each('[' || u.p || ']'))"\
|
||||
" AND array_contains(%s, %s))"\
|
||||
% (compiler.process(place, **kw),
|
||||
compiler.process(tokens, **kw),
|
||||
compiler.process(colname, **kw),
|
||||
compiler.process(col, **kw),
|
||||
compiler.process(tokens, **kw)
|
||||
)
|
||||
|
||||
|
||||
|
||||
class LookupAny(LookupType):
|
||||
""" Find all entries that contain at least one of the given tokens.
|
||||
Use an index for the search.
|
||||
"""
|
||||
inherit_cache = True
|
||||
|
||||
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
|
||||
super().__init__(table.c.place_id, getattr(table.c, column), column,
|
||||
sa.type_coerce(tokens, IntArray))
|
||||
|
||||
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
|
||||
def _default_lookup_any(element: LookupAny,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
_, col, _, tokens = list(element.clauses)
|
||||
return "(%s && %s)" % (compiler.process(col, **kw),
|
||||
compiler.process(tokens, **kw))
|
||||
|
||||
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_lookup_any(element: LookupAny,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
place, _, colname, tokens = list(element.clauses)
|
||||
return "%s IN (SELECT CAST(value as bigint) FROM"\
|
||||
" (SELECT array_union(places) as p FROM reverse_search_name"\
|
||||
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
|
||||
" AND column = %s) as u,"\
|
||||
" json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
|
||||
compiler.process(tokens, **kw),
|
||||
compiler.process(colname, **kw))
|
||||
|
||||
|
||||
|
||||
class Restrict(LookupType):
|
||||
""" Find all entries that contain all of the given tokens.
|
||||
Do not use an index for the search.
|
||||
"""
|
||||
inherit_cache = True
|
||||
|
||||
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
|
||||
super().__init__(getattr(table.c, column),
|
||||
sa.type_coerce(tokens, IntArray))
|
||||
|
||||
|
||||
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
|
||||
def _default_restrict(element: Restrict,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
|
||||
compiler.process(arg2, **kw))
|
||||
|
||||
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_restrict(element: Restrict,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
|
||||
@@ -5,13 +5,12 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the acutal database accesses for forward search.
|
||||
Implementation of the actual database accesses for forward search.
|
||||
"""
|
||||
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
|
||||
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
|
||||
import abc
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
|
||||
|
||||
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
|
||||
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
|
||||
@@ -19,11 +18,18 @@ from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
|
||||
import nominatim.api.results as nres
|
||||
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
from nominatim.db.sqlalchemy_types import Geometry, IntArray
|
||||
|
||||
#pylint: disable=singleton-comparison,not-callable
|
||||
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
|
||||
|
||||
def no_index(expr: SaColumn) -> SaColumn:
|
||||
""" Wrap the given expression, so that the query planner will
|
||||
refrain from using the expression for index lookup.
|
||||
"""
|
||||
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
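Illustration (the table and column here are stand-ins, not part of the diff): wrapping a column with no_index() keeps the predicate semantically identical but hides the column behind coalesce(), so PostgreSQL will not plan an index scan on it.

    import sqlalchemy as sa

    placex = sa.table('placex', sa.column('rank_address'))
    plain = placex.c.rank_address.between(1, 30)
    hidden = sa.func.coalesce(sa.null(), placex.c.rank_address).between(1, 30)
    # str(plain)  renders roughly as: placex.rank_address BETWEEN :rank_address_1 AND :rank_address_2
    # str(hidden) renders roughly as: coalesce(NULL, placex.rank_address) BETWEEN :coalesce_1 AND :coalesce_2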
|
||||
|
||||
|
||||
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
|
||||
""" Create a dictionary from search parameters that can be used
|
||||
as bind parameter for SQL execute.
|
||||
@@ -48,19 +54,37 @@ NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
|
||||
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
|
||||
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
|
||||
|
||||
def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
|
||||
return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
|
||||
|
||||
def filter_by_area(sql: SaSelect, t: SaFromClause,
|
||||
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
|
||||
""" Apply SQL statements for filtering by viewbox and near point,
|
||||
if applicable.
|
||||
"""
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
if details.near_radius < 0.1 and not avoid_index:
|
||||
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
|
||||
else:
|
||||
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
|
||||
use_index=not avoid_index and
|
||||
details.viewbox.area < 0.2))
|
||||
|
||||
return sql
|
||||
|
||||
|
||||
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
|
||||
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
|
||||
|
||||
|
||||
def _select_placex(t: SaFromClause) -> SaSelect:
|
||||
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type,
|
||||
t.c.address, t.c.extratags,
|
||||
t.c.housenumber, t.c.postcode, t.c.country_code,
|
||||
t.c.importance, t.c.wikipedia,
|
||||
t.c.wikipedia,
|
||||
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
|
||||
t.c.linked_place_id, t.c.admin_level,
|
||||
t.c.centroid,
|
||||
t.c.geometry.ST_Expand(0).label('bbox'))
|
||||
|
||||
@@ -72,20 +96,20 @@ def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDet
|
||||
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
|
||||
|
||||
if details.geometry_output & GeometryFormat.GEOJSON:
|
||||
out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
|
||||
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
|
||||
if details.geometry_output & GeometryFormat.TEXT:
|
||||
out.append(sa.func.ST_AsText(col).label('geometry_text'))
|
||||
if details.geometry_output & GeometryFormat.KML:
|
||||
out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
|
||||
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
|
||||
if details.geometry_output & GeometryFormat.SVG:
|
||||
out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
|
||||
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
|
||||
|
||||
return sql.add_columns(*out)
|
||||
|
||||
|
||||
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
|
||||
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
|
||||
all_ids = array_agg(table.c.place_id) # type: ignore[no-untyped-call]
|
||||
all_ids = sa.func.ArrayAgg(table.c.place_id)
|
||||
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
|
||||
|
||||
if len(numerals) == 1:
|
||||
@@ -106,14 +130,12 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
|
||||
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
|
||||
orexpr: List[SaExpression] = []
|
||||
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
|
||||
orexpr.append(table.c.rank_address.between(1, 30))
|
||||
orexpr.append(no_index(table.c.rank_address).between(1, 30))
|
||||
elif layers & DataLayer.ADDRESS:
|
||||
orexpr.append(table.c.rank_address.between(1, 29))
|
||||
orexpr.append(sa.and_(table.c.rank_address == 30,
|
||||
sa.or_(table.c.housenumber != None,
|
||||
table.c.address.has_key('addr:housename'))))
|
||||
orexpr.append(no_index(table.c.rank_address).between(1, 29))
|
||||
orexpr.append(sa.func.IsAddressPoint(table))
|
||||
elif layers & DataLayer.POI:
|
||||
orexpr.append(sa.and_(table.c.rank_address == 30,
|
||||
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
|
||||
table.c.class_.not_in(('place', 'building'))))
|
||||
|
||||
if layers & DataLayer.MANMADE:
|
||||
@@ -123,7 +145,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
|
||||
if not layers & DataLayer.NATURAL:
|
||||
exclude.extend(('natural', 'water', 'waterway'))
|
||||
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
|
||||
table.c.rank_address == 0))
|
||||
no_index(table.c.rank_address) == 0))
|
||||
else:
|
||||
include = []
|
||||
if layers & DataLayer.RAILWAY:
|
||||
@@ -131,7 +153,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
|
||||
if layers & DataLayer.NATURAL:
|
||||
include.extend(('natural', 'water', 'waterway'))
|
||||
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
|
||||
table.c.rank_address == 0))
|
||||
no_index(table.c.rank_address) == 0))
|
||||
|
||||
if len(orexpr) == 1:
|
||||
return orexpr[0]
|
||||
@@ -150,7 +172,8 @@ async def _get_placex_housenumbers(conn: SearchConnection,
|
||||
place_ids: List[int],
|
||||
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
|
||||
t = conn.t.placex
|
||||
sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
|
||||
sql = _select_placex(t).add_columns(t.c.importance)\
|
||||
.where(t.c.place_id.in_(place_ids))
|
||||
|
||||
if details.geometry_output:
|
||||
sql = _add_geometry_columns(sql, t.c.geometry, details)
|
||||
@@ -162,12 +185,21 @@ async def _get_placex_housenumbers(conn: SearchConnection,
|
||||
yield result
|
||||
|
||||
|
||||
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
|
||||
""" Create a subselect that returns the given list of integers
|
||||
as rows in the column 'nr'.
|
||||
"""
|
||||
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
|
||||
.table_valued(sa.column('value', type_=sa.JSON))
|
||||
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
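Usage sketch (the helper is module-private and imported here only for illustration): both _get_osmline() and _get_tiger() below feed their house-number lists through it.

    import sqlalchemy as sa
    from nominatim.api.search.db_searches import _int_list_to_subquery

    values = _int_list_to_subquery([2, 4, 8])
    stmt = sa.select(values.c.nr)    # yields one row per number: 2, 4, 8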
|
||||
|
||||
|
||||
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
|
||||
numerals: List[int],
|
||||
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
|
||||
t = conn.t.osmline
|
||||
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
|
||||
.data([(n,) for n in numerals])
|
||||
|
||||
values = _int_list_to_subquery(numerals)
|
||||
sql = sa.select(t.c.place_id, t.c.osm_id,
|
||||
t.c.parent_place_id, t.c.address,
|
||||
values.c.nr.label('housenumber'),
|
||||
@@ -190,8 +222,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
|
||||
numerals: List[int], osm_id: int,
|
||||
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
|
||||
t = conn.t.tiger
|
||||
values = sa.values(sa.Column('nr', sa.Integer()), name='housenumber')\
|
||||
.data([(n,) for n in numerals])
|
||||
values = _int_list_to_subquery(numerals)
|
||||
sql = sa.select(t.c.place_id, t.c.parent_place_id,
|
||||
sa.literal('W').label('osm_type'),
|
||||
sa.literal(osm_id).label('osm_id'),
|
||||
@@ -214,6 +245,7 @@ async def _get_tiger(conn: SearchConnection, place_ids: List[int],
|
||||
class AbstractSearch(abc.ABC):
|
||||
""" Encapuslation of a single lookup in the database.
|
||||
"""
|
||||
SEARCH_PRIO: int = 2
|
||||
|
||||
def __init__(self, penalty: float) -> None:
|
||||
self.penalty = penalty
|
||||
@@ -247,9 +279,20 @@ class NearSearch(AbstractSearch):
|
||||
|
||||
base.sort(key=lambda r: (r.accuracy, r.rank_search))
|
||||
max_accuracy = base[0].accuracy + 0.5
|
||||
if base[0].rank_address == 0:
|
||||
min_rank = 0
|
||||
max_rank = 0
|
||||
elif base[0].rank_address < 26:
|
||||
min_rank = 1
|
||||
max_rank = min(25, base[0].rank_address + 4)
|
||||
else:
|
||||
min_rank = 26
|
||||
max_rank = 30
|
||||
base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
|
||||
and r.accuracy <= max_accuracy
|
||||
and r.bbox and r.bbox.area < 20)
|
||||
and r.bbox and r.bbox.area < 20
|
||||
and r.rank_address >= min_rank
|
||||
and r.rank_address <= max_rank)
|
||||
|
||||
if base:
|
||||
baseids = [b.place_id for b in base[:5] if b.place_id]
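Illustration (invented rank): the rank window derived above restricts near results relative to the best base match; for a base result with rank_address 16 the window becomes 1..20.

    rank_address = 16                 # hypothetical best base result
    if rank_address == 0:
        min_rank, max_rank = 0, 0
    elif rank_address < 26:
        min_rank, max_rank = 1, min(25, rank_address + 4)
    else:
        min_rank, max_rank = 26, 30

    assert (min_rank, max_rank) == (1, 20)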
|
||||
@@ -271,30 +314,39 @@ class NearSearch(AbstractSearch):
|
||||
"""
|
||||
table = await conn.get_class_table(*category)
|
||||
|
||||
t = conn.t.placex
|
||||
tgeom = conn.t.placex.alias('pgeom')
|
||||
|
||||
sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
|
||||
.where(t.c.class_ == category[0])\
|
||||
.where(t.c.type == category[1])
|
||||
|
||||
if table is None:
|
||||
# No classtype table available, do a simplified lookup in placex.
|
||||
sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
|
||||
.order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
|
||||
table = conn.t.placex
|
||||
sql = sa.select(table.c.place_id,
|
||||
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
|
||||
.label('dist'))\
|
||||
.join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
|
||||
.where(table.c.class_ == category[0])\
|
||||
.where(table.c.type == category[1])
|
||||
else:
|
||||
# Use classtype table. We can afford to use a larger
|
||||
# radius for the lookup.
|
||||
sql = sql.join(table, t.c.place_id == table.c.place_id)\
|
||||
.join(tgeom,
|
||||
table.c.centroid.ST_CoveredBy(
|
||||
sa.case((sa.and_(tgeom.c.rank_address < 9,
|
||||
sql = sa.select(table.c.place_id,
|
||||
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
|
||||
.label('dist'))\
|
||||
.join(tgeom,
|
||||
table.c.centroid.ST_CoveredBy(
|
||||
sa.case((sa.and_(tgeom.c.rank_address > 9,
|
||||
tgeom.c.geometry.is_area()),
|
||||
tgeom.c.geometry),
|
||||
else_ = tgeom.c.centroid.ST_Expand(0.05))))\
|
||||
.order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
|
||||
tgeom.c.geometry),
|
||||
else_ = tgeom.c.centroid.ST_Expand(0.05))))
|
||||
|
||||
sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
|
||||
inner = sql.where(tgeom.c.place_id.in_(ids))\
|
||||
.group_by(table.c.place_id).subquery()
|
||||
|
||||
t = conn.t.placex
|
||||
sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
|
||||
.join(inner, inner.c.place_id == t.c.place_id)\
|
||||
.order_by(inner.c.dist)
|
||||
|
||||
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
|
||||
if details.countries:
|
||||
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
|
||||
if details.excluded:
|
||||
@@ -334,8 +386,10 @@ class PoiSearch(AbstractSearch):
|
||||
# simply search in placex table
|
||||
def _base_query() -> SaSelect:
|
||||
return _select_placex(t) \
|
||||
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
|
||||
.label('importance'))\
|
||||
.where(t.c.linked_place_id == None) \
|
||||
.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
|
||||
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
|
||||
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
|
||||
.limit(LIMIT_PARAM)
|
||||
|
||||
@@ -362,6 +416,7 @@ class PoiSearch(AbstractSearch):
|
||||
table = await conn.get_class_table(*category)
|
||||
if table is not None:
|
||||
sql = _select_placex(t)\
|
||||
.add_columns(t.c.importance)\
|
||||
.join(table, t.c.place_id == table.c.place_id)\
|
||||
.where(t.c.class_ == category[0])\
|
||||
.where(t.c.type == category[1])
|
||||
@@ -371,8 +426,8 @@ class PoiSearch(AbstractSearch):
|
||||
|
||||
if details.near and details.near_radius is not None:
|
||||
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
|
||||
.where(table.c.centroid.ST_DWithin(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
.where(table.c.centroid.within_distance(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
|
||||
if self.countries:
|
||||
sql = sql.where(t.c.country_code.in_(self.countries.values))
|
||||
@@ -394,6 +449,8 @@ class PoiSearch(AbstractSearch):
|
||||
class CountrySearch(AbstractSearch):
|
||||
""" Search for a country name or country code.
|
||||
"""
|
||||
SEARCH_PRIO = 0
|
||||
|
||||
def __init__(self, sdata: SearchData) -> None:
|
||||
super().__init__(sdata.penalty)
|
||||
self.countries = sdata.countries
|
||||
@@ -407,6 +464,7 @@ class CountrySearch(AbstractSearch):
|
||||
|
||||
ccodes = self.countries.values
|
||||
sql = _select_placex(t)\
|
||||
.add_columns(t.c.importance)\
|
||||
.where(t.c.country_code.in_(ccodes))\
|
||||
.where(t.c.rank_address == 4)
|
||||
|
||||
@@ -416,11 +474,7 @@ class CountrySearch(AbstractSearch):
|
||||
if details.excluded:
|
||||
sql = sql.where(_exclude_places(t))
|
||||
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
sql = sql.where(_within_near(t))
|
||||
sql = filter_by_area(sql, t, details)
|
||||
|
||||
results = nres.SearchResults()
|
||||
for row in await conn.execute(sql, _details_to_bind_params(details)):
|
||||
@@ -430,7 +484,14 @@ class CountrySearch(AbstractSearch):
|
||||
result.bbox = Bbox.from_wkb(row.bbox)
|
||||
results.append(result)
|
||||
|
||||
return results or await self.lookup_in_country_table(conn, details)
|
||||
if not results:
|
||||
results = await self.lookup_in_country_table(conn, details)
|
||||
|
||||
if results:
|
||||
details.min_rank = min(5, details.max_rank)
|
||||
details.max_rank = min(25, details.max_rank)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
async def lookup_in_country_table(self, conn: SearchConnection,
|
||||
@@ -448,29 +509,28 @@ class CountrySearch(AbstractSearch):
|
||||
|
||||
sql = sa.select(tgrid.c.country_code,
|
||||
tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
|
||||
.label('centroid'))\
|
||||
.label('centroid'),
|
||||
tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
|
||||
.where(tgrid.c.country_code.in_(self.countries.values))\
|
||||
.group_by(tgrid.c.country_code)
|
||||
|
||||
if details.viewbox is not None and details.bounded_viewbox:
|
||||
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
if details.near is not None and details.near_radius is not None:
|
||||
sql = sql.where(_within_near(tgrid))
|
||||
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
|
||||
|
||||
sub = sql.subquery('grid')
|
||||
|
||||
sql = sa.select(t.c.country_code,
|
||||
(t.c.name
|
||||
+ sa.func.coalesce(t.c.derived_name,
|
||||
sa.cast('', type_=conn.t.types.Composite))
|
||||
).label('name'),
|
||||
sub.c.centroid)\
|
||||
t.c.name.merge(t.c.derived_name).label('name'),
|
||||
sub.c.centroid, sub.c.bbox)\
|
||||
.join(sub, t.c.country_code == sub.c.country_code)
|
||||
|
||||
if details.geometry_output:
|
||||
sql = _add_geometry_columns(sql, sub.c.centroid, details)
|
||||
|
||||
results = nres.SearchResults()
|
||||
for row in await conn.execute(sql, _details_to_bind_params(details)):
|
||||
result = nres.create_from_country_row(row, nres.SearchResult)
|
||||
assert result
|
||||
result.bbox = Bbox.from_wkb(row.bbox)
|
||||
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
|
||||
results.append(result)
|
||||
|
||||
@@ -507,19 +567,16 @@ class PostcodeSearch(AbstractSearch):
|
||||
|
||||
penalty: SaExpression = sa.literal(self.penalty)
|
||||
|
||||
if details.viewbox is not None:
|
||||
if details.bounded_viewbox:
|
||||
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
|
||||
else:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
|
||||
else_=2.0)
|
||||
if details.viewbox is not None and not details.bounded_viewbox:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
|
||||
else_=1.0)
|
||||
|
||||
if details.near is not None:
|
||||
if details.near_radius is not None:
|
||||
sql = sql.where(_within_near(t))
|
||||
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
|
||||
|
||||
sql = filter_by_area(sql, t, details)
|
||||
|
||||
if self.countries:
|
||||
sql = sql.where(t.c.country_code.in_(self.countries.values))
|
||||
|
||||
@@ -528,13 +585,11 @@ class PostcodeSearch(AbstractSearch):
|
||||
|
||||
if self.lookups:
|
||||
assert len(self.lookups) == 1
|
||||
assert self.lookups[0].lookup_type == 'restrict'
|
||||
tsearch = conn.t.search_name
|
||||
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
|
||||
.where(sa.func.array_cat(tsearch.c.name_vector,
|
||||
tsearch.c.nameaddress_vector,
|
||||
type_=ARRAY(sa.Integer))
|
||||
.contains(self.lookups[0].tokens))
|
||||
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
|
||||
.contains(sa.type_coerce(self.lookups[0].tokens,
|
||||
IntArray)))
|
||||
|
||||
for ranking in self.rankings:
|
||||
penalty += ranking.sql_penalty(conn.t.search_name)
|
||||
@@ -559,6 +614,8 @@ class PostcodeSearch(AbstractSearch):
|
||||
class PlaceSearch(AbstractSearch):
|
||||
""" Generic search for an address or named place.
|
||||
"""
|
||||
SEARCH_PRIO = 1
|
||||
|
||||
def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
|
||||
super().__init__(sdata.penalty + extra_penalty)
|
||||
self.countries = sdata.countries
|
||||
@@ -578,15 +635,7 @@ class PlaceSearch(AbstractSearch):
|
||||
tsearch = conn.t.search_name
|
||||
|
||||
sql: SaLambdaSelect = sa.lambda_stmt(lambda:
|
||||
sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type,
|
||||
t.c.address, t.c.extratags,
|
||||
t.c.housenumber, t.c.postcode, t.c.country_code,
|
||||
t.c.wikipedia,
|
||||
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
|
||||
t.c.centroid,
|
||||
t.c.geometry.ST_Expand(0).label('bbox'))
|
||||
.where(t.c.place_id == tsearch.c.place_id))
|
||||
_select_placex(t).where(t.c.place_id == tsearch.c.place_id))
|
||||
|
||||
|
||||
if details.geometry_output:
|
||||
@@ -607,11 +656,11 @@ class PlaceSearch(AbstractSearch):
|
||||
sql = sql.where(tsearch.c.address_rank > 9)
|
||||
tpc = conn.t.postcode
|
||||
pcs = self.postcodes.values
|
||||
if self.expected_count > 1000:
|
||||
if self.expected_count > 5000:
|
||||
# Many results expected. Restrict by postcode.
|
||||
sql = sql.where(sa.select(tpc.c.postcode)
|
||||
.where(tpc.c.postcode.in_(pcs))
|
||||
.where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
|
||||
.where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
|
||||
.exists())
|
||||
|
||||
# Fewer results, only have a preference for close postcodes
|
||||
@@ -619,31 +668,30 @@ class PlaceSearch(AbstractSearch):
|
||||
.where(tpc.c.postcode.in_(pcs))\
|
||||
.scalar_subquery()
|
||||
penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
|
||||
else_=sa.func.coalesce(pc_near, 2.0))
|
||||
else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
|
||||
|
||||
if details.viewbox is not None:
|
||||
if details.bounded_viewbox:
|
||||
if details.viewbox.area < 0.2:
|
||||
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
|
||||
elif self.expected_count >= 10000:
|
||||
if details.viewbox.area < 0.5:
|
||||
sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.intersects(VIEWBOX_PARAM,
|
||||
use_index=details.viewbox.area < 0.2))
|
||||
elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.intersects(VIEWBOX2_PARAM,
|
||||
use_index=details.viewbox.area < 0.5))
|
||||
else:
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
|
||||
else_=2.0)
|
||||
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
|
||||
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
|
||||
else_=1.0)
|
||||
|
||||
if details.near is not None:
|
||||
if details.near_radius is not None:
|
||||
if details.near_radius < 0.1:
|
||||
sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
else:
|
||||
sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
|
||||
NEAR_RADIUS_PARAM))
|
||||
sql = sql.where(tsearch.c.centroid
|
||||
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
|
||||
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
|
||||
.label('importance'))
|
||||
sql = sql.order_by(sa.desc(sa.text('importance')))
|
||||
@@ -652,7 +700,7 @@ class PlaceSearch(AbstractSearch):
|
||||
or (details.viewbox is not None and details.viewbox.area < 0.5):
|
||||
sql = sql.order_by(
|
||||
penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
|
||||
else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
|
||||
else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
|
||||
sql = sql.add_columns(t.c.importance)
|
||||
|
||||
|
||||
@@ -662,10 +710,10 @@ class PlaceSearch(AbstractSearch):
|
||||
sql = sql.order_by(sa.text('accuracy'))
|
||||
|
||||
if self.housenumbers:
|
||||
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
|
||||
hnr_list = '|'.join(self.housenumbers.values)
|
||||
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
|
||||
.where(sa.or_(tsearch.c.address_rank < 30,
|
||||
t.c.housenumber.op('~*')(hnr_regexp)))
|
||||
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
|
||||
|
||||
# Cross check for housenumbers, need to do that on a rather large
|
||||
# set. Worst case there are 40.000 main streets in OSM.
|
||||
@@ -673,10 +721,10 @@ class PlaceSearch(AbstractSearch):
|
||||
|
||||
# Housenumbers from placex
|
||||
thnr = conn.t.placex.alias('hnr')
|
||||
pid_list = array_agg(thnr.c.place_id) # type: ignore[no-untyped-call]
|
||||
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
|
||||
place_sql = sa.select(pid_list)\
|
||||
.where(thnr.c.parent_place_id == inner.c.place_id)\
|
||||
.where(thnr.c.housenumber.op('~*')(hnr_regexp))\
|
||||
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
|
||||
.where(thnr.c.linked_place_id == None)\
|
||||
.where(thnr.c.indexed_status == 0)
|
||||
|
||||
@@ -736,9 +784,6 @@ class PlaceSearch(AbstractSearch):
|
||||
assert result
|
||||
result.bbox = Bbox.from_wkb(row.bbox)
|
||||
result.accuracy = row.accuracy
|
||||
if not details.excluded or not result.place_id in details.excluded:
|
||||
results.append(result)
|
||||
|
||||
if self.housenumbers and row.rank_address < 30:
|
||||
if row.placex_hnr:
|
||||
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
|
||||
@@ -758,6 +803,14 @@ class PlaceSearch(AbstractSearch):
|
||||
sub.accuracy += 0.6
|
||||
results.append(sub)
|
||||
|
||||
result.accuracy += 1.0 # penalty for missing housenumber
|
||||
# Only add the street as a result, if it meets all other
|
||||
# filter conditions.
|
||||
if (not details.excluded or result.place_id not in details.excluded)\
|
||||
and (not self.qualifiers or result.category in self.qualifiers.values)\
|
||||
and result.rank_address >= details.min_rank:
|
||||
result.accuracy += 1.0 # penalty for missing housenumber
|
||||
results.append(result)
|
||||
else:
|
||||
results.append(result)
|
||||
|
||||
return results
|
||||
|
||||
@@ -7,13 +7,15 @@
|
||||
"""
|
||||
Public interface to the search code.
|
||||
"""
|
||||
from typing import List, Any, Optional, Iterator, Tuple
|
||||
from typing import List, Any, Optional, Iterator, Tuple, Dict
|
||||
import itertools
|
||||
import re
|
||||
import datetime as dt
|
||||
import difflib
|
||||
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.types import SearchDetails
|
||||
from nominatim.api.results import SearchResults, add_result_details
|
||||
from nominatim.api.results import SearchResult, SearchResults, add_result_details
|
||||
from nominatim.api.search.token_assignment import yield_token_assignments
|
||||
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
|
||||
from nominatim.api.search.db_searches import AbstractSearch
|
||||
@@ -62,7 +64,7 @@ class ForwardGeocoder:
|
||||
log().table_dump('Searches for assignment',
|
||||
_dump_searches(searches, query, num_searches))
|
||||
num_searches = len(searches)
|
||||
searches.sort(key=lambda s: s.penalty)
|
||||
searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))
|
||||
|
||||
return query, searches
|
||||
|
||||
@@ -73,42 +75,99 @@ class ForwardGeocoder:
|
||||
is found.
|
||||
"""
|
||||
log().section('Execute database searches')
|
||||
results = SearchResults()
|
||||
results: Dict[Any, SearchResult] = {}
|
||||
|
||||
end_time = dt.datetime.now() + self.timeout
|
||||
|
||||
num_results = 0
|
||||
min_ranking = 1000.0
|
||||
min_ranking = searches[0].penalty + 2.0
|
||||
prev_penalty = 0.0
|
||||
for i, search in enumerate(searches):
|
||||
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
|
||||
break
|
||||
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
|
||||
for result in await search.lookup(self.conn, self.params):
|
||||
results.append(result)
|
||||
min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
|
||||
log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
|
||||
num_results = len(results)
|
||||
log().var_dump('Params', self.params)
|
||||
lookup_results = await search.lookup(self.conn, self.params)
|
||||
for result in lookup_results:
|
||||
rhash = (result.source_table, result.place_id,
|
||||
result.housenumber, result.country_code)
|
||||
prevresult = results.get(rhash)
|
||||
if prevresult:
|
||||
prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
|
||||
else:
|
||||
results[rhash] = result
|
||||
min_ranking = min(min_ranking, result.accuracy * 1.2)
|
||||
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
|
||||
prev_penalty = search.penalty
|
||||
if dt.datetime.now() >= end_time:
|
||||
break
|
||||
|
||||
if results:
|
||||
min_ranking = min(r.ranking for r in results)
|
||||
results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
|
||||
return SearchResults(results.values())
|
||||
|
||||
if results:
|
||||
min_rank = min(r.rank_search for r in results)
|
||||
|
||||
def pre_filter_results(self, results: SearchResults) -> SearchResults:
|
||||
""" Remove results that are significantly worse than the
|
||||
best match.
|
||||
"""
|
||||
if results:
|
||||
max_ranking = min(r.ranking for r in results) + 0.5
|
||||
results = SearchResults(r for r in results if r.ranking < max_ranking)
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
|
||||
""" Remove badly matching results, sort by ranking and
|
||||
limit to the configured number of results.
|
||||
"""
|
||||
if results:
|
||||
results.sort(key=lambda r: r.ranking)
|
||||
min_rank = results[0].rank_search
|
||||
min_ranking = results[0].ranking
|
||||
results = SearchResults(r for r in results
|
||||
if r.ranking + 0.05 * (r.rank_search - min_rank)
|
||||
if r.ranking + 0.03 * (r.rank_search - min_rank)
|
||||
< min_ranking + 0.5)
|
||||
|
||||
results.sort(key=lambda r: r.accuracy - r.calculated_importance())
|
||||
results = SearchResults(results[:self.limit])
|
||||
|
||||
return results
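Worked example for the cut-off above (numbers invented): with the best result at ranking 0.2 and rank_search 26, a competitor is kept only while its ranking plus 0.03 per extra rank level stays below 0.7.

    best_ranking, best_rank = 0.2, 26

    def keep(ranking: float, rank_search: int) -> bool:
        return ranking + 0.03 * (rank_search - best_rank) < best_ranking + 0.5

    assert keep(0.55, 30)         # 0.55 + 0.12 = 0.67 < 0.70
    assert not keep(0.65, 30)     # 0.65 + 0.12 = 0.77 >= 0.70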
|
||||
|
||||
|
||||
def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
|
||||
""" Adjust the accuracy of the localized result according to how well
|
||||
they match the original query.
|
||||
"""
|
||||
assert self.query_analyzer is not None
|
||||
qwords = [word for phrase in query.source
|
||||
for word in re.split('[, ]+', phrase.text) if word]
|
||||
if not qwords:
|
||||
return
|
||||
|
||||
for result in results:
|
||||
# Negative importance indicates ordering by distance, which is
|
||||
# more important than word matching.
|
||||
if not result.display_name\
|
||||
or (result.importance is not None and result.importance < 0):
|
||||
continue
|
||||
distance = 0.0
|
||||
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
|
||||
result.country_code or '')))
|
||||
words = set((w for w in norm.split(' ') if w))
|
||||
if not words:
|
||||
continue
|
||||
for qword in qwords:
|
||||
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
|
||||
if wdist < 0.5:
|
||||
distance += len(qword)
|
||||
else:
|
||||
distance += (1.0 - wdist) * len(qword)
|
||||
# Compensate for the fact that country names do not get a
|
||||
# match penalty yet by the tokenizer.
|
||||
# Temporary hack that needs to be removed!
|
||||
if result.rank_address == 4:
|
||||
distance *= 2
|
||||
result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
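The reranking relies on difflib's quick_ratio to estimate how well each query word is reflected somewhere in the display name; fully unmatched words add their whole length to the distance, partially matched ones a fraction of it. A self-contained sketch of that word-distance computation with toy strings (the real code feeds normalized display names through the query analyzer first):

    import difflib

    def query_distance(qwords, result_words):
        """Sum of per-word mismatch, as in rerank_by_query above."""
        distance = 0.0
        for qword in qwords:
            # best fuzzy match of this query word against any word of the result
            wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio()
                        for w in result_words)
            if wdist < 0.5:
                distance += len(qword)              # effectively unmatched
            else:
                distance += (1.0 - wdist) * len(qword)
        return distance

    qwords = ['main', 'stret']                      # note the typo in 'street'
    words = {'main', 'street', 'springfield'}
    d = query_distance(qwords, words)
    # the accuracy penalty is then scaled by the total query length
    penalty = d * 0.4 / sum(len(w) for w in qwords)
    print(round(d, 3), round(penalty, 3))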
async def lookup_pois(self, categories: List[Tuple[str, str]],
|
||||
phrases: List[Phrase]) -> SearchResults:
|
||||
""" Look up places by category. If phrase is given, a place search
|
||||
@@ -123,13 +182,17 @@ class ForwardGeocoder:
|
||||
if query:
|
||||
searches = [wrap_near_search(categories, s) for s in searches[:50]]
|
||||
results = await self.execute_searches(query, searches)
|
||||
results = self.pre_filter_results(results)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
|
||||
results = self.sort_and_cut_results(results)
|
||||
else:
|
||||
results = SearchResults()
|
||||
else:
|
||||
search = build_poi_search(categories, self.params.countries)
|
||||
results = await search.lookup(self.conn, self.params)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
|
||||
|
||||
return results
|
||||
@@ -149,7 +212,12 @@ class ForwardGeocoder:
|
||||
if searches:
|
||||
# Execute SQL until an appropriate result is found.
|
||||
results = await self.execute_searches(query, searches[:50])
|
||||
results = self.pre_filter_results(results)
|
||||
await add_result_details(self.conn, results, self.params)
|
||||
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
|
||||
self.rerank_by_query(query, results)
|
||||
log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
|
||||
results = self.sort_and_cut_results(results)
|
||||
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
|
||||
|
||||
return results
|
||||
|
||||
@@ -8,7 +8,6 @@
|
||||
Implementation of query analysis for the ICU tokenizer.
|
||||
"""
|
||||
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
|
||||
from copy import copy
|
||||
from collections import defaultdict
|
||||
import dataclasses
|
||||
import difflib
|
||||
@@ -22,6 +21,7 @@ from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.search import query as qmod
|
||||
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
from nominatim.db.sqlalchemy_types import Json
|
||||
|
||||
|
||||
DB_TO_TOKEN_TYPE = {
|
||||
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
|
||||
penalty = 0.0
|
||||
if row.type == 'w':
|
||||
penalty = 0.3
|
||||
elif row.type == 'W':
|
||||
if len(row.word_token) == 1 and row.word_token == row.word:
|
||||
penalty = 0.2 if row.word.isdigit() else 0.3
|
||||
elif row.type == 'H':
|
||||
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
|
||||
if all(not c.isdigit() for c in row.word_token):
|
||||
penalty += 0.2 * (len(row.word_token) - 1)
|
||||
elif row.type == 'C':
|
||||
if len(row.word_token) == 1:
|
||||
penalty = 0.3
|
||||
|
||||
if row.info is None:
|
||||
lookup_word = row.word
|
||||
@@ -153,7 +159,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
sa.Column('word_token', sa.Text, nullable=False),
|
||||
sa.Column('type', sa.Text, nullable=False),
|
||||
sa.Column('word', sa.Text),
|
||||
sa.Column('info', self.conn.t.types.Json))
|
||||
sa.Column('info', Json))
|
||||
|
||||
|
||||
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
|
||||
@@ -178,13 +184,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
if row.type == 'S':
|
||||
if row.info['op'] in ('in', 'near'):
|
||||
if trange.start == 0:
|
||||
query.add_token(trange, qmod.TokenType.CATEGORY, token)
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
|
||||
if trange.start == 0 or trange.end == query.num_token_slots():
|
||||
token = copy(token)
|
||||
token.penalty += 0.1 * (query.num_token_slots())
|
||||
query.add_token(trange, qmod.TokenType.CATEGORY, token)
|
||||
if trange.start == 0 and trange.end == query.num_token_slots():
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
else:
|
||||
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
|
||||
else:
|
||||
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ class LegacyToken(qmod.Token):
|
||||
|
||||
@property
|
||||
def info(self) -> Dict[str, Any]:
|
||||
""" Dictionary of additional propoerties of the token.
|
||||
""" Dictionary of additional properties of the token.
|
||||
Should only be used for debugging purposes.
|
||||
"""
|
||||
return {'category': self.category,
|
||||
@@ -107,15 +107,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
for row in await self.lookup_in_db(lookup_words):
|
||||
for trange in words[row.word_token.strip()]:
|
||||
token, ttype = self.make_token(row)
|
||||
if ttype == qmod.TokenType.CATEGORY:
|
||||
if ttype == qmod.TokenType.NEAR_ITEM:
|
||||
if trange.start == 0:
|
||||
query.add_token(trange, qmod.TokenType.CATEGORY, token)
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
elif ttype == qmod.TokenType.QUALIFIER:
|
||||
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
|
||||
if trange.start == 0 or trange.end == query.num_token_slots():
|
||||
token = copy(token)
|
||||
token.penalty += 0.1 * (query.num_token_slots())
|
||||
query.add_token(trange, qmod.TokenType.CATEGORY, token)
|
||||
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
|
||||
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
|
||||
query.add_token(trange, ttype, token)
|
||||
|
||||
@@ -127,6 +127,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
return query
|
||||
|
||||
|
||||
def normalize_text(self, text: str) -> str:
|
||||
""" Bring the given text into a normalized form.
|
||||
|
||||
This only removes case, so some difference with the normalization
|
||||
in the phrase remains.
|
||||
"""
|
||||
return text.lower()
|
||||
|
||||
|
||||
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
|
||||
Dict[str, List[qmod.TokenRange]]]:
|
||||
""" Transliterate the phrases and split them into tokens.
|
||||
@@ -186,7 +195,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
|
||||
ttype = qmod.TokenType.POSTCODE
|
||||
lookup_word = row.word_token[1:]
|
||||
else:
|
||||
ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
|
||||
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
|
||||
else qmod.TokenType.QUALIFIER
|
||||
lookup_word = row.word
|
||||
elif row.word_token.startswith(' '):
|
||||
|
||||
@@ -46,7 +46,7 @@ class TokenType(enum.Enum):
|
||||
""" Country name or reference. """
|
||||
QUALIFIER = enum.auto()
|
||||
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
|
||||
CATEGORY = enum.auto()
|
||||
NEAR_ITEM = enum.auto()
|
||||
""" Special term used as searchable object(e.g. supermarket in ...). """
|
||||
|
||||
|
||||
@@ -70,14 +70,16 @@ class PhraseType(enum.Enum):
|
||||
COUNTRY = enum.auto()
|
||||
""" Contains the country name or code. """
|
||||
|
||||
def compatible_with(self, ttype: TokenType) -> bool:
|
||||
def compatible_with(self, ttype: TokenType,
|
||||
is_full_phrase: bool) -> bool:
|
||||
""" Check if the given token type can be used with the phrase type.
|
||||
"""
|
||||
if self == PhraseType.NONE:
|
||||
return True
|
||||
return not is_full_phrase or ttype != TokenType.QUALIFIER
|
||||
if self == PhraseType.AMENITY:
|
||||
return ttype in (TokenType.WORD, TokenType.PARTIAL,
|
||||
TokenType.QUALIFIER, TokenType.CATEGORY)
|
||||
return ttype in (TokenType.WORD, TokenType.PARTIAL)\
|
||||
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
|
||||
or (not is_full_phrase and ttype == TokenType.QUALIFIER)
|
||||
if self == PhraseType.STREET:
|
||||
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
|
||||
if self == PhraseType.POSTCODE:
|
||||
@@ -167,7 +169,7 @@ class TokenList:
|
||||
|
||||
@dataclasses.dataclass
|
||||
class QueryNode:
|
||||
""" A node of the querry representing a break between terms.
|
||||
""" A node of the query representing a break between terms.
|
||||
"""
|
||||
btype: BreakType
|
||||
ptype: PhraseType
|
||||
@@ -244,7 +246,9 @@ class QueryStruct:
|
||||
be added to, then the token is silently dropped.
|
||||
"""
|
||||
snode = self.nodes[trange.start]
|
||||
if snode.ptype.compatible_with(ttype):
|
||||
full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
|
||||
and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
|
||||
if snode.ptype.compatible_with(ttype, full_phrase):
|
||||
tlist = snode.get_tokens(trange.end, ttype)
|
||||
if tlist is None:
|
||||
snode.starting.append(TokenList(trange.end, ttype, [token]))
|
||||
|
||||
@@ -19,7 +19,7 @@ if TYPE_CHECKING:
|
||||
from nominatim.api.search.query import Phrase, QueryStruct
|
||||
|
||||
class AbstractQueryAnalyzer(ABC):
|
||||
""" Class for analysing incomming queries.
|
||||
""" Class for analysing incoming queries.
|
||||
|
||||
Query analyzers are tied to the tokenizer used on import.
|
||||
"""
|
||||
@@ -30,6 +30,15 @@ class AbstractQueryAnalyzer(ABC):
|
||||
"""
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def normalize_text(self, text: str) -> str:
|
||||
""" Bring the given text into a normalized form. That is the
|
||||
standardized form search will work with. All information removed
|
||||
at this stage is inevitably lost.
|
||||
"""
|
||||
|
||||
|
||||
|
||||
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
|
||||
""" Create a query analyzer for the tokenizer used by the database.
|
||||
"""
|
||||
|
||||
@@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
|
||||
housenumber: Optional[qmod.TokenRange] = None
|
||||
postcode: Optional[qmod.TokenRange] = None
|
||||
country: Optional[qmod.TokenRange] = None
|
||||
category: Optional[qmod.TokenRange] = None
|
||||
near_item: Optional[qmod.TokenRange] = None
|
||||
qualifier: Optional[qmod.TokenRange] = None
|
||||
|
||||
|
||||
@@ -64,15 +64,15 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
|
||||
out.postcode = token.trange
|
||||
elif token.ttype == qmod.TokenType.COUNTRY:
|
||||
out.country = token.trange
|
||||
elif token.ttype == qmod.TokenType.CATEGORY:
|
||||
out.category = token.trange
|
||||
elif token.ttype == qmod.TokenType.NEAR_ITEM:
|
||||
out.near_item = token.trange
|
||||
elif token.ttype == qmod.TokenType.QUALIFIER:
|
||||
out.qualifier = token.trange
|
||||
return out
|
||||
|
||||
|
||||
class _TokenSequence:
|
||||
""" Working state used to put together the token assignements.
|
||||
""" Working state used to put together the token assignments.
|
||||
|
||||
Represents an intermediate state while traversing the tokenized
|
||||
query.
|
||||
@@ -109,7 +109,7 @@ class _TokenSequence:
|
||||
"""
|
||||
# Country and category must be the final term for left-to-right
|
||||
return len(self.seq) > 1 and \
|
||||
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
|
||||
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
|
||||
|
||||
|
||||
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
|
||||
@@ -132,6 +132,11 @@ class _TokenSequence:
|
||||
|
||||
# Name tokens are always acceptable and don't change direction
|
||||
if ttype == qmod.TokenType.PARTIAL:
|
||||
# qualifiers cannot appear in the middle of the query. They need
|
||||
# to be near the next phrase.
|
||||
if self.direction == -1 \
|
||||
and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
|
||||
return None
|
||||
return self.direction
|
||||
|
||||
# Other tokens may only appear once
|
||||
@@ -165,22 +170,22 @@ class _TokenSequence:
|
||||
if ttype == qmod.TokenType.COUNTRY:
|
||||
return None if self.direction == -1 else 1
|
||||
|
||||
if ttype == qmod.TokenType.CATEGORY:
|
||||
if ttype == qmod.TokenType.NEAR_ITEM:
|
||||
return self.direction
|
||||
|
||||
if ttype == qmod.TokenType.QUALIFIER:
|
||||
if self.direction == 1:
|
||||
if (len(self.seq) == 1
|
||||
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
|
||||
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
|
||||
or (len(self.seq) == 2
|
||||
and self.seq[0].ttype == qmod.TokenType.CATEGORY
|
||||
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
|
||||
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
|
||||
return 1
|
||||
return None
|
||||
if self.direction == -1:
|
||||
return -1
|
||||
|
||||
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
|
||||
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
|
||||
if len(tempseq) == 0:
|
||||
return 1
|
||||
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
|
||||
@@ -233,10 +238,10 @@ class _TokenSequence:
|
||||
|
||||
def recheck_sequence(self) -> bool:
|
||||
""" Check that the sequence is a fully valid token assignment
|
||||
and addapt direction and penalties further if necessary.
|
||||
and adapt direction and penalties further if necessary.
|
||||
|
||||
This function catches some impossible assignments that need
|
||||
forward context and can therefore not be exluded when building
|
||||
forward context and can therefore not be excluded when building
|
||||
the assignment.
|
||||
"""
|
||||
# housenumbers may not be further than 2 words from the beginning.
|
||||
@@ -253,7 +258,7 @@ class _TokenSequence:
|
||||
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
|
||||
if not self._adapt_penalty_from_priors(priors, 1):
|
||||
return False
|
||||
if any(t.ttype == qmod.TokenType.CATEGORY for t in self.seq):
|
||||
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
|
||||
self.penalty += 1.0
|
||||
|
||||
return True
|
||||
@@ -272,10 +277,10 @@ class _TokenSequence:
|
||||
# <address>,<postcode> should give preference to address search
|
||||
if base.postcode.start == 0:
|
||||
penalty = self.penalty
|
||||
self.direction = -1 # name searches are only possbile backwards
|
||||
self.direction = -1 # name searches are only possible backwards
|
||||
else:
|
||||
penalty = self.penalty + 0.1
|
||||
self.direction = 1 # name searches are only possbile forwards
|
||||
self.direction = 1 # name searches are only possible forwards
|
||||
yield dataclasses.replace(base, penalty=penalty)
|
||||
|
||||
|
||||
@@ -368,7 +373,7 @@ class _TokenSequence:
|
||||
|
||||
# Postcode or country-only search
|
||||
if not base.address:
|
||||
if not base.housenumber and (base.postcode or base.country or base.category):
|
||||
if not base.housenumber and (base.postcode or base.country or base.near_item):
|
||||
log().comment('postcode/country search')
|
||||
yield dataclasses.replace(base, penalty=self.penalty)
|
||||
else:
|
||||
@@ -385,7 +390,7 @@ class _TokenSequence:
|
||||
yield from self._get_assignments_address_backward(base, query)
|
||||
|
||||
# variant for special housenumber searches
|
||||
if base.housenumber:
|
||||
if base.housenumber and not base.qualifier:
|
||||
yield dataclasses.replace(base, penalty=self.penalty)
|
||||
|
||||
|
||||
|
||||
@@ -5,7 +5,7 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Classes and function releated to status call.
|
||||
Classes and function related to status call.
|
||||
"""
|
||||
from typing import Optional
|
||||
import datetime as dt
|
||||
@@ -36,6 +36,12 @@ async def get_status(conn: SearchConnection) -> StatusResult:
|
||||
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
|
||||
status.data_updated = await conn.scalar(sql)
|
||||
|
||||
if status.data_updated is not None:
|
||||
if status.data_updated.tzinfo is None:
|
||||
status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
|
||||
else:
|
||||
status.data_updated = status.data_updated.astimezone(dt.timezone.utc)
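The added block makes the import timestamp always timezone-aware UTC: naive datetimes get UTC attached, aware ones are converted. A minimal sketch of the same normalization outside the status call:

    import datetime as dt

    def to_utc(ts: dt.datetime) -> dt.datetime:
        # naive timestamps are assumed to already be UTC; aware ones are converted
        if ts.tzinfo is None:
            return ts.replace(tzinfo=dt.timezone.utc)
        return ts.astimezone(dt.timezone.utc)

    print(to_utc(dt.datetime(2023, 9, 1, 12, 0)))       # naive -> tagged as UTC
    print(to_utc(dt.datetime(2023, 9, 1, 14, 0,
                             tzinfo=dt.timezone(dt.timedelta(hours=2)))))  # +02:00 -> 12:00 UTC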
# Database version
|
||||
try:
|
||||
verstr = await conn.get_property('database_version')
|
||||
|
||||
@@ -17,6 +17,7 @@ from struct import unpack
|
||||
from binascii import unhexlify
|
||||
|
||||
from nominatim.errors import UsageError
|
||||
from nominatim.api.localization import Locales
|
||||
|
||||
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
|
||||
|
||||
@@ -315,7 +316,7 @@ class DataLayer(enum.Flag):
|
||||
for reverse and forward search.
|
||||
"""
|
||||
ADDRESS = enum.auto()
|
||||
""" The address layer contains all places relavant for addresses:
|
||||
""" The address layer contains all places relevant for addresses:
|
||||
fully qualified addresses with a house number (or a house name equivalent,
|
||||
for some addresses) and places that can be part of an address like
|
||||
roads, cities, states.
|
||||
@@ -386,7 +387,7 @@ TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
|
||||
|
||||
@dataclasses.dataclass
|
||||
class LookupDetails:
|
||||
""" Collection of parameters that define the amount of details
|
||||
""" Collection of parameters that define which kind of details are
|
||||
returned with a lookup or details result.
|
||||
"""
|
||||
geometry_output: GeometryFormat = GeometryFormat.NONE
|
||||
@@ -413,6 +414,9 @@ class LookupDetails:
|
||||
0.0 means the original geometry is kept. The higher the value, the
|
||||
more the geometry gets simplified.
|
||||
"""
|
||||
locales: Locales = Locales()
|
||||
""" Preferred languages for localization of results.
|
||||
"""
|
||||
|
||||
@classmethod
|
||||
def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
|
||||
@@ -534,11 +538,13 @@ class SearchDetails(LookupDetails):
|
||||
or (self.bounded_viewbox
|
||||
and self.viewbox is not None and self.near is not None
|
||||
and self.viewbox.contains(self.near))
|
||||
or self.layers is not None and not self.layers)
|
||||
or (self.layers is not None and not self.layers)
|
||||
or (self.max_rank <= 4 and
|
||||
self.layers is not None and not self.layers & DataLayer.ADDRESS))
|
||||
|
||||
|
||||
def layer_enabled(self, layer: DataLayer) -> bool:
|
||||
""" Check if the given layer has been choosen. Also returns
|
||||
""" Check if the given layer has been chosen. Also returns
|
||||
true when layer restriction has been disabled completely.
|
||||
"""
|
||||
return self.layers is None or bool(self.layers & layer)
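DataLayer is an enum.Flag, so layers combine with `|` and are tested with `&`; `layer_enabled` simply treats an unset filter as "everything allowed". A small sketch of that behaviour with a stand-in flag enum (member names other than ADDRESS and POI are assumptions made for illustration):

    import enum
    from typing import Optional

    class DataLayer(enum.Flag):
        ADDRESS = enum.auto()
        POI = enum.auto()       # referenced elsewhere in the diff
        RAILWAY = enum.auto()   # assumed member, for illustration only

    def layer_enabled(layers: Optional[DataLayer], layer: DataLayer) -> bool:
        # no restriction set -> every layer counts as enabled
        return layers is None or bool(layers & layer)

    print(layer_enabled(None, DataLayer.RAILWAY))                           # True
    print(layer_enabled(DataLayer.ADDRESS | DataLayer.POI, DataLayer.POI))  # True
    print(layer_enabled(DataLayer.ADDRESS, DataLayer.RAILWAY))              # False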
@@ -5,7 +5,7 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Hard-coded information about tag catagories.
|
||||
Hard-coded information about tag categories.
|
||||
|
||||
These tables have been copied verbatim from the old PHP code. For future
|
||||
version a more flexible formatting is required.
|
||||
@@ -44,7 +44,7 @@ def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, st
|
||||
def bbox_from_result(result: Union[napi.ReverseResult, napi.SearchResult]) -> napi.Bbox:
|
||||
""" Compute a bounding box for the result. For ways and relations
|
||||
a given boundingbox is used. For all other object, a box is computed
|
||||
around the centroid according to dimensions dereived from the
|
||||
around the centroid according to dimensions derived from the
|
||||
search rank.
|
||||
"""
|
||||
if (result.osm_object and result.osm_object[0] == 'N') or result.bbox is None:
|
||||
|
||||
@@ -247,7 +247,8 @@ def format_base_geocodejson(results: Union[napi.ReverseResults, napi.SearchResul
|
||||
out.key('admin').start_object()
|
||||
if result.address_rows:
|
||||
for line in result.address_rows:
|
||||
if line.isaddress and (line.admin_level or 15) < 15 and line.local_name:
|
||||
if line.isaddress and (line.admin_level or 15) < 15 and line.local_name \
|
||||
and line.category[0] == 'boundary' and line.category[1] == 'administrative':
|
||||
out.keyval(f"level{line.admin_level}", line.local_name)
|
||||
out.end_object().next()
|
||||
|
||||
|
||||
@@ -37,7 +37,7 @@ def zoom_to_rank(zoom: int) -> int:
|
||||
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
|
||||
|
||||
|
||||
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
|
||||
FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
|
||||
'country': (4, 4),
|
||||
'state': (8, 8),
|
||||
'city': (14, 16),
|
||||
@@ -155,7 +155,7 @@ COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?'
|
||||
)]
|
||||
|
||||
def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
|
||||
""" Look for something that is formated like a coordinate at the
|
||||
""" Look for something that is formatted like a coordinate at the
|
||||
beginning or end of the query. If found, extract the coordinate and
|
||||
return the remaining query (or the empty string if the query
|
||||
consisted of nothing but a coordinate).
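Going by the docstring, the helper strips a leading or trailing coordinate from the query string and returns the remaining text together with the parsed latitude and longitude (None values when no coordinate is present). A hedged usage sketch, assuming the signature shown above and a coordinate format the regexes accept:

    # Hypothetical calls, illustrating the described behaviour only.
    query, lat, lon = extract_coords_from_query('52.5170, 13.3889 pharmacy')
    # query == 'pharmacy', lat == 52.5170, lon == 13.3889   (assumed outcome)

    query, lat, lon = extract_coords_from_query('Main Street 7')
    # no coordinate found: the query is returned unchanged, lat and lon are None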
@@ -240,7 +240,7 @@ class ASGIAdaptor(abc.ABC):
|
||||
|
||||
|
||||
def parse_geometry_details(self, fmt: str) -> Dict[str, Any]:
|
||||
""" Create details strucutre from the supplied geometry parameters.
|
||||
""" Create details structure from the supplied geometry parameters.
|
||||
"""
|
||||
numgeoms = 0
|
||||
output = napi.GeometryFormat.NONE
|
||||
@@ -308,7 +308,8 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
|
||||
keywords=params.get_bool('keywords', False),
|
||||
geometry_output = napi.GeometryFormat.GEOJSON
|
||||
if params.get_bool('polygon_geojson', False)
|
||||
else napi.GeometryFormat.NONE
|
||||
else napi.GeometryFormat.NONE,
|
||||
locales=locales
|
||||
)
|
||||
|
||||
if debug:
|
||||
@@ -317,8 +318,6 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
|
||||
if result is None:
|
||||
params.raise_error('No place with that OSM ID found.', status=404)
|
||||
|
||||
result.localize(locales)
|
||||
|
||||
output = formatting.format_result(result, fmt,
|
||||
{'locales': locales,
|
||||
'group_hierarchy': params.get_bool('group_hierarchy', False),
|
||||
@@ -337,6 +336,7 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
|
||||
details = params.parse_geometry_details(fmt)
|
||||
details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
|
||||
details['layers'] = params.get_layers()
|
||||
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
|
||||
|
||||
result = await api.reverse(coord, **details)
|
||||
|
||||
@@ -357,9 +357,6 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
|
||||
'namedetails': params.get_bool('namedetails', False),
|
||||
'addressdetails': params.get_bool('addressdetails', True)}
|
||||
|
||||
if result:
|
||||
result.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
|
||||
|
||||
output = formatting.format_result(napi.ReverseResults([result] if result else []),
|
||||
fmt, fmt_options)
|
||||
|
||||
@@ -372,6 +369,7 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
|
||||
fmt = params.parse_format(napi.SearchResults, 'xml')
|
||||
debug = params.setup_debugging()
|
||||
details = params.parse_geometry_details(fmt)
|
||||
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
|
||||
|
||||
places = []
|
||||
for oid in (params.get('osm_ids') or '').split(','):
|
||||
@@ -394,8 +392,6 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
|
||||
'namedetails': params.get_bool('namedetails', False),
|
||||
'addressdetails': params.get_bool('addressdetails', True)}
|
||||
|
||||
results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
|
||||
|
||||
output = formatting.format_result(results, fmt, fmt_options)
|
||||
|
||||
return params.build_response(output, num_results=len(results))
|
||||
@@ -456,6 +452,8 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
|
||||
else:
|
||||
details['layers'] = params.get_layers()
|
||||
|
||||
details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
|
||||
|
||||
# unstructured query parameters
|
||||
query = params.get('q', None)
|
||||
# structured query parameters
|
||||
@@ -480,8 +478,6 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
|
||||
except UsageError as err:
|
||||
params.raise_error(str(err))
|
||||
|
||||
results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
|
||||
|
||||
if details['dedupe'] and len(results) > 1:
|
||||
results = helpers.deduplicate_results(results, max_results)
|
||||
|
||||
@@ -535,7 +531,7 @@ async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -
|
||||
async def polygons_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
|
||||
""" Server glue for /polygons endpoint.
|
||||
This is a special endpoint that shows polygons that have changed
|
||||
thier size but are kept in the Nominatim database with their
|
||||
their size but are kept in the Nominatim database with their
|
||||
old area to minimize disruption.
|
||||
"""
|
||||
fmt = params.parse_format(RawDataList, 'json')
|
||||
|
||||
@@ -159,13 +159,15 @@ class AdminServe:
|
||||
group = parser.add_argument_group('Server arguments')
|
||||
group.add_argument('--server', default='127.0.0.1:8088',
|
||||
help='The address the server will listen to.')
|
||||
group.add_argument('--engine', default='php',
|
||||
group.add_argument('--engine', default='falcon',
|
||||
choices=('php', 'falcon', 'starlette'),
|
||||
help='Webserver framework to run. (default: php)')
|
||||
help='Webserver framework to run. (default: falcon)')
|
||||
|
||||
|
||||
def run(self, args: NominatimArgs) -> int:
|
||||
if args.engine == 'php':
|
||||
if args.config.lib_dir.php is None:
|
||||
raise UsageError("PHP frontend not configured.")
|
||||
run_php_server(args.server, args.project_dir / 'website')
|
||||
else:
|
||||
import uvicorn # pylint: disable=import-outside-toplevel
|
||||
@@ -206,6 +208,7 @@ def get_set_parser() -> CommandlineParser:
|
||||
parser.add_subcommand('admin', clicmd.AdminFuncs())
|
||||
|
||||
parser.add_subcommand('export', clicmd.QueryExport())
|
||||
parser.add_subcommand('convert', clicmd.ConvertDB())
|
||||
parser.add_subcommand('serve', AdminServe())
|
||||
|
||||
parser.add_subcommand('search', clicmd.APISearch())
|
||||
|
||||
@@ -25,3 +25,4 @@ from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
|
||||
from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
|
||||
from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
|
||||
from nominatim.clicmd.export import QueryExport as QueryExport
|
||||
from nominatim.clicmd.convert import ConvertDB as ConvertDB
|
||||
|
||||
@@ -41,6 +41,8 @@ class AdminFuncs:
|
||||
help='Print performance analysis of the indexing process')
|
||||
objs.add_argument('--collect-os-info', action="store_true",
|
||||
help="Generate a report about the host system information")
|
||||
objs.add_argument('--clean-deleted', action='store', metavar='AGE',
|
||||
help='Clean up deleted relations')
|
||||
group = parser.add_argument_group('Arguments for cache warming')
|
||||
group.add_argument('--search-only', action='store_const', dest='target',
|
||||
const='search',
|
||||
@@ -55,7 +57,9 @@ class AdminFuncs:
|
||||
mgroup.add_argument('--place-id', type=int,
|
||||
help='Analyse indexing of the given Nominatim object')
|
||||
|
||||
|
||||
def run(self, args: NominatimArgs) -> int:
|
||||
# pylint: disable=too-many-return-statements
|
||||
if args.warm:
|
||||
return self._warm(args)
|
||||
|
||||
@@ -81,6 +85,12 @@ class AdminFuncs:
|
||||
collect_os_info.report_system_information(args.config)
|
||||
return 0
|
||||
|
||||
if args.clean_deleted:
|
||||
LOG.warning('Cleaning up deleted relations')
|
||||
from ..tools import admin
|
||||
admin.clean_deleted_relations(args.config, age=args.clean_deleted)
|
||||
return 0
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
@@ -90,17 +100,20 @@ class AdminFuncs:
|
||||
api = napi.NominatimAPI(args.project_dir)
|
||||
|
||||
try:
|
||||
if args.target != 'reverse':
|
||||
if args.target != 'search':
|
||||
for _ in range(1000):
|
||||
api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
|
||||
address_details=True)
|
||||
|
||||
if args.target != 'search':
|
||||
if args.target != 'reverse':
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
words = tokenizer.most_frequent_words(conn, 1000)
|
||||
if conn.table_exists('search_name'):
|
||||
words = tokenizer.most_frequent_words(conn, 1000)
|
||||
else:
|
||||
words = []
|
||||
|
||||
for word in words:
|
||||
api.search(word)
|
||||
|
||||
@@ -109,7 +109,8 @@ class APISearch:
|
||||
'countries': args.countrycodes,
|
||||
'excluded': args.exclude_place_ids,
|
||||
'viewbox': args.viewbox,
|
||||
'bounded_viewbox': args.bounded
|
||||
'bounded_viewbox': args.bounded,
|
||||
'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
|
||||
}
|
||||
|
||||
if args.query:
|
||||
@@ -124,9 +125,6 @@ class APISearch:
|
||||
country=args.country,
|
||||
**params)
|
||||
|
||||
for result in results:
|
||||
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
|
||||
|
||||
if args.dedupe and len(results) > 1:
|
||||
results = deduplicate_results(results, args.limit)
|
||||
|
||||
@@ -187,14 +185,14 @@ class APIReverse:
|
||||
layers=args.get_layers(napi.DataLayer.ADDRESS | napi.DataLayer.POI),
|
||||
address_details=True, # needed for display name
|
||||
geometry_output=args.get_geometry_output(),
|
||||
geometry_simplification=args.polygon_threshold)
|
||||
geometry_simplification=args.polygon_threshold,
|
||||
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
|
||||
|
||||
if args.format == 'debug':
|
||||
print(loglib.get_and_disable())
|
||||
return 0
|
||||
|
||||
if result:
|
||||
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
|
||||
output = api_output.format_result(
|
||||
napi.ReverseResults([result]),
|
||||
args.format,
|
||||
@@ -249,10 +247,8 @@ class APILookup:
|
||||
results = api.lookup(places,
|
||||
address_details=True, # needed for display name
|
||||
geometry_output=args.get_geometry_output(),
|
||||
geometry_simplification=args.polygon_threshold or 0.0)
|
||||
|
||||
for result in results:
|
||||
result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
|
||||
geometry_simplification=args.polygon_threshold or 0.0,
|
||||
locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
|
||||
|
||||
output = api_output.format_result(
|
||||
results,
|
||||
@@ -326,6 +322,7 @@ class APIDetails:
|
||||
|
||||
api = napi.NominatimAPI(args.project_dir)
|
||||
|
||||
locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
|
||||
result = api.details(place,
|
||||
address_details=args.addressdetails,
|
||||
linked_places=args.linkedplaces,
|
||||
@@ -333,13 +330,11 @@ class APIDetails:
|
||||
keywords=args.keywords,
|
||||
geometry_output=napi.GeometryFormat.GEOJSON
|
||||
if args.polygon_geojson
|
||||
else napi.GeometryFormat.NONE)
|
||||
else napi.GeometryFormat.NONE,
|
||||
locales=locales)
|
||||
|
||||
|
||||
if result:
|
||||
locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
|
||||
result.localize(locales)
|
||||
|
||||
output = api_output.format_result(
|
||||
result,
|
||||
'json',
|
||||
|
||||
@@ -72,6 +72,7 @@ class NominatimArgs:
|
||||
check_database: bool
|
||||
migrate: bool
|
||||
collect_os_info: bool
|
||||
clean_deleted: str
|
||||
analyse_indexing: bool
|
||||
target: Optional[str]
|
||||
osm_id: Optional[str]
|
||||
@@ -86,6 +87,7 @@ class NominatimArgs:
|
||||
offline: bool
|
||||
ignore_errors: bool
|
||||
index_noanalyse: bool
|
||||
prepare_database: bool
|
||||
|
||||
# Arguments to 'index'
|
||||
boundaries_only: bool
|
||||
@@ -100,6 +102,9 @@ class NominatimArgs:
|
||||
language: Optional[str]
|
||||
restrict_to_country: Optional[str]
|
||||
|
||||
# Arguments to 'convert'
|
||||
output: Path
|
||||
|
||||
# Arguments to 'refresh'
|
||||
postcodes: bool
|
||||
word_tokens: bool
|
||||
|
||||
nominatim/clicmd/convert.py (new file, 95 lines)
@@ -0,0 +1,95 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Implementation of the 'convert' subcommand.
|
||||
"""
|
||||
from typing import Set, Any, Union, Optional, Sequence
|
||||
import argparse
|
||||
import asyncio
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim.clicmd.args import NominatimArgs
|
||||
from nominatim.errors import UsageError
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
# Using non-top-level imports to avoid eventually unused imports.
|
||||
# pylint: disable=E0012,C0415
|
||||
|
||||
class WithAction(argparse.Action):
|
||||
""" Special action that saves a list of flags, given on the command-line
|
||||
as `--with-foo` or `--without-foo`.
|
||||
"""
|
||||
def __init__(self, option_strings: Sequence[str], dest: Any,
|
||||
default: bool = True, **kwargs: Any) -> None:
|
||||
if 'nargs' in kwargs:
|
||||
raise ValueError("nargs not allowed.")
|
||||
if option_strings is None:
|
||||
raise ValueError("Positional parameter not allowed.")
|
||||
|
||||
self.dest_set = kwargs.pop('dest_set')
|
||||
full_option_strings = []
|
||||
for opt in option_strings:
|
||||
if not opt.startswith('--'):
|
||||
raise ValueError("short-form options not allowed")
|
||||
if default:
|
||||
self.dest_set.add(opt[2:])
|
||||
full_option_strings.append(f"--with-{opt[2:]}")
|
||||
full_option_strings.append(f"--without-{opt[2:]}")
|
||||
|
||||
super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
|
||||
|
||||
|
||||
def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
|
||||
values: Union[str, Sequence[Any], None],
|
||||
option_string: Optional[str] = None) -> None:
|
||||
assert option_string
|
||||
if option_string.startswith('--with-'):
|
||||
self.dest_set.add(option_string[7:])
|
||||
if option_string.startswith('--without-'):
|
||||
self.dest_set.discard(option_string[10:])
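WithAction registers both a `--with-<name>` and a `--without-<name>` variant for every declared option and records the outcome in a shared set rather than in individual namespace attributes. A short usage sketch building on the class above (parser and option names chosen for illustration; note that only the with/without forms are actually registered):

    import argparse

    options = set()
    parser = argparse.ArgumentParser()
    parser.add_argument('--reverse', action=WithAction, dest_set=options, default=True,
                        help='Enable/disable reverse support')
    parser.add_argument('--search', action=WithAction, dest_set=options, default=False,
                        help='Enable/disable search support')

    parser.parse_args(['--without-reverse', '--with-search'])
    print(options)   # {'search'}: 'reverse' was discarded, 'search' was added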
class ConvertDB:
|
||||
""" Convert an existing database into a different format. (EXPERIMENTAL)
|
||||
|
||||
Dump a read-only version of the database in a different format.
|
||||
At the moment only a SQLite database suitable for reverse lookup
|
||||
can be created.
|
||||
"""
|
||||
|
||||
def __init__(self) -> None:
|
||||
self.options: Set[str] = set()
|
||||
|
||||
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
||||
parser.add_argument('--format', default='sqlite',
|
||||
choices=('sqlite', ),
|
||||
help='Format of the output database (must be sqlite currently)')
|
||||
parser.add_argument('--output', '-o', required=True, type=Path,
|
||||
help='File to write the database to.')
|
||||
group = parser.add_argument_group('Switches to define database layout'
|
||||
'(currently no effect)')
|
||||
group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for reverse and lookup API'
|
||||
' (default: enabled)')
|
||||
group.add_argument('--search', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for search API (default: disabled)')
|
||||
group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
|
||||
help='Enable/disable support for details API (default: enabled)')
|
||||
|
||||
|
||||
def run(self, args: NominatimArgs) -> int:
|
||||
if args.output.exists():
|
||||
raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
|
||||
|
||||
if args.format == 'sqlite':
|
||||
from ..tools import convert_sqlite
|
||||
|
||||
asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
|
||||
return 0
|
||||
|
||||
return 1
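Given the subcommand registration shown earlier (`parser.add_subcommand('convert', clicmd.ConvertDB())`), a typical invocation would look roughly like the line below; the flag spellings follow the arguments defined above, and the command refuses to overwrite an existing output file:

    nominatim convert --format sqlite -o nominatim.sqlite --without-search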
@@ -102,7 +102,8 @@ async def export(args: NominatimArgs) -> int:
|
||||
async with api.begin() as conn, api.begin() as detail_conn:
|
||||
t = conn.t.placex
|
||||
|
||||
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
sql = sa.select(t.c.place_id, t.c.parent_place_id,
|
||||
t.c.osm_type, t.c.osm_id, t.c.name,
|
||||
t.c.class_, t.c.type, t.c.admin_level,
|
||||
t.c.address, t.c.extratags,
|
||||
t.c.housenumber, t.c.postcode, t.c.country_code,
|
||||
@@ -153,17 +154,15 @@ async def dump_results(conn: napi.SearchConnection,
|
||||
results: List[ReverseResult],
|
||||
writer: 'csv.DictWriter[str]',
|
||||
lang: Optional[str]) -> None:
|
||||
await add_result_details(conn, results,
|
||||
LookupDetails(address_details=True))
|
||||
|
||||
|
||||
locale = napi.Locales([lang] if lang else None)
|
||||
await add_result_details(conn, results,
|
||||
LookupDetails(address_details=True, locales=locale))
|
||||
|
||||
|
||||
for result in results:
|
||||
data = {'placeid': result.place_id,
|
||||
'postcode': result.postcode}
|
||||
|
||||
result.localize(locale)
|
||||
for line in (result.address_rows or []):
|
||||
if line.isaddress and line.local_name:
|
||||
if line.category[1] == 'postcode':
|
||||
|
||||
@@ -110,7 +110,7 @@ class UpdateRefresh:
|
||||
|
||||
if args.word_counts:
|
||||
LOG.warning('Recompute word statistics')
|
||||
self._get_tokenizer(args.config).update_statistics()
|
||||
self._get_tokenizer(args.config).update_statistics(args.config)
|
||||
|
||||
if args.address_levels:
|
||||
LOG.warning('Updating address levels')
|
||||
@@ -128,7 +128,7 @@ class UpdateRefresh:
|
||||
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
|
||||
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
|
||||
args.project_dir) > 0:
|
||||
LOG.fatal('FATAL: Cannot update sendary importance raster data')
|
||||
LOG.fatal('FATAL: Cannot update secondary importance raster data')
|
||||
return 1
|
||||
|
||||
if args.functions:
|
||||
@@ -141,10 +141,10 @@ class UpdateRefresh:
|
||||
if args.wiki_data:
|
||||
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
|
||||
or args.project_dir)
|
||||
LOG.warning('Import wikipdia article importance from %s', data_path)
|
||||
LOG.warning('Import wikipedia article importance from %s', data_path)
|
||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||
data_path) > 0:
|
||||
LOG.fatal('FATAL: Wikipedia importance dump file not found')
|
||||
LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
|
||||
return 1
|
||||
|
||||
# Attention: importance MUST come after wiki data import.
|
||||
|
||||
@@ -142,7 +142,7 @@ class UpdateReplication:
|
||||
if not args.do_index:
|
||||
LOG.fatal("Indexing cannot be disabled when running updates continuously.")
|
||||
raise UsageError("Bad argument '--no-index'.")
|
||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, args.threads or 1)
|
||||
|
||||
@@ -39,14 +39,15 @@ class SetupAll:
|
||||
"""
|
||||
|
||||
def add_args(self, parser: argparse.ArgumentParser) -> None:
|
||||
group_name = parser.add_argument_group('Required arguments')
|
||||
group1 = group_name.add_mutually_exclusive_group(required=True)
|
||||
group1 = parser.add_argument_group('Required arguments')
|
||||
group1.add_argument('--osm-file', metavar='FILE', action='append',
|
||||
help='OSM file to be imported'
|
||||
' (repeat for importing multiple files)')
|
||||
' (repeat for importing multiple files)',
|
||||
default=None)
|
||||
group1.add_argument('--continue', dest='continue_at',
|
||||
choices=['load-data', 'indexing', 'db-postprocess'],
|
||||
help='Continue an import that was interrupted')
|
||||
choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
|
||||
help='Continue an import that was interrupted',
|
||||
default=None)
|
||||
group2 = parser.add_argument_group('Optional arguments')
|
||||
group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
|
||||
help='Size of cache to be used by osm2pgsql (in MB)')
|
||||
@@ -65,9 +66,11 @@ class SetupAll:
|
||||
help='Continue import even when errors in SQL are present')
|
||||
group3.add_argument('--index-noanalyse', action='store_true',
|
||||
help='Do not perform analyse operations during index (expert only)')
|
||||
group3.add_argument('--prepare-database', action='store_true',
|
||||
help='Create the database but do not import any data')
|
||||
|
||||
|
||||
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements
|
||||
def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
|
||||
from ..data import country_info
|
||||
from ..tools import database_import, refresh, postcodes, freeze
|
||||
from ..indexer.indexer import Indexer
|
||||
@@ -76,43 +79,61 @@ class SetupAll:
|
||||
|
||||
country_info.setup_country_config(args.config)
|
||||
|
||||
if args.continue_at is None:
|
||||
if args.osm_file is None and args.continue_at is None and not args.prepare_database:
|
||||
raise UsageError("No input files (use --osm-file).")
|
||||
|
||||
if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
|
||||
raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
|
||||
|
||||
if args.continue_at is not None and args.prepare_database:
|
||||
raise UsageError(
|
||||
"Cannot use --continue and --prepare-database together."
|
||||
)
|
||||
|
||||
|
||||
if args.prepare_database or args.continue_at is None:
|
||||
LOG.warning('Creating database')
|
||||
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
|
||||
rouser=args.config.DATABASE_WEBUSER)
|
||||
if args.prepare_database:
|
||||
return 0
|
||||
|
||||
if args.continue_at in (None, 'import-from-file'):
|
||||
files = args.get_osm_file_list()
|
||||
if not files:
|
||||
raise UsageError("No input files (use --osm-file).")
|
||||
|
||||
LOG.warning('Creating database')
|
||||
database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
|
||||
rouser=args.config.DATABASE_WEBUSER)
|
||||
if args.continue_at in ('import-from-file', None):
|
||||
# Check if the correct plugins are installed
|
||||
database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
|
||||
LOG.warning('Setting up country tables')
|
||||
country_info.setup_country_tables(args.config.get_libpq_dsn(),
|
||||
args.config.lib_dir.data,
|
||||
args.no_partitions)
|
||||
|
||||
LOG.warning('Setting up country tables')
|
||||
country_info.setup_country_tables(args.config.get_libpq_dsn(),
|
||||
args.config.lib_dir.data,
|
||||
args.no_partitions)
|
||||
LOG.warning('Importing OSM data file')
|
||||
database_import.import_osm_data(files,
|
||||
args.osm2pgsql_options(0, 1),
|
||||
drop=args.no_updates,
|
||||
ignore_errors=args.ignore_errors)
|
||||
|
||||
LOG.warning('Importing OSM data file')
|
||||
database_import.import_osm_data(files,
|
||||
args.osm2pgsql_options(0, 1),
|
||||
drop=args.no_updates,
|
||||
ignore_errors=args.ignore_errors)
|
||||
LOG.warning('Importing wikipedia importance data')
|
||||
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
|
||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||
data_path) > 0:
|
||||
LOG.error('Wikipedia importance dump file not found. '
|
||||
'Calculating importance values of locations will not '
|
||||
'use Wikipedia importance data.')
|
||||
|
||||
LOG.warning('Importing wikipedia importance data')
|
||||
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
|
||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||
data_path) > 0:
|
||||
LOG.error('Wikipedia importance dump file not found. '
|
||||
'Calculating importance values of locations will not '
|
||||
'use Wikipedia importance data.')
|
||||
LOG.warning('Importing secondary importance raster data')
|
||||
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
|
||||
args.project_dir) != 0:
|
||||
LOG.error('Secondary importance file not imported. '
|
||||
'Falling back to default ranking.')
|
||||
|
||||
LOG.warning('Importing secondary importance raster data')
|
||||
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
|
||||
args.project_dir) != 0:
|
||||
LOG.error('Secondary importance file not imported. '
|
||||
'Falling back to default ranking.')
|
||||
self._setup_tables(args.config, args.reverse_only)
|
||||
|
||||
self._setup_tables(args.config, args.reverse_only)
|
||||
|
||||
if args.continue_at is None or args.continue_at == 'load-data':
|
||||
if args.continue_at in ('import-from-file', 'load-data', None):
|
||||
LOG.warning('Initialise tables')
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
database_import.truncate_data_tables(conn)
|
||||
@@ -123,12 +144,13 @@ class SetupAll:
|
||||
LOG.warning("Setting up tokenizer")
|
||||
tokenizer = self._get_tokenizer(args.continue_at, args.config)
|
||||
|
||||
if args.continue_at is None or args.continue_at == 'load-data':
|
||||
if args.continue_at in ('import-from-file', 'load-data', None):
|
||||
LOG.warning('Calculate postcodes')
|
||||
postcodes.update_postcodes(args.config.get_libpq_dsn(),
|
||||
args.project_dir, tokenizer)
|
||||
|
||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||
if args.continue_at in \
|
||||
('import-from-file', 'load-data', 'indexing', None):
|
||||
LOG.warning('Indexing places')
|
||||
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
|
||||
indexer.index_full(analyse=not args.index_noanalyse)
|
||||
@@ -146,7 +168,7 @@ class SetupAll:
|
||||
tokenizer.finalize_import(args.config)
|
||||
|
||||
LOG.warning('Recompute word counts')
|
||||
tokenizer.update_statistics()
|
||||
tokenizer.update_statistics(args.config)
|
||||
|
||||
webdir = args.project_dir / 'website'
|
||||
LOG.warning('Setup website at %s', webdir)
|
||||
@@ -185,7 +207,7 @@ class SetupAll:
|
||||
"""
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
if continue_at is None or continue_at == 'load-data':
|
||||
if continue_at in ('import-from-file', 'load-data', None):
|
||||
# (re)initialise the tokenizer data
|
||||
return tokenizer_factory.create_tokenizer(config)
|
||||
|
||||
@@ -197,12 +219,11 @@ class SetupAll:
|
||||
""" Determine the database date and set the status accordingly.
|
||||
"""
|
||||
with connect(dsn) as conn:
|
||||
if not offline:
|
||||
try:
|
||||
dbdate = status.compute_database_date(conn)
|
||||
status.set_status(conn, dbdate)
|
||||
LOG.info('Database is at %s.', dbdate)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOG.error('Cannot determine date of database: %s', exc)
|
||||
|
||||
properties.set_property(conn, 'database_version', str(NOMINATIM_VERSION))
|
||||
|
||||
try:
|
||||
dbdate = status.compute_database_date(conn, offline)
|
||||
status.set_status(conn, dbdate)
|
||||
LOG.info('Database is at %s.', dbdate)
|
||||
except Exception as exc: # pylint: disable=broad-except
|
||||
LOG.error('Cannot determine date of database: %s', exc)
|
||||
|
||||
@@ -25,7 +25,7 @@ class CountryPostcodeMatcher:
|
||||
|
||||
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
|
||||
|
||||
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
|
||||
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
|
||||
self.pattern = re.compile(pc_pattern)
|
||||
|
||||
self.output = config.get('output', r'\g<0>')
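The fix anchors the configured postcode pattern inside the normalization regex, so an optional country prefix is only stripped when a valid postcode actually follows. A small sketch of how such a pattern expands and matches (the configuration values are examples, not taken from the real country data):

    import re

    country_code = 'de'
    config = {'pattern': 'ddddd'}                      # example: five digits

    pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
    norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')

    m = norm_pattern.fullmatch('DE-12345')
    print(m.group(1) if m else None)                   # '12345'

    print(norm_pattern.fullmatch('DE-'))               # None: a bare prefix no longer matches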
@@ -69,8 +69,8 @@ class DBConnection:
|
||||
self.current_params: Optional[Sequence[Any]] = None
|
||||
self.ignore_sql_errors = ignore_sql_errors
|
||||
|
||||
self.conn: Optional['psycopg2.connection'] = None
|
||||
self.cursor: Optional['psycopg2.cursor'] = None
|
||||
self.conn: Optional['psycopg2._psycopg.connection'] = None
|
||||
self.cursor: Optional['psycopg2._psycopg.cursor'] = None
|
||||
self.connect(cursor_factory=cursor_factory)
|
||||
|
||||
def close(self) -> None:
|
||||
@@ -78,7 +78,7 @@ class DBConnection:
|
||||
"""
|
||||
if self.conn is not None:
|
||||
if self.cursor is not None:
|
||||
self.cursor.close() # type: ignore[no-untyped-call]
|
||||
self.cursor.close()
|
||||
self.cursor = None
|
||||
self.conn.close()
|
||||
|
||||
|
||||
@@ -5,17 +5,19 @@
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Import the base libary to use with asynchronous SQLAlchemy.
|
||||
Import the base library to use with asynchronous SQLAlchemy.
|
||||
"""
|
||||
# pylint: disable=invalid-name
|
||||
# pylint: disable=invalid-name, ungrouped-imports, unused-import
|
||||
|
||||
from typing import Any
|
||||
|
||||
try:
|
||||
import sqlalchemy.dialects.postgresql.psycopg
|
||||
import psycopg
|
||||
PGCORE_LIB = 'psycopg'
|
||||
PGCORE_ERROR: Any = psycopg.Error
|
||||
except ModuleNotFoundError:
|
||||
import sqlalchemy.dialects.postgresql.asyncpg
|
||||
import asyncpg
|
||||
PGCORE_LIB = 'asyncpg'
|
||||
PGCORE_ERROR = asyncpg.PostgresError
|
||||
|
||||
@@ -31,7 +31,7 @@ class Cursor(psycopg2.extras.DictCursor):
|
||||
""" Query execution that logs the SQL query when debugging is enabled.
|
||||
"""
|
||||
if LOG.isEnabledFor(logging.DEBUG):
|
||||
LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[no-untyped-call]
|
||||
LOG.debug(self.mogrify(query, args).decode('utf-8'))
|
||||
|
||||
super().execute(query, args)
|
||||
|
||||
@@ -174,6 +174,15 @@ class Connection(psycopg2.extensions.connection):
|
||||
|
||||
return (int(version_parts[0]), int(version_parts[1]))
|
||||
|
||||
|
||||
def extension_loaded(self, extension_name: str) -> bool:
|
||||
""" Return True if the hstore extension is loaded in the database.
|
||||
"""
|
||||
with self.cursor() as cur:
|
||||
cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
|
||||
return cur.rowcount > 0
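A typical use of the new helper is a guard before relying on optional PostgreSQL extensions. A hedged example (the extension names are just common candidates, and the DSN is assumed to come from the configuration):

    from nominatim.db.connection import connect

    with connect(dsn) as conn:
        if not conn.extension_loaded('postgis'):
            raise RuntimeError('PostGIS is required but not installed in this database.')
        has_hstore = conn.extension_loaded('hstore')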
class ConnectionContext(ContextManager[Connection]):
|
||||
""" Context manager of the connection that also provides direct access
|
||||
to the underlying connection.
|
||||
@@ -230,7 +239,7 @@ _PG_CONNECTION_STRINGS = {
|
||||
def get_pg_env(dsn: str,
|
||||
base_env: Optional[SysEnv] = None) -> Dict[str, str]:
|
||||
""" Return a copy of `base_env` with the environment variables for
|
||||
PostgresSQL set up from the given database connection string.
|
||||
PostgreSQL set up from the given database connection string.
|
||||
If `base_env` is None, then the OS environment is used as a base
|
||||
environment.
|
||||
"""
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
"""
|
||||
Preprocessing of SQL files.
|
||||
"""
|
||||
from typing import Set, Dict, Any
|
||||
from typing import Set, Dict, Any, cast
|
||||
import jinja2
|
||||
|
||||
from nominatim.db.connection import Connection
|
||||
@@ -28,13 +28,24 @@ def _get_partitions(conn: Connection) -> Set[int]:
|
||||
|
||||
def _get_tables(conn: Connection) -> Set[str]:
|
||||
""" Return the set of tables currently in use.
|
||||
Only includes non-partitioned
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
|
||||
|
||||
return set((row[0] for row in list(cur)))
|
||||
|
||||
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
|
||||
""" Returns the version of the slim middle tables.
|
||||
"""
|
||||
if 'osm2pgsql_properties' not in tables:
|
||||
return '1'
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("SELECT value FROM osm2pgsql_properties WHERE property = 'db_format'")
|
||||
row = cur.fetchone()
|
||||
|
||||
return cast(str, row[0]) if row is not None else '1'
|
||||
|
||||
|
||||
def _setup_tablespace_sql(config: Configuration) -> Dict[str, str]:
|
||||
""" Returns a dict with tablespace expressions for the different tablespace
|
||||
@@ -84,12 +95,25 @@ class SQLPreprocessor:
|
||||
db_info['tables'] = _get_tables(conn)
|
||||
db_info['reverse_only'] = 'search_name' not in db_info['tables']
|
||||
db_info['tablespace'] = _setup_tablespace_sql(config)
|
||||
db_info['middle_db_format'] = _get_middle_db_format(conn, db_info['tables'])
|
||||
|
||||
self.env.globals['config'] = config
|
||||
self.env.globals['db'] = db_info
|
||||
self.env.globals['postgres'] = _setup_postgresql_features(conn)
|
||||
|
||||
|
||||
def run_string(self, conn: Connection, template: str, **kwargs: Any) -> None:
|
||||
""" Execute the given SQL template string on the connection.
|
||||
The keyword arguments may supply additional parameters
|
||||
for preprocessing.
|
||||
"""
|
||||
sql = self.env.from_string(template).render(**kwargs)
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(sql)
|
||||
conn.commit()
|
||||
|
||||
|
||||
def run_sql_file(self, conn: Connection, name: str, **kwargs: Any) -> None:
|
||||
""" Execute the given SQL file on the connection. The keyword arguments
|
||||
may supply additional parameters for preprocessing.
|
||||
@@ -103,7 +127,7 @@ class SQLPreprocessor:
|
||||
|
||||
def run_parallel_sql_file(self, dsn: str, name: str, num_threads: int = 1,
|
||||
**kwargs: Any) -> None:
|
||||
""" Execure the given SQL files using parallel asynchronous connections.
|
||||
""" Execute the given SQL files using parallel asynchronous connections.
|
||||
The keyword arguments may supply additional parameters for
|
||||
preprocessing.
|
||||
|
||||
|
||||
@@ -7,28 +7,215 @@
|
||||
"""
|
||||
Custom functions and expressions for SQLAlchemy.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
|
||||
def select_index_placex_geometry_reverse_lookuppolygon(table: str) -> 'sa.TextClause':
|
||||
""" Create an expression with the necessary conditions over a placex
|
||||
table that the index 'idx_placex_geometry_reverse_lookupPolygon'
|
||||
can be used.
|
||||
"""
|
||||
return sa.text(f"ST_GeometryType({table}.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
|
||||
f" AND {table}.rank_address between 4 and 25"
|
||||
f" AND {table}.type != 'postcode'"
|
||||
f" AND {table}.name is not null"
|
||||
f" AND {table}.indexed_status = 0"
|
||||
f" AND {table}.linked_place_id is null")
|
||||
from nominatim.typing import SaColumn
|
||||
|
||||
def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
|
||||
""" Create an expression with the necessary conditions over a placex
|
||||
table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
|
||||
can be used.
|
||||
# pylint: disable=all
|
||||
|
||||
class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Check for conditions that allow partial index use on
|
||||
'idx_placex_geometry_reverse_lookupPolygon'.
|
||||
|
||||
Needs to be constant, so that the query planner picks them up correctly
|
||||
in prepared statements.
|
||||
"""
|
||||
return sa.text(f"{table}.rank_address between 4 and 25"
|
||||
f" AND {table}.type != 'postcode'"
|
||||
f" AND {table}.name is not null"
|
||||
f" AND {table}.linked_place_id is null"
|
||||
f" AND {table}.osm_type = 'N'")
|
||||
name = 'PlacexGeometryReverseLookuppolygon'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
|
||||
def _default_intersects(element: PlacexGeometryReverseLookuppolygon,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
|
||||
" AND placex.rank_address between 4 and 25"
|
||||
" AND placex.type != 'postcode'"
|
||||
" AND placex.name is not null"
|
||||
" AND placex.indexed_status = 0"
|
||||
" AND placex.linked_place_id is null)")
|
||||
|
||||
|
||||
@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_intersects(element: PlacexGeometryReverseLookuppolygon,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
|
||||
" AND placex.rank_address between 4 and 25"
|
||||
" AND placex.type != 'postcode'"
|
||||
" AND placex.name is not null"
|
||||
" AND placex.indexed_status = 0"
|
||||
" AND placex.linked_place_id is null)")
|
||||
|
||||
|
||||
class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
|
||||
name = 'IntersectsReverseDistance'
|
||||
inherit_cache = True
|
||||
|
||||
def __init__(self, table: sa.Table, geom: SaColumn) -> None:
|
||||
super().__init__(table.c.geometry,
|
||||
table.c.rank_search, geom)
|
||||
self.tablename = table.name
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
|
||||
def default_reverse_place_diameter(element: IntersectsReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
table = element.tablename
|
||||
return f"({table}.rank_address between 4 and 25"\
|
||||
f" AND {table}.type != 'postcode'"\
|
||||
f" AND {table}.name is not null"\
|
||||
f" AND {table}.linked_place_id is null"\
|
||||
f" AND {table}.osm_type = 'N'" + \
|
||||
" AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
|
||||
tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
|
||||
|
||||
|
||||
@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_reverse_place_diameter(element: IntersectsReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
geom1, rank, geom2 = list(element.clauses)
|
||||
table = element.tablename
|
||||
|
||||
return (f"({table}.rank_address between 4 and 25"\
|
||||
f" AND {table}.type != 'postcode'"\
|
||||
f" AND {table}.name is not null"\
|
||||
f" AND {table}.linked_place_id is null"\
|
||||
f" AND {table}.osm_type = 'N'"\
|
||||
" AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))"\
|
||||
f" AND {table}.place_id IN"\
|
||||
" (SELECT place_id FROM placex_place_node_areas"\
|
||||
" WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
|
||||
" WHERE f_table_name = 'placex_place_node_areas'"\
|
||||
" AND search_frame = %s)))") % (
|
||||
compiler.process(geom1, **kw),
|
||||
compiler.process(geom2, **kw),
|
||||
compiler.process(rank, **kw),
|
||||
compiler.process(geom2, **kw))
|
||||
|
||||
|
||||
class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
|
||||
name = 'IsBelowReverseDistance'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
|
||||
def default_is_below_reverse_distance(element: IsBelowReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
dist, rank = list(element.clauses)
|
||||
return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
|
||||
compiler.process(rank, **kw))
|
||||
|
||||
|
||||
@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_is_below_reverse_distance(element: IsBelowReverseDistance,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
dist, rank = list(element.clauses)
|
||||
return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
|
||||
compiler.process(rank, **kw))
|
||||
|
||||
|
||||
class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
|
||||
name = 'IsAddressPoint'
|
||||
inherit_cache = True
|
||||
|
||||
def __init__(self, table: sa.Table) -> None:
|
||||
super().__init__(table.c.rank_address,
|
||||
table.c.housenumber, table.c.name)
|
||||
|
||||
|
||||
@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
|
||||
def default_is_address_point(element: IsAddressPoint,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
rank, hnr, name = list(element.clauses)
|
||||
return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
|
||||
compiler.process(rank, **kw),
|
||||
compiler.process(hnr, **kw),
|
||||
compiler.process(name, **kw))
|
||||
|
||||
|
||||
@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_is_address_point(element: IsAddressPoint,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
rank, hnr, name = list(element.clauses)
|
||||
return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
|
||||
compiler.process(rank, **kw),
|
||||
compiler.process(hnr, **kw),
|
||||
compiler.process(name, **kw))
|
||||
|
||||
|
||||
class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Check if in the given list of names in parameters 1 any of the names
|
||||
from the JSON array in parameter 2 are contained.
|
||||
"""
|
||||
name = 'CrosscheckNames'
|
||||
inherit_cache = True
|
||||
|
||||
@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
|
||||
def compile_crosscheck_names(element: CrosscheckNames,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
|
||||
compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def compile_sqlite_crosscheck_names(element: CrosscheckNames,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "EXISTS(SELECT *"\
|
||||
" FROM json_each(%s) as name, json_each(%s) as match_name"\
|
||||
" WHERE name.value = match_name.value)"\
|
||||
% (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Return elements of a json array as a set.
|
||||
"""
|
||||
name = 'JsonArrayEach'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
|
||||
def default_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_json_array_each(element: JsonArrayEach, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "json_each(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class Greatest(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Function to compute maximum of all its input parameters.
|
||||
"""
|
||||
name = 'greatest'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_greatest(element: Greatest, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "max(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class RegexpWord(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Check if a full word is in a given string.
|
||||
"""
|
||||
name = 'RegexpWord'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'postgresql') # type: ignore[no-untyped-call, misc]
|
||||
def postgres_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s ~* ('\\m(' || %s || ')\\M')::text" % (compiler.process(arg2, **kw), compiler.process(arg1, **kw))
|
||||
|
||||
|
||||
@compiles(RegexpWord, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_regexp_nocase(element: RegexpWord, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "regexp('\\b(' || %s || ')\\b', %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
@@ -7,50 +7,20 @@
|
||||
"""
|
||||
SQLAlchemy definitions for all tables used by the frontend.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
|
||||
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
|
||||
|
||||
from nominatim.db.sqlalchemy_types import Geometry
|
||||
|
||||
class PostgresTypes:
|
||||
""" Type definitions for complex types as used in Postgres variants.
|
||||
"""
|
||||
Composite = HSTORE
|
||||
Json = JSONB
|
||||
IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
|
||||
to_array = array
|
||||
|
||||
|
||||
class SqliteTypes:
|
||||
""" Type definitions for complex types as used in Postgres variants.
|
||||
"""
|
||||
Composite = sqlite_json
|
||||
Json = sqlite_json
|
||||
IntArray = sqlite_json
|
||||
|
||||
@staticmethod
|
||||
def to_array(arr: Any) -> Any:
|
||||
""" Sqlite has no special conversion for arrays.
|
||||
"""
|
||||
return arr
|
||||
|
||||
import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
|
||||
from nominatim.db.sqlalchemy_types import Geometry, KeyValueStore, IntArray
|
||||
|
||||
#pylint: disable=too-many-instance-attributes
|
||||
class SearchTables:
|
||||
""" Data class that holds the tables of the Nominatim database.
|
||||
|
||||
This schema strictly reflects the read-access view of the database.
|
||||
Any data used for updates only will not be visible.
|
||||
"""
|
||||
|
||||
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
|
||||
if engine_name == 'postgresql':
|
||||
self.types: Any = PostgresTypes
|
||||
elif engine_name == 'sqlite':
|
||||
self.types = SqliteTypes
|
||||
else:
|
||||
raise ValueError("Only 'postgresql' and 'sqlite' engines are supported.")
|
||||
|
||||
def __init__(self, meta: sa.MetaData) -> None:
|
||||
self.meta = meta
|
||||
|
||||
self.import_status = sa.Table('import_status', meta,
|
||||
@@ -63,23 +33,22 @@ class SearchTables:
|
||||
sa.Column('value', sa.Text))
|
||||
|
||||
self.placex = sa.Table('placex', meta,
|
||||
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
|
||||
sa.Column('place_id', sa.BigInteger, nullable=False),
|
||||
sa.Column('parent_place_id', sa.BigInteger),
|
||||
sa.Column('linked_place_id', sa.BigInteger),
|
||||
sa.Column('importance', sa.Float),
|
||||
sa.Column('indexed_date', sa.DateTime),
|
||||
sa.Column('rank_address', sa.SmallInteger),
|
||||
sa.Column('rank_search', sa.SmallInteger),
|
||||
sa.Column('partition', sa.SmallInteger),
|
||||
sa.Column('indexed_status', sa.SmallInteger),
|
||||
sa.Column('osm_type', sa.String(1), nullable=False),
|
||||
sa.Column('osm_id', sa.BigInteger, nullable=False),
|
||||
sa.Column('class', sa.Text, nullable=False, key='class_'),
|
||||
sa.Column('type', sa.Text, nullable=False),
|
||||
sa.Column('admin_level', sa.SmallInteger),
|
||||
sa.Column('name', self.types.Composite),
|
||||
sa.Column('address', self.types.Composite),
|
||||
sa.Column('extratags', self.types.Composite),
|
||||
sa.Column('name', KeyValueStore),
|
||||
sa.Column('address', KeyValueStore),
|
||||
sa.Column('extratags', KeyValueStore),
|
||||
sa.Column('geometry', Geometry, nullable=False),
|
||||
sa.Column('wikipedia', sa.Text),
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
@@ -88,44 +57,41 @@ class SearchTables:
|
||||
sa.Column('centroid', Geometry))
|
||||
|
||||
self.addressline = sa.Table('place_addressline', meta,
|
||||
sa.Column('place_id', sa.BigInteger, index=True),
|
||||
sa.Column('address_place_id', sa.BigInteger, index=True),
|
||||
sa.Column('place_id', sa.BigInteger),
|
||||
sa.Column('address_place_id', sa.BigInteger),
|
||||
sa.Column('distance', sa.Float),
|
||||
sa.Column('cached_rank_address', sa.SmallInteger),
|
||||
sa.Column('fromarea', sa.Boolean),
|
||||
sa.Column('isaddress', sa.Boolean))
|
||||
|
||||
self.postcode = sa.Table('location_postcode', meta,
|
||||
sa.Column('place_id', sa.BigInteger, unique=True),
|
||||
sa.Column('place_id', sa.BigInteger),
|
||||
sa.Column('parent_place_id', sa.BigInteger),
|
||||
sa.Column('rank_search', sa.SmallInteger),
|
||||
sa.Column('rank_address', sa.SmallInteger),
|
||||
sa.Column('indexed_status', sa.SmallInteger),
|
||||
sa.Column('indexed_date', sa.DateTime),
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
sa.Column('postcode', sa.Text, index=True),
|
||||
sa.Column('postcode', sa.Text),
|
||||
sa.Column('geometry', Geometry))
|
||||
|
||||
self.osmline = sa.Table('location_property_osmline', meta,
|
||||
sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
|
||||
sa.Column('place_id', sa.BigInteger, nullable=False),
|
||||
sa.Column('osm_id', sa.BigInteger),
|
||||
sa.Column('parent_place_id', sa.BigInteger),
|
||||
sa.Column('indexed_date', sa.DateTime),
|
||||
sa.Column('startnumber', sa.Integer),
|
||||
sa.Column('endnumber', sa.Integer),
|
||||
sa.Column('step', sa.SmallInteger),
|
||||
sa.Column('partition', sa.SmallInteger),
|
||||
sa.Column('indexed_status', sa.SmallInteger),
|
||||
sa.Column('linegeo', Geometry),
|
||||
sa.Column('address', self.types.Composite),
|
||||
sa.Column('address', KeyValueStore),
|
||||
sa.Column('postcode', sa.Text),
|
||||
sa.Column('country_code', sa.String(2)))
|
||||
|
||||
self.country_name = sa.Table('country_name', meta,
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
sa.Column('name', self.types.Composite),
|
||||
sa.Column('derived_name', self.types.Composite),
|
||||
sa.Column('country_default_language_code', sa.Text),
|
||||
sa.Column('name', KeyValueStore),
|
||||
sa.Column('derived_name', KeyValueStore),
|
||||
sa.Column('partition', sa.Integer))
|
||||
|
||||
self.country_grid = sa.Table('country_osm_grid', meta,
|
||||
@@ -135,12 +101,12 @@ class SearchTables:
|
||||
|
||||
# The following tables are not necessarily present.
|
||||
self.search_name = sa.Table('search_name', meta,
|
||||
sa.Column('place_id', sa.BigInteger, index=True),
|
||||
sa.Column('place_id', sa.BigInteger),
|
||||
sa.Column('importance', sa.Float),
|
||||
sa.Column('search_rank', sa.SmallInteger),
|
||||
sa.Column('address_rank', sa.SmallInteger),
|
||||
sa.Column('name_vector', self.types.IntArray, index=True),
|
||||
sa.Column('nameaddress_vector', self.types.IntArray, index=True),
|
||||
sa.Column('name_vector', IntArray),
|
||||
sa.Column('nameaddress_vector', IntArray),
|
||||
sa.Column('country_code', sa.String(2)),
|
||||
sa.Column('centroid', Geometry))
|
||||
|
||||
@@ -150,6 +116,5 @@ class SearchTables:
|
||||
sa.Column('startnumber', sa.Integer),
|
||||
sa.Column('endnumber', sa.Integer),
|
||||
sa.Column('step', sa.SmallInteger),
|
||||
sa.Column('partition', sa.SmallInteger),
|
||||
sa.Column('linegeo', Geometry),
|
||||
sa.Column('postcode', sa.Text))
|
||||
|
||||
@@ -1,118 +0,0 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom types for SQLAlchemy.
|
||||
"""
|
||||
from typing import Callable, Any, cast
|
||||
import sys
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy import types
|
||||
|
||||
from nominatim.typing import SaColumn, SaBind
|
||||
|
||||
#pylint: disable=all
|
||||
|
||||
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
|
||||
""" Simplified type decorator for PostGIS geometry. This type
|
||||
only supports geometries in 4326 projection.
|
||||
"""
|
||||
cache_ok = True
|
||||
|
||||
def __init__(self, subtype: str = 'Geometry'):
|
||||
self.subtype = subtype
|
||||
|
||||
|
||||
def get_col_spec(self) -> str:
|
||||
return f'GEOMETRY({self.subtype}, 4326)'
|
||||
|
||||
|
||||
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
|
||||
def process(value: Any) -> str:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
|
||||
return cast(str, value.to_wkt())
|
||||
return process
|
||||
|
||||
|
||||
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
|
||||
def process(value: Any) -> str:
|
||||
assert isinstance(value, str)
|
||||
return value
|
||||
return process
|
||||
|
||||
|
||||
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
|
||||
return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
|
||||
|
||||
|
||||
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def intersects(self, other: SaColumn) -> 'sa.Operators':
|
||||
return self.op('&&')(other)
|
||||
|
||||
def is_line_like(self) -> SaColumn:
|
||||
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString',
|
||||
'ST_MultiLineString'))
|
||||
|
||||
def is_area(self) -> SaColumn:
|
||||
return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon',
|
||||
'ST_MultiPolygon'))
|
||||
|
||||
|
||||
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_DWithin(self, other, distance, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
|
||||
other, distance, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
|
||||
return sa.func.coalesce(sa.null(), self).op('&&')(other)
|
||||
|
||||
|
||||
def ST_Distance(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Distance(self, other, type_=sa.Float)
|
||||
|
||||
|
||||
def ST_Contains(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Contains(self, other, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_ClosestPoint(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Buffer(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Buffer(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Expand(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Expand(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Collect(self) -> SaColumn:
|
||||
return sa.func.ST_Collect(self, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Centroid(self) -> SaColumn:
|
||||
return sa.func.ST_Centroid(self, type_=Geometry)
|
||||
|
||||
|
||||
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
|
||||
nominatim/db/sqlalchemy_types/__init__.py (new file, 17 lines)
@@ -0,0 +1,17 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Module with custom types for SQLAlchemy
|
||||
"""
|
||||
|
||||
# See also https://github.com/PyCQA/pylint/issues/6006
|
||||
# pylint: disable=useless-import-alias
|
||||
|
||||
from .geometry import (Geometry as Geometry)
|
||||
from .int_array import (IntArray as IntArray)
|
||||
from .key_value import (KeyValueStore as KeyValueStore)
|
||||
from .json import (Json as Json)
|
||||
nominatim/db/sqlalchemy_types/geometry.py (new file, 308 lines)
@@ -0,0 +1,308 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom types for SQLAlchemy.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
from typing import Callable, Any, cast
|
||||
import sys
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
from sqlalchemy import types
|
||||
|
||||
from nominatim.typing import SaColumn, SaBind
|
||||
|
||||
#pylint: disable=all
|
||||
|
||||
class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
|
||||
""" Function to compute the spherical distance in meters.
|
||||
"""
|
||||
type = sa.Float()
|
||||
name = 'Geometry_DistanceSpheroid'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
|
||||
def _default_distance_spheroid(element: Geometry_DistanceSpheroid,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_DistanceSpheroid(%s,"\
|
||||
" 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
|
||||
% compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _spatialite_distance_spheroid(element: Geometry_DistanceSpheroid,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Check if the geometry is a line or multiline.
|
||||
"""
|
||||
name = 'Geometry_IsLineLike'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
|
||||
def _default_is_line_like(element: Geometry_IsLineLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_is_line_like(element: Geometry_IsLineLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Check if the geometry is a polygon or multipolygon.
|
||||
"""
|
||||
name = 'Geometry_IsAreaLike'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
|
||||
def _default_is_area_like(element: Geometry_IsAreaLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_is_area_like(element: Geometry_IsAreaLike,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
|
||||
compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Check if the bounding boxes of the given geometries intersect.
|
||||
"""
|
||||
name = 'Geometry_IntersectsBbox'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
|
||||
def _default_intersects(element: Geometry_IntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def _sqlite_intersects(element: Geometry_IntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Check if the bounding box of the geometry intersects with the
|
||||
given table column, using the spatial index for the column.
|
||||
|
||||
The index must exist or the query may return nothing.
|
||||
"""
|
||||
name = 'Geometry_ColumnIntersectsBbox'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
|
||||
def default_intersects_column(element: Geometry_ColumnIntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def spatialite_intersects_column(element: Geometry_ColumnIntersectsBbox,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "MbrIntersects(%s, %s) = 1 and "\
|
||||
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
|
||||
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
|
||||
"AND search_frame = %s)" %(
|
||||
compiler.process(arg1, **kw),
|
||||
compiler.process(arg2, **kw),
|
||||
arg1.table.name, arg1.table.name, arg1.name,
|
||||
compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Check if the geometry is within the distance of the
|
||||
given table column, using the spatial index for the column.
|
||||
|
||||
The index must exist or the query may return nothing.
|
||||
"""
|
||||
name = 'Geometry_ColumnDWithin'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
|
||||
def default_dwithin_column(element: Geometry_ColumnDWithin,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def spatialite_dwithin_column(element: Geometry_ColumnDWithin,
|
||||
compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
geom1, geom2, dist = list(element.clauses)
|
||||
return "ST_Distance(%s, %s) < %s and "\
|
||||
"%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
|
||||
"WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
|
||||
"AND search_frame = ST_Expand(%s, %s))" %(
|
||||
compiler.process(geom1, **kw),
|
||||
compiler.process(geom2, **kw),
|
||||
compiler.process(dist, **kw),
|
||||
geom1.table.name, geom1.table.name, geom1.name,
|
||||
compiler.process(geom2, **kw),
|
||||
compiler.process(dist, **kw))
|
||||
|
||||
|
||||
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
|
||||
""" Simplified type decorator for PostGIS geometry. This type
|
||||
only supports geometries in 4326 projection.
|
||||
"""
|
||||
cache_ok = True
|
||||
|
||||
def __init__(self, subtype: str = 'Geometry'):
|
||||
self.subtype = subtype
|
||||
|
||||
|
||||
def get_col_spec(self) -> str:
|
||||
return f'GEOMETRY({self.subtype}, 4326)'
|
||||
|
||||
|
||||
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
|
||||
def process(value: Any) -> str:
|
||||
if isinstance(value, str):
|
||||
return value
|
||||
|
||||
return cast(str, value.to_wkt())
|
||||
return process
|
||||
|
||||
|
||||
def result_processor(self, dialect: 'sa.Dialect', coltype: object) -> Callable[[Any], str]:
|
||||
def process(value: Any) -> str:
|
||||
assert isinstance(value, str)
|
||||
return value
|
||||
return process
|
||||
|
||||
|
||||
def column_expression(self, col: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_AsEWKB(col)
|
||||
|
||||
|
||||
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
|
||||
return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
|
||||
|
||||
|
||||
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def intersects(self, other: SaColumn, use_index: bool = True) -> 'sa.Operators':
|
||||
if not use_index:
|
||||
return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self.expr), other)
|
||||
|
||||
if isinstance(self.expr, sa.Column):
|
||||
return Geometry_ColumnIntersectsBbox(self.expr, other)
|
||||
|
||||
return Geometry_IntersectsBbox(self.expr, other)
|
||||
|
||||
|
||||
def is_line_like(self) -> SaColumn:
|
||||
return Geometry_IsLineLike(self)
|
||||
|
||||
|
||||
def is_area(self) -> SaColumn:
|
||||
return Geometry_IsAreaLike(self)
|
||||
|
||||
|
||||
def within_distance(self, other: SaColumn, distance: SaColumn) -> SaColumn:
|
||||
if isinstance(self.expr, sa.Column):
|
||||
return Geometry_ColumnDWithin(self.expr, other, distance)
|
||||
|
||||
return self.ST_Distance(other) < distance
|
||||
|
||||
|
||||
def ST_Distance(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Distance(self, other, type_=sa.Float)
|
||||
|
||||
|
||||
def ST_Contains(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Contains(self, other, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
|
||||
|
||||
|
||||
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.coalesce(sa.func.ST_ClosestPoint(self, other, type_=Geometry),
|
||||
other)
|
||||
|
||||
|
||||
def ST_Buffer(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Buffer(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Expand(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_Expand(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Collect(self) -> SaColumn:
|
||||
return sa.func.ST_Collect(self, type_=Geometry)
|
||||
|
||||
|
||||
def ST_Centroid(self) -> SaColumn:
|
||||
return sa.func.ST_Centroid(self, type_=Geometry)
|
||||
|
||||
|
||||
def ST_LineInterpolatePoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_LineInterpolatePoint(self, other, type_=Geometry)
|
||||
|
||||
|
||||
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
|
||||
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
|
||||
|
||||
|
||||
def distance_spheroid(self, other: SaColumn) -> SaColumn:
|
||||
return Geometry_DistanceSpheroid(self, other)
|
||||
|
||||
|
||||
@compiles(Geometry, 'sqlite') # type: ignore[no-untyped-call]
|
||||
def get_col_spec(self, *args, **kwargs): # type: ignore[no-untyped-def]
|
||||
return 'GEOMETRY'
|
||||
|
||||
|
||||
SQLITE_FUNCTION_ALIAS = (
|
||||
('ST_AsEWKB', sa.Text, 'AsEWKB'),
|
||||
('ST_GeomFromEWKT', Geometry, 'GeomFromEWKT'),
|
||||
('ST_AsGeoJSON', sa.Text, 'AsGeoJSON'),
|
||||
('ST_AsKML', sa.Text, 'AsKML'),
|
||||
('ST_AsSVG', sa.Text, 'AsSVG'),
|
||||
('ST_LineLocatePoint', sa.Float, 'ST_Line_Locate_Point'),
|
||||
('ST_LineInterpolatePoint', sa.Float, 'ST_Line_Interpolate_Point'),
|
||||
)
|
||||
|
||||
def _add_function_alias(func: str, ftype: type, alias: str) -> None:
|
||||
_FuncDef = type(func, (sa.sql.functions.GenericFunction, ), {
|
||||
"type": ftype(),
|
||||
"name": func,
|
||||
"identifier": func,
|
||||
"inherit_cache": True})
|
||||
|
||||
func_templ = f"{alias}(%s)"
|
||||
|
||||
def _sqlite_impl(element: Any, compiler: Any, **kw: Any) -> Any:
|
||||
return func_templ % compiler.process(element.clauses, **kw)
|
||||
|
||||
compiles(_FuncDef, 'sqlite')(_sqlite_impl) # type: ignore[no-untyped-call]
|
||||
|
||||
for alias in SQLITE_FUNCTION_ALIAS:
|
||||
_add_function_alias(*alias)
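A rough usage sketch (assuming the module above has been imported so the aliases are registered): the same expression then compiles to the PostGIS name on PostgreSQL and to the SpatiaLite alias on SQLite.

import sqlalchemy as sa
from sqlalchemy.dialects import postgresql, sqlite

expr = sa.func.ST_AsGeoJSON(sa.column('geometry'))
print(expr.compile(dialect=postgresql.dialect()))  # ST_AsGeoJSON(geometry)
print(expr.compile(dialect=sqlite.dialect()))      # AsGeoJSON(geometry)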
|
||||
nominatim/db/sqlalchemy_types/int_array.py (new file, 123 lines)
@@ -0,0 +1,123 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom type for an array of integers.
|
||||
"""
|
||||
from typing import Any, List, cast, Optional
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
from sqlalchemy.dialects.postgresql import ARRAY
|
||||
|
||||
from nominatim.typing import SaDialect, SaColumn
|
||||
|
||||
# pylint: disable=all
|
||||
|
||||
class IntList(sa.types.TypeDecorator[Any]):
|
||||
""" A list of integers saved as a text of comma-separated numbers.
|
||||
"""
|
||||
impl = sa.types.Unicode
|
||||
cache_ok = True
|
||||
|
||||
def process_bind_param(self, value: Optional[Any], dialect: 'sa.Dialect') -> Optional[str]:
|
||||
if value is None:
|
||||
return None
|
||||
|
||||
assert isinstance(value, list)
|
||||
return ','.join(map(str, value))
|
||||
|
||||
def process_result_value(self, value: Optional[Any],
|
||||
dialect: SaDialect) -> Optional[List[int]]:
|
||||
return [int(v) for v in value.split(',')] if value is not None else None
|
||||
|
||||
def copy(self, **kw: Any) -> 'IntList':
|
||||
return IntList(self.impl.length)
|
||||
|
||||
|
||||
class IntArray(sa.types.TypeDecorator[Any]):
|
||||
""" Dialect-independent list of integers.
|
||||
"""
|
||||
impl = IntList
|
||||
cache_ok = True
|
||||
|
||||
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
|
||||
if dialect.name == 'postgresql':
|
||||
return ARRAY(sa.Integer()) #pylint: disable=invalid-name
|
||||
|
||||
return IntList()
|
||||
|
||||
|
||||
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def __add__(self, other: SaColumn) -> 'sa.ColumnOperators':
|
||||
""" Concate the array with the given array. If one of the
|
||||
operants is null, the value of the other will be returned.
|
||||
"""
|
||||
return ArrayCat(self.expr, other)
|
||||
|
||||
|
||||
def contains(self, other: SaColumn, **kwargs: Any) -> 'sa.ColumnOperators':
|
||||
""" Return true if the array contains all the value of the argument
|
||||
array.
|
||||
"""
|
||||
return ArrayContains(self.expr, other)
|
||||
|
||||
|
||||
|
||||
class ArrayAgg(sa.sql.functions.GenericFunction[Any]):
|
||||
""" Aggregate function to collect elements in an array.
|
||||
"""
|
||||
type = IntArray()
|
||||
identifier = 'ArrayAgg'
|
||||
name = 'array_agg'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayAgg, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_agg(element: ArrayAgg, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "group_concat(%s, ',')" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class ArrayContains(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Function to check if an array is fully contained in another.
|
||||
"""
|
||||
name = 'ArrayContains'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayContains) # type: ignore[no-untyped-call, misc]
|
||||
def generic_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(%s @> %s)" % (compiler.process(arg1, **kw),
|
||||
compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
@compiles(ArrayContains, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_contains(element: ArrayContains, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "array_contains(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
|
||||
class ArrayCat(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Function to check if an array is fully contained in another.
|
||||
"""
|
||||
type = IntArray()
|
||||
identifier = 'ArrayCat'
|
||||
inherit_cache = True
|
||||
|
||||
|
||||
@compiles(ArrayCat) # type: ignore[no-untyped-call, misc]
|
||||
def generic_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
return "array_cat(%s)" % compiler.process(element.clauses, **kw)
|
||||
|
||||
|
||||
@compiles(ArrayCat, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_array_cat(element: ArrayCat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(%s || ',' || %s)" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
nominatim/db/sqlalchemy_types/json.py (new file, 30 lines)
@@ -0,0 +1,30 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Common json type for different dialects.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.dialects.postgresql import JSONB
|
||||
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
|
||||
|
||||
from nominatim.typing import SaDialect
|
||||
|
||||
# pylint: disable=all
|
||||
|
||||
class Json(sa.types.TypeDecorator[Any]):
|
||||
""" Dialect-independent type for JSON.
|
||||
"""
|
||||
impl = sa.types.JSON
|
||||
cache_ok = True
|
||||
|
||||
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
|
||||
if dialect.name == 'postgresql':
|
||||
return JSONB(none_as_null=True) # type: ignore[no-untyped-call]
|
||||
|
||||
return sqlite_json(none_as_null=True)
|
||||
nominatim/db/sqlalchemy_types/key_value.py (new file, 62 lines)
@@ -0,0 +1,62 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
A custom type that implements a simple key-value store of strings.
|
||||
"""
|
||||
from typing import Any
|
||||
|
||||
import sqlalchemy as sa
|
||||
from sqlalchemy.ext.compiler import compiles
|
||||
from sqlalchemy.dialects.postgresql import HSTORE
|
||||
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
|
||||
|
||||
from nominatim.typing import SaDialect, SaColumn
|
||||
|
||||
# pylint: disable=all
|
||||
|
||||
class KeyValueStore(sa.types.TypeDecorator[Any]):
|
||||
""" Dialect-independent type of a simple key-value store of strings.
|
||||
"""
|
||||
impl = HSTORE
|
||||
cache_ok = True
|
||||
|
||||
def load_dialect_impl(self, dialect: SaDialect) -> sa.types.TypeEngine[Any]:
|
||||
if dialect.name == 'postgresql':
|
||||
return HSTORE() # type: ignore[no-untyped-call]
|
||||
|
||||
return sqlite_json(none_as_null=True)
|
||||
|
||||
|
||||
class comparator_factory(sa.types.UserDefinedType.Comparator): # type: ignore[type-arg]
|
||||
|
||||
def merge(self, other: SaColumn) -> 'sa.Operators':
|
||||
""" Merge the values from the given KeyValueStore into this
|
||||
one, overwriting values where necessary. When the argument
|
||||
is null, nothing happens.
|
||||
"""
|
||||
return KeyValueConcat(self.expr, other)
|
||||
|
||||
|
||||
class KeyValueConcat(sa.sql.expression.FunctionElement[Any]):
|
||||
""" Return the merged key-value store from the input parameters.
|
||||
"""
|
||||
type = KeyValueStore()
|
||||
name = 'JsonConcat'
|
||||
inherit_cache = True
|
||||
|
||||
@compiles(KeyValueConcat) # type: ignore[no-untyped-call, misc]
|
||||
def default_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "(%s || coalesce(%s, ''::hstore))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
@compiles(KeyValueConcat, 'sqlite') # type: ignore[no-untyped-call, misc]
|
||||
def sqlite_json_concat(element: KeyValueConcat, compiler: 'sa.Compiled', **kw: Any) -> str:
|
||||
arg1, arg2 = list(element.clauses)
|
||||
return "json_patch(%s, coalesce(%s, '{}'))" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
|
||||
|
||||
|
||||
|
||||
nominatim/db/sqlite_functions.py (new file, 122 lines)
@@ -0,0 +1,122 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Custom functions for SQLite.
|
||||
"""
|
||||
from typing import cast, Optional, Set, Any
|
||||
import json
|
||||
|
||||
# pylint: disable=protected-access
|
||||
|
||||
def weigh_search(search_vector: Optional[str], rankings: str, default: float) -> float:
|
||||
""" Custom weight function for search results.
|
||||
"""
|
||||
if search_vector is not None:
|
||||
svec = [int(x) for x in search_vector.split(',')]
|
||||
for rank in json.loads(rankings):
|
||||
if all(r in svec for r in rank[1]):
|
||||
return cast(float, rank[0])
|
||||
|
||||
return default
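Worked example derived from the logic above: rankings is a JSON list of [weight, [token ids]] pairs; the first pair whose ids are all present in the search vector wins, otherwise the default applies (the numbers below are illustrative only):

assert weigh_search('1,2,3', '[[0.1, [2, 3]], [0.5, [9]]]', 1.0) == 0.1
assert weigh_search('1,2,3', '[[0.5, [9]]]', 1.0) == 1.0
assert weigh_search(None, '[[0.1, [2]]]', 0.3) == 0.3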
|
||||
|
||||
|
||||
class ArrayIntersectFuzzy:
|
||||
""" Compute the array of common elements of all input integer arrays.
|
||||
Very large input parameters may be ignored to speed up
|
||||
computation. Therefore, the result is a superset of common elements.
|
||||
|
||||
Input and output arrays are given as comma-separated lists.
|
||||
"""
|
||||
def __init__(self) -> None:
|
||||
self.first = ''
|
||||
self.values: Optional[Set[int]] = None
|
||||
|
||||
def step(self, value: Optional[str]) -> None:
|
||||
""" Add the next array to the intersection.
|
||||
"""
|
||||
if value is not None:
|
||||
if not self.first:
|
||||
self.first = value
|
||||
elif len(value) < 10000000:
|
||||
if self.values is None:
|
||||
self.values = {int(x) for x in self.first.split(',')}
|
||||
self.values.intersection_update((int(x) for x in value.split(',')))
|
||||
|
||||
def finalize(self) -> str:
|
||||
""" Return the final result.
|
||||
"""
|
||||
if self.values is not None:
|
||||
return ','.join(map(str, self.values))
|
||||
|
||||
return self.first
|
||||
|
||||
|
||||
class ArrayUnion:
|
||||
""" Compute the set of all elements of the input integer arrays.
|
||||
|
||||
Input and output arrays are given as strings of comma-separated lists.
|
||||
"""
|
||||
def __init__(self) -> None:
|
||||
self.values: Optional[Set[str]] = None
|
||||
|
||||
def step(self, value: Optional[str]) -> None:
|
||||
""" Add the next array to the union.
|
||||
"""
|
||||
if value is not None:
|
||||
if self.values is None:
|
||||
self.values = set(value.split(','))
|
||||
else:
|
||||
self.values.update(value.split(','))
|
||||
|
||||
def finalize(self) -> str:
|
||||
""" Return the final result.
|
||||
"""
|
||||
return '' if self.values is None else ','.join(self.values)
|
||||
|
||||
|
||||
def array_contains(container: Optional[str], containee: Optional[str]) -> Optional[bool]:
|
||||
""" Is the array 'containee' completely contained in array 'container'.
|
||||
"""
|
||||
if container is None or containee is None:
|
||||
return None
|
||||
|
||||
vset = container.split(',')
|
||||
return all(v in vset for v in containee.split(','))
|
||||
|
||||
|
||||
def array_pair_contains(container1: Optional[str], container2: Optional[str],
|
||||
containee: Optional[str]) -> Optional[bool]:
|
||||
""" Is the array 'containee' completely contained in the union of
|
||||
array 'container1' and array 'container2'.
|
||||
"""
|
||||
if container1 is None or container2 is None or containee is None:
|
||||
return None
|
||||
|
||||
vset = container1.split(',') + container2.split(',')
|
||||
return all(v in vset for v in containee.split(','))
|
||||
|
||||
|
||||
def install_custom_functions(conn: Any) -> None:
|
||||
""" Install helper functions for Nominatim into the given SQLite
|
||||
database connection.
|
||||
"""
|
||||
conn.create_function('weigh_search', 3, weigh_search, deterministic=True)
|
||||
conn.create_function('array_contains', 2, array_contains, deterministic=True)
|
||||
conn.create_function('array_pair_contains', 3, array_pair_contains, deterministic=True)
|
||||
_create_aggregate(conn, 'array_intersect_fuzzy', 1, ArrayIntersectFuzzy)
|
||||
_create_aggregate(conn, 'array_union', 1, ArrayUnion)
|
||||
|
||||
|
||||
async def _make_aggregate(aioconn: Any, *args: Any) -> None:
|
||||
await aioconn._execute(aioconn._conn.create_aggregate, *args)
|
||||
|
||||
|
||||
def _create_aggregate(conn: Any, name: str, nargs: int, aggregate: Any) -> None:
|
||||
try:
|
||||
conn.await_(_make_aggregate(conn._connection, name, nargs, aggregate))
|
||||
except Exception as error: # pylint: disable=broad-exception-caught
|
||||
conn._handle_exception(error)
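For illustration only, a sketch registering the same helpers on a plain sqlite3 connection (the code above targets the asynchronous SQLAlchemy driver via the helpers shown, so this stand-alone usage is an assumption, not part of the actual wiring):

import sqlite3

db = sqlite3.connect(':memory:')
db.create_function('array_contains', 2, array_contains, deterministic=True)
db.create_function('weigh_search', 3, weigh_search, deterministic=True)
db.create_aggregate('array_union', 1, ArrayUnion)

db.execute("CREATE TABLE t(ids TEXT)")
db.executemany("INSERT INTO t VALUES (?)", [('1,2',), ('2,3',)])
# The aggregate unions the comma-separated lists; element order is unspecified.
print(db.execute("SELECT array_union(ids) FROM t").fetchone()[0])
# Scalar check: is '2,3' fully contained in '1,2,3'? Prints 1 (true).
print(db.execute("SELECT array_contains('1,2,3', '2,3')").fetchone()[0])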
|
||||
@@ -29,11 +29,24 @@ class StatusRow(TypedDict):
|
||||
indexed: Optional[bool]
|
||||
|
||||
|
||||
def compute_database_date(conn: Connection) -> dt.datetime:
|
||||
def compute_database_date(conn: Connection, offline: bool = False) -> dt.datetime:
|
||||
""" Determine the date of the database from the newest object in the
|
||||
data base.
|
||||
"""
|
||||
# First, find the node with the highest ID in the database
|
||||
# If there is a date from osm2pgsql available, use that.
|
||||
if conn.table_exists('osm2pgsql_properties'):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(""" SELECT value FROM osm2pgsql_properties
|
||||
WHERE property = 'current_timestamp' """)
|
||||
row = cur.fetchone()
|
||||
if row is not None:
|
||||
return dt.datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%SZ")\
|
||||
.replace(tzinfo=dt.timezone.utc)
|
||||
|
||||
if offline:
|
||||
raise UsageError("Cannot determine database date from data in offline mode.")
|
||||
|
||||
# Else, find the node with the highest ID in the database
|
||||
with conn.cursor() as cur:
|
||||
if conn.table_exists('place'):
|
||||
osmid = cur.scalar("SELECT max(osm_id) FROM place WHERE osm_type='N'")
|
||||
|
||||
@@ -118,4 +118,4 @@ class CopyBuffer:
|
||||
"""
|
||||
if self.buffer.tell() > 0:
|
||||
self.buffer.seek(0)
|
||||
cur.copy_from(self.buffer, table, columns=columns) # type: ignore[no-untyped-call]
|
||||
cur.copy_from(self.buffer, table, columns=columns)
|
||||
|
||||
@@ -10,11 +10,13 @@ Server implementation using the falcon webserver framework.
|
||||
from typing import Optional, Mapping, cast, Any, List
|
||||
from pathlib import Path
|
||||
import datetime as dt
|
||||
import asyncio
|
||||
|
||||
from falcon.asgi import App, Request, Response
|
||||
|
||||
from nominatim.api import NominatimAPIAsync
|
||||
import nominatim.api.v1 as api_impl
|
||||
import nominatim.api.logging as loglib
|
||||
from nominatim.config import Configuration
|
||||
|
||||
class HTTPNominatimError(Exception):
|
||||
@@ -44,8 +46,15 @@ async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=u
|
||||
per exception info.
|
||||
"""
|
||||
resp.status = 503
|
||||
resp.text = "Query took too long to process."
|
||||
resp.content_type = 'text/plain; charset=utf-8'
|
||||
|
||||
loglib.log().comment('Aborted: Query took too long to process.')
|
||||
logdata = loglib.get_and_disable()
|
||||
if logdata:
|
||||
resp.text = logdata
|
||||
resp.content_type = 'text/html; charset=utf-8'
|
||||
else:
|
||||
resp.text = "Query took too long to process."
|
||||
resp.content_type = 'text/plain; charset=utf-8'
|
||||
|
||||
|
||||
class ParamWrapper(api_impl.ASGIAdaptor):
|
||||
@@ -119,7 +128,7 @@ class FileLoggingMiddleware:
|
||||
resource: Optional[EndpointWrapper],
|
||||
req_succeeded: bool) -> None:
|
||||
""" Callback after requests writes to the logfile. It only
|
||||
writes logs for sucessful requests for search, reverse and lookup.
|
||||
writes logs for successful requests for search, reverse and lookup.
|
||||
"""
|
||||
if not req_succeeded or resource is None or resp.status != 200\
|
||||
or resource.name not in ('reverse', 'search', 'lookup', 'details'):
|
||||
@@ -164,6 +173,8 @@ def get_application(project_dir: Path,
|
||||
middleware=middleware)
|
||||
app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
|
||||
app.add_error_handler(TimeoutError, timeout_error_handler)
|
||||
# different from TimeoutError in Python <= 3.10
|
||||
app.add_error_handler(asyncio.TimeoutError, timeout_error_handler)
|
||||
|
||||
legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
|
||||
for name, func in api_impl.ROUTES:
|
||||
|
||||
@@ -10,11 +10,12 @@ Server implementation using the starlette webserver framework.
|
||||
from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
|
||||
from pathlib import Path
|
||||
import datetime as dt
|
||||
import asyncio
|
||||
|
||||
from starlette.applications import Starlette
|
||||
from starlette.routing import Route
|
||||
from starlette.exceptions import HTTPException
|
||||
from starlette.responses import Response, PlainTextResponse
|
||||
from starlette.responses import Response, PlainTextResponse, HTMLResponse
|
||||
from starlette.requests import Request
|
||||
from starlette.middleware import Middleware
|
||||
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
|
||||
@@ -22,6 +23,7 @@ from starlette.middleware.cors import CORSMiddleware
|
||||
|
||||
from nominatim.api import NominatimAPIAsync
|
||||
import nominatim.api.v1 as api_impl
|
||||
import nominatim.api.logging as loglib
|
||||
from nominatim.config import Configuration
|
||||
|
||||
class ParamWrapper(api_impl.ASGIAdaptor):
|
||||
@@ -114,6 +116,12 @@ async def timeout_error(request: Request, #pylint: disable=unused-argument
|
||||
_: Exception) -> Response:
|
||||
""" Error handler for query timeouts.
|
||||
"""
|
||||
loglib.log().comment('Aborted: Query took too long to process.')
|
||||
logdata = loglib.get_and_disable()
|
||||
|
||||
if logdata:
|
||||
return HTMLResponse(logdata)
|
||||
|
||||
return PlainTextResponse("Query took too long to process.", status_code=503)
|
||||
|
||||
|
||||
@@ -144,7 +152,8 @@ def get_application(project_dir: Path,
|
||||
middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
|
||||
|
||||
exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
|
||||
TimeoutError: timeout_error
|
||||
TimeoutError: timeout_error,
|
||||
asyncio.TimeoutError: timeout_error
|
||||
}
|
||||
|
||||
async def _shutdown() -> None:
|
||||
|
||||
@@ -201,7 +201,7 @@ class AbstractTokenizer(ABC):
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def update_statistics(self) -> None:
|
||||
def update_statistics(self, config: Configuration) -> None:
|
||||
""" Recompute any tokenizer statistics necessary for efficient lookup.
|
||||
This function is meant to be called from time to time by the user
|
||||
to improve performance. However, the tokenizer must not depend on
|
||||
|
||||
@@ -31,6 +31,11 @@ DBCFG_TERM_NORMALIZATION = "tokenizer_term_normalization"
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
WORD_TYPES =(('country_names', 'C'),
|
||||
('postcodes', 'P'),
|
||||
('full_word', 'W'),
|
||||
('housenumbers', 'H'))
|
||||
|
||||
def create(dsn: str, data_dir: Path) -> 'ICUTokenizer':
|
||||
""" Create a new instance of the tokenizer provided by this module.
|
||||
"""
|
||||
@@ -62,7 +67,8 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
|
||||
if init_db:
|
||||
self.update_sql_functions(config)
|
||||
self._init_db_tables(config)
|
||||
self._setup_db_tables(config)
|
||||
self._create_base_indices(config, 'word')
|
||||
|
||||
|
||||
def init_from_project(self, config: Configuration) -> None:
|
||||
@@ -80,9 +86,7 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
""" Do any required postprocessing to make the tokenizer data ready
|
||||
for use.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
sqlp = SQLPreprocessor(conn, config)
|
||||
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
|
||||
self._create_lookup_indices(config, 'word')
|
||||
|
||||
|
||||
def update_sql_functions(self, config: Configuration) -> None:
|
||||
@@ -100,24 +104,39 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
self.init_from_project(config)
|
||||
|
||||
|
||||
def update_statistics(self) -> None:
|
||||
def update_statistics(self, config: Configuration) -> None:
|
||||
""" Recompute frequencies for all name words.
|
||||
"""
|
||||
with connect(self.dsn) as conn:
|
||||
if conn.table_exists('search_name'):
|
||||
with conn.cursor() as cur:
|
||||
cur.drop_table("word_frequencies")
|
||||
LOG.info("Computing word frequencies")
|
||||
cur.execute("""CREATE TEMP TABLE word_frequencies AS
|
||||
SELECT unnest(name_vector) as id, count(*)
|
||||
FROM search_name GROUP BY id""")
|
||||
cur.execute("CREATE INDEX ON word_frequencies(id)")
|
||||
LOG.info("Update word table with recomputed frequencies")
|
||||
cur.execute("""UPDATE word
|
||||
SET info = info || jsonb_build_object('count', count)
|
||||
FROM word_frequencies WHERE word_id = id""")
|
||||
cur.drop_table("word_frequencies")
|
||||
if not conn.table_exists('search_name'):
|
||||
return
|
||||
|
||||
with conn.cursor() as cur:
|
||||
LOG.info('Computing word frequencies')
|
||||
cur.drop_table('word_frequencies')
|
||||
cur.execute("""CREATE TEMP TABLE word_frequencies AS
|
||||
SELECT unnest(name_vector) as id, count(*)
|
||||
FROM search_name GROUP BY id""")
|
||||
cur.execute('CREATE INDEX ON word_frequencies(id)')
|
||||
LOG.info('Update word table with recomputed frequencies')
|
||||
cur.drop_table('tmp_word')
|
||||
cur.execute("""CREATE TABLE tmp_word AS
|
||||
SELECT word_id, word_token, type, word,
|
||||
(CASE WHEN wf.count is null THEN info
|
||||
ELSE info || jsonb_build_object('count', wf.count)
|
||||
END) as info
|
||||
FROM word LEFT JOIN word_frequencies wf
|
||||
ON word.word_id = wf.id""")
|
||||
cur.drop_table('word_frequencies')
|
||||
|
||||
sqlp = SQLPreprocessor(conn, config)
|
||||
sqlp.run_string(conn,
|
||||
'GRANT SELECT ON tmp_word TO "{{config.DATABASE_WEBUSER}}"')
|
||||
conn.commit()
|
||||
self._create_base_indices(config, 'tmp_word')
|
||||
self._create_lookup_indices(config, 'tmp_word')
|
||||
self._move_temporary_word_table('tmp_word')
|
||||
|
||||
|
||||
|
||||
def _cleanup_housenumbers(self) -> None:
|
||||
@@ -195,19 +214,20 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
return list(s[0].split('@')[0] for s in cur)
|
||||
|
||||
|
||||
def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
|
||||
def _install_php(self, phpdir: Optional[Path], overwrite: bool = True) -> None:
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
assert self.loader is not None
|
||||
php_file = self.data_dir / "tokenizer.php"
|
||||
if phpdir is not None:
|
||||
assert self.loader is not None
|
||||
php_file = self.data_dir / "tokenizer.php"
|
||||
|
||||
if not php_file.exists() or overwrite:
|
||||
php_file.write_text(dedent(f"""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', 10000000);
|
||||
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
||||
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
||||
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
|
||||
if not php_file.exists() or overwrite:
|
||||
php_file.write_text(dedent(f"""\
|
||||
<?php
|
||||
@define('CONST_Max_Word_Frequency', 10000000);
|
||||
@define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
|
||||
@define('CONST_Transliteration', "{self.loader.get_search_rules()}");
|
||||
require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')


    def _save_config(self) -> None:
@@ -219,16 +239,84 @@ class ICUTokenizer(AbstractTokenizer):
            self.loader.save_config_to_db(conn)


    def _init_db_tables(self, config: Configuration) -> None:
    def _setup_db_tables(self, config: Configuration) -> None:
        """ Set up the word table and fill it with pre-computed word
            frequencies.
        """
        with connect(self.dsn) as conn:
            with conn.cursor() as cur:
                cur.drop_table('word')
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_string(conn, """
                CREATE TABLE word (
                      word_id INTEGER,
                      word_token text NOT NULL,
                      type text NOT NULL,
                      word text,
                      info jsonb
                    ) {{db.tablespace.search_data}};
                GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

                DROP SEQUENCE IF EXISTS seq_word;
                CREATE SEQUENCE seq_word start 1;
                GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";
            """)
            conn.commit()
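
The jsonb info column created here is where update_statistics() later merges the per-token counts. A small, hedged example of reading such a count back with plain psycopg2 (the 'W' full-word type value is an assumption based on the ICU token types and not part of the diff):

# Sketch only: read a pre-computed frequency from the word table.
import psycopg2

def full_word_count(dsn: str, token: str) -> int:
    with psycopg2.connect(dsn) as conn:
        with conn.cursor() as cur:
            cur.execute("""SELECT coalesce((info->>'count')::int, 0)
                             FROM word
                            WHERE word_token = %s AND type = 'W'""",
                        (token, ))
            row = cur.fetchone()
            return row[0] if row else 0
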


    def _create_base_indices(self, config: Configuration, table_name: str) -> None:
        """ Set up the word table and fill it with pre-computed word
            frequencies.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer_tables.sql')
            sqlp.run_string(conn,
                            """CREATE INDEX idx_{{table_name}}_word_token ON {{table_name}}
                               USING BTREE (word_token) {{db.tablespace.search_index}}""",
                            table_name=table_name)
            for name, ctype in WORD_TYPES:
                sqlp.run_string(conn,
                                """CREATE INDEX idx_{{table_name}}_{{idx_name}} ON {{table_name}}
                                   USING BTREE (word) {{db.tablespace.address_index}}
                                   WHERE type = '{{column_type}}'
                                """,
                                table_name=table_name, idx_name=name,
                                column_type=ctype)
            conn.commit()


    def _create_lookup_indices(self, config: Configuration, table_name: str) -> None:
        """ Create additional indexes used when running the API.
        """
        with connect(self.dsn) as conn:
            sqlp = SQLPreprocessor(conn, config)
            # Index required for details lookup.
            sqlp.run_string(conn, """
                CREATE INDEX IF NOT EXISTS idx_{{table_name}}_word_id
                  ON {{table_name}} USING BTREE (word_id) {{db.tablespace.search_index}}
                """,
                table_name=table_name)
            conn.commit()


    def _move_temporary_word_table(self, old: str) -> None:
        """ Rename all tables and indexes used by the tokenizer.
        """
        with connect(self.dsn) as conn:
            with conn.cursor() as cur:
                cur.drop_table('word')
                cur.execute(f"ALTER TABLE {old} RENAME TO word")
                for idx in ('word_token', 'word_id'):
                    cur.execute(f"""ALTER INDEX idx_{old}_{idx}
                                      RENAME TO idx_word_{idx}""")
                for name, _ in WORD_TYPES:
                    cur.execute(f"""ALTER INDEX idx_{old}_{name}
                                      RENAME TO idx_word_{name}""")
            conn.commit()


class ICUNameAnalyzer(AbstractAnalyzer):
    """ The ICU analyzer uses the ICU library for splitting names.


@@ -210,7 +210,7 @@ class LegacyTokenizer(AbstractTokenizer):
            self._save_config(conn, config)


    def update_statistics(self) -> None:
    def update_statistics(self, _: Configuration) -> None:
        """ Recompute the frequency of full words.
        """
        with connect(self.dsn) as conn:
@@ -269,15 +269,16 @@ class LegacyTokenizer(AbstractTokenizer):
    def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
        """ Install the php script for the tokenizer.
        """
        php_file = self.data_dir / "tokenizer.php"
        if config.lib_dir.php is not None:
            php_file = self.data_dir / "tokenizer.php"

        if not php_file.exists() or overwrite:
            php_file.write_text(dedent(f"""\
                <?php
                @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
                @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
                require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
                """), encoding='utf-8')
            if not php_file.exists() or overwrite:
                php_file.write_text(dedent(f"""\
                    <?php
                    @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
                    @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
                    require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
                    """), encoding='utf-8')


    def _init_db_tables(self, config: Configuration) -> None:

@@ -60,5 +60,5 @@ class SanitizerHandler(Protocol):

        Return:
            The result must be a callable that takes a place description
            and transforms name and address as reuqired.
            and transforms name and address as required.
        """

@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
import logging

from psycopg2.extras import Json, register_hstore
from psycopg2 import DataError

from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
@@ -87,3 +88,19 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,

        for msg in conn.notices:
            print(msg)


def clean_deleted_relations(config: Configuration, age: str) -> None:
    """ Clean deleted relations older than a given age
    """
    with connect(config.get_libpq_dsn()) as conn:
        with conn.cursor() as cur:
            try:
                cur.execute("""SELECT place_force_delete(p.place_id)
                               FROM import_polygon_delete d, placex p
                               WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
                               AND age(p.indexed_date) > %s::interval""",
                            (age, ))
            except DataError as exc:
                raise UsageError('Invalid PostgreSQL time interval format') from exc
        conn.commit()
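
The age argument is handed to PostgreSQL unchanged and cast to an interval, so any interval literal works; an unparsable value surfaces as a UsageError via the DataError handler above. A small, hedged usage sketch (the import path assumes the hunk above belongs to nominatim/tools/admin.py; the CLI normally drives this call):

# Sketch only: purge relations that were deleted more than a month ago.
from nominatim.config import Configuration
from nominatim.tools.admin import clean_deleted_relations

def purge_old_deletions(config: Configuration) -> None:
    # '1 month' could also be '6 weeks', '100 days', ... anything ::interval accepts.
    clean_deleted_relations(config, age='1 month')
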

@@ -13,9 +13,11 @@ from textwrap import dedent

from nominatim.config import Configuration
from nominatim.db.connection import connect, Connection
from nominatim.db import properties
from nominatim.errors import UsageError
from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.tools import freeze
from nominatim.version import NOMINATIM_VERSION, parse_version

CHECKLIST = []

@@ -125,7 +127,7 @@ def _get_indexes(conn: Connection) -> List[str]:

# CHECK FUNCTIONS
#
# Functions are exectured in the order they appear here.
# Functions are executed in the order they appear here.

@_check(hint="""\
             {error}
@@ -146,11 +148,52 @@ def check_connection(conn: Any, config: Configuration) -> CheckResult:

    return CheckState.OK

@_check(hint="""\
             Database version ({db_version}) doesn't match Nominatim version ({nom_version})

             Hints:
             * Are you connecting to the correct database?

             {instruction}

             Check the Migration chapter of the Administration Guide.

             Project directory: {config.project_dir}
             Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN}
             """)
def check_database_version(conn: Connection, config: Configuration) -> CheckResult:
    """ Checking database_version matches Nominatim software version
    """

    if conn.table_exists('nominatim_properties'):
        db_version_str = properties.get_property(conn, 'database_version')
    else:
        db_version_str = None

    if db_version_str is not None:
        db_version = parse_version(db_version_str)

        if db_version == NOMINATIM_VERSION:
            return CheckState.OK

        instruction = (
            'Run migrations: nominatim admin --migrate'
            if db_version < NOMINATIM_VERSION
            else 'You need to upgrade the Nominatim software.'
        )
    else:
        instruction = ''

    return CheckState.FATAL, dict(db_version=db_version_str,
                                  nom_version=NOMINATIM_VERSION,
                                  instruction=instruction,
                                  config=config)
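
The check relies on the parsed version being an ordered, tuple-like value, so < and > pick the right hint. A rough, self-contained illustration with plain tuples standing in for the real NominatimVersion type (a simplification, not the actual type used above):

# Sketch only: how the instruction above is chosen.
db_version = (4, 3, 0, 0)      # value parsed from the nominatim_properties table
nom_version = (4, 4, 0, 0)     # NOMINATIM_VERSION of the installed software

if db_version < nom_version:
    print('Run migrations: nominatim admin --migrate')
elif db_version > nom_version:
    print('You need to upgrade the Nominatim software.')
else:
    print('OK')
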

@_check(hint="""\
             placex table not found

             Hints:
             * Are you connecting to the right database?
             * Are you connecting to the correct database?
             * Did the import process finish without errors?

             Project directory: {config.project_dir}

@@ -12,14 +12,13 @@ import os
import subprocess
import sys
from pathlib import Path
from typing import List, Optional, Tuple, Union, cast
from typing import List, Optional, Tuple, Union

import psutil
from psycopg2.extensions import make_dsn, parse_dsn

from nominatim.config import Configuration
from nominatim.db.connection import connect
from nominatim.typing import DictCursorResults
from nominatim.version import NOMINATIM_VERSION


@@ -79,7 +78,7 @@ def from_file_find_line_portion(
        filename: str, start: str, sep: str, fieldnum: int = 1
) -> Optional[str]:
    """open filename, finds the line starting with the 'start' string.
    Splits the line using seperator and returns a "fieldnum" from the split."""
    Splits the line using separator and returns a "fieldnum" from the split."""
    with open(filename, encoding='utf8') as file:
        result = ""
        for line in file:
@@ -107,15 +106,15 @@ def report_system_information(config: Configuration) -> None:
        postgresql_ver: str = convert_version(conn.server_version_tuple())

        with conn.cursor() as cur:
            cur.execute(f"""
                        SELECT datname FROM pg_catalog.pg_database
                        WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
            nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
            if nominatim_db_exists:
                with connect(config.get_libpq_dsn()) as conn:
                    postgis_ver: str = convert_version(conn.postgis_version_tuple())
            else:
                postgis_ver = "Unable to connect to database"
            num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
                             (parse_dsn(config.get_libpq_dsn())['dbname'], ))
            nominatim_db_exists = num == 1 if isinstance(num, int) else False

    if nominatim_db_exists:
        with connect(config.get_libpq_dsn()) as conn:
            postgis_ver: str = convert_version(conn.postgis_version_tuple())
    else:
        postgis_ver = "Unable to connect to database"

    postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))

nominatim/tools/convert_sqlite.py (new file, 265 lines)
@@ -0,0 +1,265 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Exporting a Nominatim database to SQlite.
"""
from typing import Set, Any
import datetime as dt
import logging
from pathlib import Path

import sqlalchemy as sa

from nominatim.typing import SaSelect, SaRow
from nominatim.db.sqlalchemy_types import Geometry, IntArray
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
import nominatim.api as napi

LOG = logging.getLogger()

async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
    """ Export an existing database to sqlite. The resulting database
        will be usable against the Python frontend of Nominatim.
    """
    api = napi.NominatimAPIAsync(project_dir)

    try:
        outapi = napi.NominatimAPIAsync(project_dir,
                                        {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}",
                                         'NOMINATIM_DATABASE_RW': '1'})

        try:
            async with api.begin() as src, outapi.begin() as dest:
                writer = SqliteWriter(src, dest, options)
                await writer.write()
        finally:
            await outapi.close()
    finally:
        await api.close()
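
A hedged example of driving the exporter from a script; the paths are invented for the illustration, and the 'search' option mirrors the flag handled by SqliteWriter below:

# Sketch only: export an existing project database to a SQLite file.
import asyncio
from pathlib import Path

asyncio.run(convert(Path('/srv/nominatim-project'),
                    Path('nominatim.sqlite'),
                    options={'search'}))
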


class SqliteWriter:
    """ Worker class which creates a new SQLite database.
    """

    def __init__(self, src: napi.SearchConnection,
                 dest: napi.SearchConnection, options: Set[str]) -> None:
        self.src = src
        self.dest = dest
        self.options = options


    async def write(self) -> None:
        """ Create the database structure and copy the data from
            the source database to the destination.
        """
        LOG.warning('Setting up spatialite')
        await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))

        await self.create_tables()
        await self.copy_data()
        if 'search' in self.options:
            await self.create_word_table()
        await self.create_indexes()


    async def create_tables(self) -> None:
        """ Set up the database tables.
        """
        LOG.warning('Setting up tables')
        if 'search' not in self.options:
            self.dest.t.meta.remove(self.dest.t.search_name)
        else:
            await self.create_class_tables()

        await self.dest.connection.run_sync(self.dest.t.meta.create_all)

        # Convert all Geometry columns to Spatialite geometries
        for table in self.dest.t.meta.sorted_tables:
            for col in table.c:
                if isinstance(col.type, Geometry):
                    await self.dest.execute(sa.select(
                        sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
                                                      col.type.subtype.upper(), 'XY')))


    async def create_class_tables(self) -> None:
        """ Set up the table that serve class/type-specific geometries.
        """
        sql = sa.text("""SELECT tablename FROM pg_tables
                         WHERE tablename LIKE 'place_classtype_%'""")
        for res in await self.src.execute(sql):
            for db in (self.src, self.dest):
                sa.Table(res[0], db.t.meta,
                         sa.Column('place_id', sa.BigInteger),
                         sa.Column('centroid', Geometry))


    async def create_word_table(self) -> None:
        """ Create the word table.
            This table needs the property information to determine the
            correct format. Therefore needs to be done after all other
            data has been copied.
        """
        await make_query_analyzer(self.src)
        await make_query_analyzer(self.dest)
        src = self.src.t.meta.tables['word']
        dest = self.dest.t.meta.tables['word']

        await self.dest.connection.run_sync(dest.create)

        LOG.warning("Copying word table")
        async_result = await self.src.connection.stream(sa.select(src))

        async for partition in async_result.partitions(10000):
            data = [{k: getattr(r, k) for k in r._fields} for r in partition]
            await self.dest.execute(dest.insert(), data)

        await self.dest.connection.run_sync(sa.Index('idx_word_woken', dest.c.word_token).create)
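
The copy above streams the table in fixed-size partitions so the full word table never has to sit in memory. The same pattern, reduced to a rough standalone sketch (connection and table objects are placeholders, not part of the diff):

# Sketch only: partitioned streaming copy between two SQLAlchemy async connections.
import sqlalchemy as sa

async def copy_table(src_conn, dest_conn, src_table, dest_table, batch: int = 10000) -> None:
    result = await src_conn.stream(sa.select(src_table))
    async for partition in result.partitions(batch):
        rows = [{key: getattr(row, key) for key in row._fields} for row in partition]
        await dest_conn.execute(dest_table.insert(), rows)
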


    async def copy_data(self) -> None:
        """ Copy data for all registered tables.
        """
        def _getfield(row: SaRow, key: str) -> Any:
            value = getattr(row, key)
            if isinstance(value, dt.datetime):
                if value.tzinfo is not None:
                    value = value.astimezone(dt.timezone.utc)
            return value

        for table in self.dest.t.meta.sorted_tables:
            LOG.warning("Copying '%s'", table.name)
            async_result = await self.src.connection.stream(self.select_from(table.name))

            async for partition in async_result.partitions(10000):
                data = [{('class_' if k == 'class' else k): _getfield(r, k)
                         for k in r._fields}
                        for r in partition]
                await self.dest.execute(table.insert(), data)

        # Set up a minimal copy of pg_tables used to look up the class tables later.
        pg_tables = sa.Table('pg_tables', self.dest.t.meta,
                             sa.Column('schemaname', sa.Text, default='public'),
                             sa.Column('tablename', sa.Text))
        await self.dest.connection.run_sync(pg_tables.create)
        data = [{'tablename': t} for t in self.dest.t.meta.tables]
        await self.dest.execute(pg_tables.insert().values(data))


    async def create_indexes(self) -> None:
        """ Add indexes necessary for the frontend.
        """
        # reverse place node lookup needs an extra table to simulate a
        # partial index with adaptive buffering.
        await self.dest.execute(sa.text(
            """ CREATE TABLE placex_place_node_areas AS
                  SELECT place_id, ST_Expand(geometry,
                                             14.0 * exp(-0.2 * rank_search) - 0.03) as geometry
                  FROM placex
                  WHERE rank_address between 5 and 25
                        and osm_type = 'N'
                        and linked_place_id is NULL """))
        await self.dest.execute(sa.select(
            sa.func.RecoverGeometryColumn('placex_place_node_areas', 'geometry',
                                          4326, 'GEOMETRY', 'XY')))
        await self.dest.execute(sa.select(sa.func.CreateSpatialIndex(
            'placex_place_node_areas', 'geometry')))

        # Remaining indexes.
        await self.create_spatial_index('country_grid', 'geometry')
        await self.create_spatial_index('placex', 'geometry')
        await self.create_spatial_index('osmline', 'linegeo')
        await self.create_spatial_index('tiger', 'linegeo')
        await self.create_index('placex', 'place_id')
        await self.create_index('placex', 'parent_place_id')
        await self.create_index('placex', 'rank_address')
        await self.create_index('addressline', 'place_id')
        await self.create_index('postcode', 'place_id')
        await self.create_index('osmline', 'place_id')
        await self.create_index('tiger', 'place_id')

        if 'search' in self.options:
            await self.create_spatial_index('postcode', 'geometry')
            await self.create_spatial_index('search_name', 'centroid')
            await self.create_index('search_name', 'place_id')
            await self.create_index('osmline', 'parent_place_id')
            await self.create_index('tiger', 'parent_place_id')
            await self.create_search_index()

        for t in self.dest.t.meta.tables:
            if t.startswith('place_classtype_'):
                await self.dest.execute(sa.select(
                    sa.func.CreateSpatialIndex(t, 'centroid')))
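
The 14.0 * exp(-0.2 * rank_search) - 0.03 expression used for placex_place_node_areas gives higher-ranked (more important) nodes a much wider buffer. A quick back-of-the-envelope check of those radii in degrees, purely for orientation:

# Sketch only: size of the ST_Expand buffer for a few search ranks.
import math

for rank in (8, 16, 25):
    radius = 14.0 * math.exp(-0.2 * rank) - 0.03
    print(f"rank {rank:2d}: ~{radius:.3f} degrees")
# rank  8: ~2.797 degrees (large, admin-level nodes)
# rank 16: ~0.541 degrees
# rank 25: ~0.064 degrees (small places)
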


    async def create_spatial_index(self, table: str, column: str) -> None:
        """ Create a spatial index on the given table and column.
        """
        await self.dest.execute(sa.select(
            sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))


    async def create_index(self, table_name: str, column: str) -> None:
        """ Create a simple index on the given table and column.
        """
        table = getattr(self.dest.t, table_name)
        await self.dest.connection.run_sync(
            sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)


    async def create_search_index(self) -> None:
        """ Create the tables and indexes needed for word lookup.
        """
        LOG.warning("Creating reverse search table")
        rsn = sa.Table('reverse_search_name', self.dest.t.meta,
                       sa.Column('word', sa.Integer()),
                       sa.Column('column', sa.Text()),
                       sa.Column('places', IntArray))
        await self.dest.connection.run_sync(rsn.create)

        tsrc = self.src.t.search_name
        for column in ('name_vector', 'nameaddress_vector'):
            sql = sa.select(sa.func.unnest(getattr(tsrc.c, column)).label('word'),
                            sa.func.ArrayAgg(tsrc.c.place_id).label('places'))\
                    .group_by('word')

            async_result = await self.src.connection.stream(sql)
            async for partition in async_result.partitions(100):
                data = []
                for row in partition:
                    row.places.sort()
                    data.append({'word': row.word,
                                 'column': column,
                                 'places': row.places})
                await self.dest.execute(rsn.insert(), data)

        await self.dest.connection.run_sync(
            sa.Index('idx_reverse_search_name_word', rsn.c.word).create)


    def select_from(self, table: str) -> SaSelect:
        """ Create the SQL statement to select the source columns and rows.
        """
        columns = self.src.t.meta.tables[table].c

        if table == 'placex':
            # SQLite struggles with Geometries that are larger than 5MB,
            # so simplify those.
            return sa.select(*(c for c in columns if not isinstance(c.type, Geometry)),
                             sa.func.ST_AsText(columns.centroid).label('centroid'),
                             sa.func.ST_AsText(
                                 sa.case((sa.func.ST_MemSize(columns.geometry) < 5000000,
                                          columns.geometry),
                                         else_=sa.func.ST_SimplifyPreserveTopology(
                                             columns.geometry, 0.0001)
                                 )).label('geometry'))

        sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
                          if isinstance(c.type, Geometry) else c for c in columns))

        return sql
@@ -23,7 +23,8 @@ from nominatim.db.async_connection import DBConnection
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.tools.exec_utils import run_osm2pgsql
from nominatim.errors import UsageError
from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \
                              POSTGIS_REQUIRED_VERSION

LOG = logging.getLogger()

@@ -38,6 +39,25 @@ def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int,
        raise UsageError(f'{module} is too old.')


def _require_loaded(extension_name: str, conn: Connection) -> None:
    """ Check that the given extension is loaded. """
    if not conn.extension_loaded(extension_name):
        LOG.fatal('Required module %s is not loaded.', extension_name)
        raise UsageError(f'{extension_name} is not loaded.')


def check_existing_database_plugins(dsn: str) -> None:
    """ Check that the database has the required plugins installed."""
    with connect(dsn) as conn:
        _require_version('PostgreSQL server',
                         conn.server_version_tuple(),
                         POSTGRESQL_REQUIRED_VERSION)
        _require_version('PostGIS',
                         conn.postgis_version_tuple(),
                         POSTGIS_REQUIRED_VERSION)
        _require_loaded('hstore', conn)
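
All of these checks raise UsageError on an unsuitable server, so a caller only has to catch that one exception. A small, hedged usage sketch (the DSN value is invented for the example):

# Sketch only: verify an existing database before importing into it.
from nominatim.errors import UsageError

try:
    check_existing_database_plugins('dbname=nominatim')
except UsageError as err:
    print(f'Database not usable for Nominatim: {err}')
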


def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
    """ Create a new database for Nominatim and populate it with the
        essential extensions.

@@ -31,7 +31,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
    """
    env = get_pg_env(options['dsn'])
    cmd = [str(options['osm2pgsql']),
           '--hstore', '--latlon', '--slim',
           '--slim',
           '--log-progress', 'true',
           '--number-processes', '1' if options['append'] else str(options['threads']),
           '--cache', str(options['osm2pgsql_cache']),
@@ -43,7 +43,7 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
                                        os.environ.get('LUAPATH', ';')))
        cmd.extend(('--output', 'flex'))
    else:
        cmd.extend(('--output', 'gazetteer'))
        cmd.extend(('--output', 'gazetteer', '--hstore', '--latlon'))

    cmd.append('--append' if options['append'] else '--create')


@@ -213,6 +213,10 @@ def _quote_php_variable(var_type: Type[Any], config: Configuration,
def setup_website(basedir: Path, config: Configuration, conn: Connection) -> None:
    """ Create the website script stubs.
    """
    if config.lib_dir.php is None:
        LOG.info("Python frontend does not require website setup. Skipping.")
        return

    if not basedir.exists():
        LOG.info('Creating website directory.')
        basedir.mkdir()

Some files were not shown because too many files have changed in this diff.