Compare commits


10 Commits

Author | SHA1 | Message | Date
Sarah Hoffmann | 50c3890985 | adapt to release 4.3.4 | 2023-11-17 17:06:43 +01:00
Sarah Hoffmann | f145b466b9 | add hint about migrating to 4.2.3 | 2023-04-24 11:37:17 +02:00
Sarah Hoffmann | 928e56f668 | adapt to release 4.2.3 | 2023-04-11 16:27:55 +02:00
Sarah Hoffmann | bfa5f44bf1 | adapt to release 4.2.2 | 2023-03-22 21:00:37 +01:00
Sarah Hoffmann | 3757b9f04a | use canonical url for nominatim.org | 2023-03-22 20:58:45 +01:00
Frederik Ramm | debcf9d54e | Fix typo in NOMINATIM_LOG_FILE (#2919): fix typo in docs (NOMINATIM_LOG_FILE uses s not ms) | 2023-03-22 20:57:30 +01:00
Sarah Hoffmann | 738603ad66 | add FAQ about finding bad postcodes | 2023-03-22 20:57:22 +01:00
Sarah Hoffmann | f8a055b366 | adapt to release 4.2.1 | 2023-02-20 20:34:47 +01:00
Sarah Hoffmann | d71b07d19e | fix internal links | 2023-02-04 21:11:49 +01:00
Sarah Hoffmann | 64eaa6e272 | adapt to 4.2.0 release | 2022-11-24 10:58:56 +01:00
296 changed files with 4423 additions and 25333 deletions

View File

@@ -1,7 +0,0 @@
# https://github.com/codespell-project/codespell
[codespell]
skip = ./man/nominatim.1,data,./docs/styles.css,lib-php,module,munin,osm2pgsql,./test,./settings/*.lua,./settings/*.yaml,./settings/**/*.yaml,./settings/icu-rules,./nominatim/tokenizer/token_analysis/config_variants.py
# Need to be lowercase in the list
# Unter = Unter den Linden (an example address)
ignore-words-list = inout,unter

View File

@@ -7,13 +7,10 @@ assignees: ''
---
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.
Do not send screen shots! Copy any console output directly into the issue.
-->
<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
**Describe the bug**
<!-- A clear and concise description of what the bug is.-->
<!-- A clear and concise description of what the bug is. -->
**To Reproduce**
<!-- Please describe what you did to get to the issue. -->
@@ -28,15 +25,12 @@ assignees: ''
- RAM:
- number of CPUs:
- type and size of disks:
- bare metal/AWS/other cloud service:
**Postgresql Configuration:**
<!-- List any configuration items you changed in your postgresql configuration. -->
**Nominatim Configuration:**
<!-- List the contents of your customized `.env` file. -->
**Additional context**
<!-- Add any other context about the problem here. -->

View File

@@ -1,10 +1,10 @@
name: 'Build Nominatim'
inputs:
flavour:
ubuntu:
description: 'Version of Ubuntu to install on'
required: false
default: 'ubuntu-20'
default: '20'
cmake-args:
description: 'Additional options to hand to cmake'
required: false
@@ -23,18 +23,17 @@ runs:
sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
df -h
shell: bash
- name: Install${{ matrix.flavour }} prerequisites
- name: Install prerequisites
run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION}
if [ "x$UBUNTUVER" == "x18" ]; then
pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 datrie
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
pip3 install sqlalchemy psycopg aiosqlite
fi
shell: bash
env:
FLAVOUR: ${{ inputs.flavour }}
UBUNTUVER: ${{ inputs.ubuntu }}
CMAKE_ARGS: ${{ inputs.cmake-args }}
LUA_VERSION: ${{ inputs.lua }}

View File

@@ -15,9 +15,7 @@ runs:
- name: Remove existing PostgreSQL
run: |
sudo apt-get purge -yq postgresql*
sudo apt install curl ca-certificates gnupg
curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
sudo apt-get update -qq
shell: bash

View File

@@ -7,11 +7,11 @@ jobs:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- uses: actions/checkout@v3
with:
submodules: true
- uses: actions/cache@v4
- uses: actions/cache@v3
with:
path: |
data/country_osm_grid.sql.gz
@@ -27,7 +27,7 @@ jobs:
mv nominatim-src.tar.bz2 Nominatim
- name: 'Upload Artifact'
uses: actions/upload-artifact@v4
uses: actions/upload-artifact@v3
with:
name: full-source
path: nominatim-src.tar.bz2
@@ -37,38 +37,45 @@ jobs:
needs: create-archive
strategy:
matrix:
flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
ubuntu: [18, 20, 22]
include:
- flavour: oldstuff
ubuntu: 20
postgresql: '9.6'
postgis: '2.5'
lua: '5.1'
- flavour: ubuntu-20
ubuntu: 20
- ubuntu: 18
postgresql: 9.6
postgis: 2.5
pytest: pytest
php: 7.2
- ubuntu: 20
postgresql: 13
postgis: 3
lua: '5.3'
- flavour: ubuntu-22
ubuntu: 22
pytest: py.test-3
php: 7.4
- ubuntu: 22
postgresql: 15
postgis: 3
lua: '5.3'
pytest: py.test-3
php: 8.1
runs-on: ubuntu-${{ matrix.ubuntu }}.04
steps:
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@v3
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: ${{ matrix.php }}
tools: phpunit, phpcs, composer
ini-values: opcache.jit=disable
- uses: actions/setup-python@v4
with:
python-version: 3.7
if: matrix.flavour == 'oldstuff'
python-version: 3.6
if: matrix.ubuntu == 18
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
@@ -77,69 +84,59 @@ jobs:
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ${{ matrix.flavour }}
lua: ${{ matrix.lua }}
ubuntu: ${{ matrix.ubuntu }}
- name: Install test prerequisites (behave from apt)
run: sudo apt-get install -y -qq python3-behave
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-pytest python3-behave
if: matrix.ubuntu == 20
- name: Install test prerequisites (behave from pip)
run: pip3 install behave==1.2.6
if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
- name: Install test prerequisites
run: pip3 install pylint pytest behave==1.2.6
if: ${{ (matrix.ubuntu == 18) || (matrix.ubuntu == 22) }}
- name: Install test prerequisites (from apt for Ubuntu 2x)
run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
if: matrix.flavour != 'oldstuff'
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-pytest
if: matrix.ubuntu == 22
- name: Install newer pytest-asyncio
run: pip3 install -U pytest-asyncio==0.21.1
if: matrix.flavour == 'ubuntu-20'
- name: Install latest pylint/mypy
run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions
- name: Install test prerequisites (from pip for Ubuntu 18)
run: pip3 install pytest pytest-asyncio uvicorn
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
run: pip3 install falcon starlette asgi_lifespan
- name: Install latest pylint
run: pip3 install -U pylint
if: matrix.flavour != 'oldstuff'
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: Python linting
run: python3 -m pylint nominatim
run: pylint nominatim
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'
- name: Python static typechecking
run: mypy --strict nominatim
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }}
- name: Python unit tests
run: python3 -m pytest test/python
run: $PYTEST test/python
working-directory: Nominatim
env:
PYTEST: ${{ matrix.pytest }}
- name: BDD tests
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
working-directory: Nominatim/test/bdd
- name: Install mypy and typechecking info
run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
if: matrix.flavour != 'oldstuff'
- name: Python static typechecking
run: python3 -m mypy --strict nominatim
working-directory: Nominatim
if: matrix.flavour != 'oldstuff'
legacy-test:
needs: create-archive
runs-on: ubuntu-20.04
strategy:
matrix:
postgresql: ["13", "16"]
steps:
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@v3
with:
name: full-source
@@ -149,20 +146,19 @@ jobs:
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: '7.4'
php-version: 7.4
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: ${{ matrix.postgresql }}
postgresql-version: 13
postgis-version: 3
- name: Install Postgresql server dev
run: sudo apt-get install postgresql-server-dev-$PGVER
env:
PGVER: ${{ matrix.postgresql }}
run: sudo apt-get install postgresql-server-dev-13
- uses: ./Nominatim/.github/actions/build-nominatim
with:
ubuntu: 20
cmake-args: -DBUILD_MODULE=on
- name: Install test prerequisites
@@ -170,54 +166,7 @@ jobs:
- name: BDD tests (legacy tokenizer)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
php-test:
needs: create-archive
runs-on: ubuntu-22.04
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 15
postgis-version: 3
- name: Setup PHP
uses: shivammathur/setup-php@v2
with:
php-version: 8.1
tools: phpunit:9, phpcs, composer
ini-values: opcache.jit=disable
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: PHP unit tests
run: phpunit ./
working-directory: Nominatim/test/php
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: 'ubuntu-22'
- name: Install test prerequisites
run: sudo apt-get install -y -qq python3-behave
- name: BDD tests (php)
run: |
python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=php --format=progress3
behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
working-directory: Nominatim/test/bdd
@@ -227,13 +176,20 @@ jobs:
strategy:
matrix:
name: [Ubuntu-20, Ubuntu-22]
name: [Ubuntu-18, Ubuntu-20, Ubuntu-22]
include:
- name: Ubuntu-18
flavour: ubuntu
image: "ubuntu:18.04"
ubuntu: 18
install_mode: install-nginx
- name: Ubuntu-20
flavour: ubuntu
image: "ubuntu:20.04"
ubuntu: 20
install_mode: install-apache
- name: Ubuntu-22
flavour: ubuntu
image: "ubuntu:22.04"
ubuntu: 22
install_mode: install-apache
@@ -256,6 +212,14 @@ jobs:
apt-get install -y git sudo wget
ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
shell: bash
if: matrix.flavour == 'ubuntu'
- name: Prepare container (CentOS)
run: |
dnf update -y
dnf install -y sudo glibc-langpack-en
shell: bash
if: matrix.flavour == 'centos'
- name: Setup import user
run: |
@@ -267,7 +231,7 @@ jobs:
OS: ${{ matrix.name }}
INSTALL_MODE: ${{ matrix.install_mode }}
- uses: actions/download-artifact@v4
- uses: actions/download-artifact@v3
with:
name: full-source
path: /home/nominatim
@@ -285,25 +249,26 @@ jobs:
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
mv Nominatim/settings/flex-base.lua flex-base.lua
mv Nominatim/settings/import-extratags.lua import-extratags.lua
mv Nominatim/settings/taginfo.lua taginfo.lua
rm -rf Nominatim
mkdir data-env-reverse
working-directory: /home/nominatim
- name: Prepare import environment (CentOS)
run: |
sudo ln -s /usr/local/bin/nominatim /usr/bin/nominatim
echo NOMINATIM_DATABASE_WEBUSER="apache" > nominatim-project/.env
cp nominatim-project/.env data-env-reverse/.env
working-directory: /home/nominatim
if: matrix.flavour == 'centos'
- name: Print version
run: nominatim --version
working-directory: /home/nominatim/nominatim-project
- name: Print taginfo
run: lua taginfo.lua
working-directory: /home/nominatim
- name: Collect host OS information
run: nominatim admin --collect-os-info
working-directory: /home/nominatim/nominatim-project
- name: Import
run: nominatim import --osm-file ../test.pbf
working-directory: /home/nominatim/nominatim-project
@@ -323,6 +288,7 @@ jobs:
- name: Prepare update (Ubuntu)
run: apt-get install -y python3-pip
shell: bash
if: matrix.flavour == 'ubuntu'
- name: Run update
run: |
@@ -348,54 +314,3 @@ jobs:
- name: Clean up database (reverse-only import)
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
install-no-superuser:
runs-on: ubuntu-latest
needs: create-archive
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql
with:
postgresql-version: 16
postgis-version: 3
- uses: ./Nominatim/.github/actions/build-nominatim
with:
flavour: ubuntu-22
lua: 5.3
- name: Prepare import environment
run: |
mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
rm -rf Nominatim
- name: Prepare Database
run: |
nominatim import --prepare-database
- name: Create import user
run: |
sudo -u postgres createuser osm-import
psql -d nominatim -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import'"
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "osm-import"'
- name: Run import
run: |
NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file test.pbf
- name: Check full import
run: nominatim admin --check-database
codespell:
runs-on: ubuntu-latest
steps:
- uses: codespell-project/actions-codespell@v2
with:
only_warn: 1

View File

@@ -1,13 +1,9 @@
[mypy]
plugins = sqlalchemy.ext.mypy.plugin
[mypy-sanic_cors.*]
ignore_missing_imports = True
[mypy-icu.*]
ignore_missing_imports = True
[mypy-asyncpg.*]
[mypy-osmium.*]
ignore_missing_imports = True
[mypy-datrie.*]
@@ -15,9 +11,3 @@ ignore_missing_imports = True
[mypy-dotenv.*]
ignore_missing_imports = True
[mypy-falcon.*]
ignore_missing_imports = True
[mypy-geoalchemy2.*]
ignore_missing_imports = True

View File

@@ -1,6 +1,6 @@
[MASTER]
extension-pkg-whitelist=osmium,falcon
extension-pkg-whitelist=osmium
ignored-modules=icu,datrie
[MESSAGES CONTROL]
@@ -13,6 +13,6 @@ ignored-classes=NominatimArgs,closing
# 'too-many-ancestors' is triggered already by deriving from UserDict
# 'not-context-manager' disabled because it causes false positives once
# typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init
disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager
good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr
good-names=i,x,y,m,fd,db,cc

View File

@@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
set(NOMINATIM_VERSION_MINOR 4)
set(NOMINATIM_VERSION_MINOR 2)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
@@ -73,7 +73,7 @@ endif()
#-----------------------------------------------------------------------------
if (BUILD_IMPORTER)
find_package(PythonInterp 3.7 REQUIRED)
find_package(PythonInterp 3.6 REQUIRED)
endif()
#-----------------------------------------------------------------------------
@@ -82,17 +82,26 @@ endif()
# Setting PHP binary variable as to command line (prevailing) or auto detect
if (BUILD_API)
if (BUILD_API OR BUILD_IMPORTER)
if (NOT PHP_BIN)
find_program (PHP_BIN php)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHP_BIN})
message(WARNING "PHP binary not found. Only Python frontend can be used.")
set(PHP_BIN "")
message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
if (NOT PHPCGI_BIN)
find_program (PHPCGI_BIN php-cgi)
endif()
# sanity check if PHP binary exists
if (NOT EXISTS ${PHPCGI_BIN})
message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
set (PHPCGI_BIN "")
else()
message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
endif()
endif()
#-----------------------------------------------------------------------------
@@ -224,18 +233,7 @@ if (BUILD_IMPORTER)
install(DIRECTORY nominatim
DESTINATION ${NOMINATIM_LIBDIR}/lib-python
FILES_MATCHING PATTERN "*.py"
PATTERN "paths.py" EXCLUDE
PATTERN __pycache__ EXCLUDE)
if (EXISTS ${PHP_BIN})
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
else()
configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py-no-php.tmpl paths-py.installed)
endif()
install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
RENAME paths.py)
install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
install(FILES ${COUNTRY_GRID_FILE}
@@ -259,19 +257,18 @@ if (BUILD_MODULE)
DESTINATION ${NOMINATIM_LIBDIR}/module)
endif()
if (BUILD_API AND EXISTS ${PHP_BIN})
if (BUILD_API)
install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
endif()
install(FILES settings/env.defaults
settings/address-levels.json
settings/phrase-settings.json
settings/import-admin.lua
settings/import-street.lua
settings/import-address.lua
settings/import-full.lua
settings/import-extratags.lua
settings/flex-base.lua
settings/import-admin.style
settings/import-street.style
settings/import-address.style
settings/import-full.style
settings/import-extratags.style
settings/icu_tokenizer.yaml
settings/country_settings.yaml
DESTINATION ${NOMINATIM_CONFIGDIR})

View File

@@ -69,7 +69,7 @@ Before submitting a pull request make sure that the tests pass:
Nominatim follows semantic versioning. Major releases are done for large changes
that require (or at least strongly recommend) a reimport of the databases.
Minor releases can usually be applied to existing databases. Patch releases
Minor releases can usually be applied to exisiting databases Patch releases
contain bug fixes only and are released from a separate branch where the
relevant changes are cherry-picked from the master branch.

ChangeLog
View File

@@ -1,105 +1,3 @@
4.4.0
* add export to SQLite database and SQLite support for the frontend
* switch to Python frontend as the default frontend
* update to osm2pgsql 1.11.0
* add support for new osm2pgsql middle table format
* simplify geometry for large polygon objects not used in addresses
* various performance tweaks for search in Python frontend
* fix regression in search with categories where it was confused with near
search
* partially roll back use of SQLAlchemy lambda statements due to bugs
in SQLAlchemy
* fix handling of timezones for timestamps from the database
* fix handling of full address searches in connection with a viewbox
* fix postcode computation of highway areas
* fix handling of timeout errors for Python <= 3.10
* fix address computation for postcode areas
* fix variable shadowing in osm2pgsql flex script, causing bugs with LuaJIT
* make sure extratags are always null when empty
* reduce importance of places without wikipedia reference
* improve performance of word count computations
* drop support for wikipedia tags with full URLs
* replace get_addressdata() SQL implementation with a Python function
* improve display name for non-address features
* fix postcode validation for postcodes with country code
(thanks @pawel-wroniszewski)
* add possibility to run imports without superuser database rights
(thanks @robbe-haesendonck)
* new CLI command for cleaning deleted relations (thanks @lujoh)
* add check for database version in the CLI check command
* updates to import styles ignoring more unused objects
* various typo fixes (thanks @kumarUjjawal)
4.3.2
* fix potential SQL injection issue for 'nominatim admin --collect-os-info'
* PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
* Python frontend: improve handling of viewbox
* Python frontend: correct deployment instructions
4.3.1
* reintroduce result rematching
* improve search of multi-part names
* fix accidentally switched meaning of --reverse-only and --search-only in
warm command
4.3.0
* fix failing importance recalculation command
* fix merging of linked names into unnamed boundaries
* fix a number of corner cases with interpolation splitting resulting in
invalid geometries
* fix failure in website generation when password contains curly brackets
* fix broken use of ST_Project in PostGIS 3.4
* new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
to known countries (thanks @alfmarcua)
* allow negative OSM IDs (thanks @alfmarcua)
* disallow import of Tiger data in a frozen DB
* avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
* update bundled osm2pgsql to 1.9.2
* reorganise osm2pgsql flex style and make it the default
* exclude names ending in :wikipedia from indexing
* no longer accept comma as a list separator in name tags
* process forward dependencies on update to catch updates in geometries
of ways and relations
* fix handling of isolated silent letters during transliteration
* no longer assign postcodes to large linear features like rivers
* introduce nominatim.paths module for finding data and libraries
* documentation layout changed to material theme
* new documentation section for library
* various smaller fixes to existing documentation
(thanks @woodpeck, @bloom256, @biswajit-k)
* updates to vagrant install scripts, drop support for Ubuntu 18
(thanks @n-timofeev)
* removed obsolete configuration variables from env.defaults
* add script for generating a taginfo description (thanks @biswajit-k)
* modernize Python code around BDD test and add testing of Python frontend
* lots of new BDD tests for API output
4.2.3
* fix deletion handling for 'nominatim add-data'
* adapt place_force_delete() to new deletion handling
* flex style: avoid dropping of postcode areas
* fix update errors on address interpolation handling
4.2.2
* extend flex-style library to fully support all default styles
* fix handling of Hebrew aleph
* do not assign postcodes to rivers
* fix string matching in PHP code
* update osm2pgsql (various updates to flex)
* fix slow query when deleting places on update
* fix CLI details query
* fix recalculation of importance values
* fix polygon simplification in reverse results
* add class/type information to reverse geocodejson result
* minor improvements to default tokenizer configuration
* various smaller fixes to documentation
4.2.1
* fix XSS vulnerability in debug view
4.2.0
* add experimental support for osm2pgsql flex style
@@ -123,10 +21,6 @@
* typing fixes to work with latest type annotations from typeshed
* smaller improvements to documentation (thanks to @mausch)
4.1.1
* fix XSS vulnerability in debug view
4.1.0
* switch to ICU tokenizer as default
@@ -163,10 +57,6 @@
* add setup instructions for updates and systemd
* drop support for PostgreSQL 9.5
4.0.2
* fix XSS vulnerability in debug view
4.0.1
* fix initialisation error in replication script
@@ -205,10 +95,6 @@
* add testing of installation scripts via CI
* drop support for Python < 3.6 and Postgresql < 9.5
3.7.3
* fix XSS vulnerability in debug view
3.7.2
* fix database check for reverse-only imports
@@ -284,7 +170,7 @@
* increase splitting for large geometries to improve indexing speed
* remove deprecated get_magic_quotes_gpc() function
* make sure that all postcodes have an entry in word and are thus searchable
* remove use of ST_Covers in conjunction with ST_Intersects,
* remove use of ST_Covers in conjunction woth ST_Intersects,
causes bad query planning and slow updates in Postgis3
* update osm2pgsql
@@ -341,7 +227,7 @@
* exclude postcode ranges separated by colon from centre point calculation
* update osm2pgsql, better handling of imports without flatnode file
* switch to more efficient algorithm for word set computation
* use only boundaries for country and state parts of addresses
* use only boundries for country and state parts of addresses
* improve updates of addresses with housenumbers and interpolations
* remove country from place_addressline table and use country_code instead
* optimise indexes on search_name partition tables
@@ -380,7 +266,7 @@
* complete rewrite of reverse search algorithm
* add new geojson and geocodejson output formats
* add simple export script to export addresses to CSV
* add simple export script to exprot addresses to CSV
* remove is_in terms from address computation
* remove unused search_name_country tables
* various smaller fixes to query parsing
@@ -445,7 +331,7 @@
* move installation documentation into this repo
* add self-documenting vagrant scripts
* remove --create-website, recommend to use website directory in build
* add accessor functions for URL parameters and improve error checking
* add accessor functions for URL parameters and improve erro checking
* remove IP blocking and rate-limiting code
* enable CI via travis
* reformatting for more consistent coding style
@@ -456,7 +342,7 @@
* update to refactored osm2pgsql which use libosmium based types
* switch from osmosis to pyosmium for updates
* be more strict when matching against special search terms
* handle postcode entries with multiple values correctly
* handle postcode entries with mutliple values correctly
2.5

View File

@@ -9,10 +9,11 @@ versions.
| Version | End of support for security updates |
| ------- | ----------------------------------- |
| 4.4.x | 2026-03-07 |
| 4.3.x | 2025-09-07 |
| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
| 4.0.x | 2023-11-02 |
| 3.7.x | 2023-04-05 |
| 3.6.x | 2022-12-12 |
## Reporting a Vulnerability
@@ -36,6 +37,4 @@ incident. Announcements will also be published at the
## List of Previous Incidents
* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)

View File

@@ -1,6 +1,6 @@
# Install Nominatim in a virtual machine for development and testing
This document describes how you can install Nominatim inside a Ubuntu 22
This document describes how you can install Nominatim inside a Ubuntu 16
virtual machine on your desktop/laptop (host machine). The goal is to give
you a development environment to easily edit code and run the test suite
without affecting the rest of your system.
@@ -69,7 +69,8 @@ installation.
PHP errors are written to `/var/log/apache2/error.log`.
With `echo` and `var_dump()` you write into the output (HTML/XML/JSON) when
you either add `&debug=1` to the URL.
you either add `&debug=1` to the URL (preferred) or set
`@define('CONST_Debug', true);` in `settings/local.php`.
In the Python BDD test you can use `logger.info()` for temporary debug
statements.
@@ -129,10 +130,6 @@ and then
Yes, Vagrant and Virtualbox can be installed on MS Windows just fine. You need a 64bit
version of Windows.
##### Will it run on Apple Silicon?
You might need to replace Virtualbox with [Parallels](https://www.parallels.com/products/desktop/).
There is no free/open source version of Parallels.
##### Why Monaco, can I use another country?
@@ -144,12 +141,11 @@ No. Long running Nominatim installations will differ once new import features (o
bug fixes) get added since those usually only get applied to new/changed data.
Also this document skips the optional Wikipedia data import which affects ranking
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation)
for details.
of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?
There used to be a Vagrant script for CentOS available, but the Nominatim directory
There is a Vagrant script for CentOS available, but the Nominatim directory
isn't symlinked/mounted to the host which makes development trickier. We used
it mainly for debugging installation with SELinux.
@@ -158,17 +154,14 @@ are slightly different, e.g. the name of the package manager, Apache2 package
name, location of files. We chose Ubuntu because that is closest to the
nominatim.openstreetmap.org production environment.
You can configure/download other Vagrant boxes from
[https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
You can configure/download other Vagrant boxes from [https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
##### How can I connect to an existing database?
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com`
and port `5432`. The Postgres username is `postgres`. You can edit the `.env` in your
project directory and point Nominatim to it.
NOMINATIM_DATABASE_DSN="pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
No data import or restarting necessary.
If the Postgres installation is behind a firewall, you can try
@@ -176,12 +169,11 @@ If the Postgres installation is behind a firewall, you can try
ssh -L 9999:localhost:5432 your-username@your-server.com
inside the virtual machine. It will map the port to `localhost:9999` and then
you edit `.env` file with
you edit `settings/local.php` with
NOMINATIM_DATABASE_DSN="pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it"
@define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
To access postgres directly remember to specify the hostname,
e.g. `psql --host localhost --port 9999 nominatim_it`
To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`
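If you script against the database, the `pgsql:` DSN format shown above maps directly onto libpq connection keywords. A minimal sketch of converting it for psycopg2 (the helper name is hypothetical, not part of Nominatim; assumes psycopg2 is installed):

```python
import psycopg2

def connect_from_nominatim_dsn(dsn: str):
    # Hypothetical helper: split "pgsql:host=...;port=...;user=...;dbname=..."
    # into libpq keyword arguments (host, port, user, dbname, password, ...).
    assert dsn.startswith('pgsql:')
    params = dict(part.split('=', 1) for part in dsn[len('pgsql:'):].split(';') if part)
    return psycopg2.connect(**params)

conn = connect_from_nominatim_dsn(
    'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it')
print(conn.get_dsn_parameters())
conn.close()
```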
##### My computer is slow and the import takes too long. Can I start the virtual machine "in the cloud"?

Vagrantfile vendored
View File

@@ -17,14 +17,6 @@ Vagrant.configure("2") do |config|
checkout = "no"
end
config.vm.provider "hyperv" do |hv, override|
hv.memory = 2048
hv.linked_clone = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
end
end
config.vm.provider "virtualbox" do |vb, override|
vb.gui = false
vb.memory = 2048
@@ -38,38 +30,11 @@ Vagrant.configure("2") do |config|
lv.memory = 2048
lv.nested = true
if ENV['CHECKOUT'] != 'y' then
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs', nfs_udp: false
override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
end
end
config.vm.define "ubuntu22", primary: true do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu22-apache" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu22-nginx" do |sub|
sub.vm.box = "generic/ubuntu2204"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-22.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu20" do |sub|
config.vm.define "ubuntu", primary: true do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -78,7 +43,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu20-apache" do |sub|
config.vm.define "ubuntu-apache" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -87,7 +52,7 @@ Vagrant.configure("2") do |config|
end
end
config.vm.define "ubuntu20-nginx" do |sub|
config.vm.define "ubuntu-nginx" do |sub|
sub.vm.box = "generic/ubuntu2004"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -95,4 +60,51 @@ Vagrant.configure("2") do |config|
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "ubuntu18" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "ubuntu18-apache" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-apache"]
end
end
config.vm.define "ubuntu18-nginx" do |sub|
sub.vm.box = "generic/ubuntu1804"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Ubuntu-18.sh"
s.privileged = false
s.args = [checkout, "install-nginx"]
end
end
config.vm.define "centos7" do |sub|
sub.vm.box = "centos/7"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-7.sh"
s.privileged = false
s.args = [checkout]
end
end
config.vm.define "centos" do |sub|
sub.vm.box = "generic/centos8"
sub.vm.provision :shell do |s|
s.path = "vagrant/Install-on-Centos-8.sh"
s.privileged = false
s.args = [checkout]
end
end
end

View File

@@ -1,15 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = None
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()

View File

@@ -1,15 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Path settings for extra data used by Nominatim (installed version).
"""
from pathlib import Path
PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()

View File

@@ -4,10 +4,17 @@ import os
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
data_dir='@NOMINATIM_DATADIR@',
config_dir='@NOMINATIM_CONFIGDIR@',
phpcgi_path='@PHPCGI_BIN@'))

View File

@@ -4,10 +4,17 @@ import os
sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
from nominatim import cli
from nominatim import version
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
data_dir='@CMAKE_SOURCE_DIR@/data',
config_dir='@CMAKE_SOURCE_DIR@/settings',
phpcgi_path='@PHPCGI_BIN@'))

View File

@@ -11,7 +11,6 @@ set (DOC_SOURCES
develop
api
customize
library
index.md
extra.css
styles.css
@@ -24,12 +23,13 @@ foreach (src ${DOC_SOURCES})
endforeach()
ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)
ADD_CUSTOM_TARGET(serve-doc
COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
)

View File

@@ -5,35 +5,6 @@ your Nominatim database. It is assumed that you have already successfully
installed the Nominatim software itself, if not return to the
[installation page](Installation.md).
## Importing with a database user without superuser rights
Nominatim usually creates its own PostgreSQL database at the beginning of the
import process. This makes usage easier for the user but means that the
database user doing the import needs the appropriate rights.
If you prefer to run the import with a database user with limited rights,
you can do so by changing the import process as follows:
1. Run the command for database preparation with a database user with
superuser rights. For example, to use a db user 'dbadmin' for a
database 'nominatim', execute:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=dbadmin" nominatim import --prepare-database
```
2. Grant the import user the right to create tables. For example, for user 'import-user' (a scripted variant is sketched after these steps):
```
psql -d nominatim -c 'GRANT CREATE ON SCHEMA public TO "import-user"'
```
3. Now run the remainder of the import with the import user:
```
NOMINATIM_DATABASE_DSN="pgsql:dbname=nominatim;user=import-user" nominatim import --continue import-from-file --osm-file file.pbf
```
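If you provision the database from a script rather than the psql prompt, the same grant from step 2 can be issued via psycopg2. A minimal sketch, assuming the superuser 'dbadmin' from step 1 (illustrative only, not part of Nominatim):

```python
import psycopg2

# Connect as the superuser that ran 'nominatim import --prepare-database'.
conn = psycopg2.connect(dbname='nominatim', user='dbadmin')
conn.autocommit = True
with conn.cursor() as cur:
    # Allow the limited import user to create tables in the public schema.
    cur.execute('GRANT CREATE ON SCHEMA public TO "import-user"')
conn.close()
```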
## Importing multiple regions (without updates)
To import multiple regions in your database you can simply give multiple
@@ -65,15 +36,16 @@ which has the following structure:
```bash
update
├── europe
│   ├── andorra
│   │   └── sequence.state
│   └── monaco
│       └── sequence.state
└── tmp
    └── europe
        ├── andorra-latest.osm.pbf
        └── monaco-latest.osm.pbf
```
@@ -127,7 +99,7 @@ Change into the project directory and run the following command:
This will get diffs from the replication server, import diffs and index
the database. The default replication server in the
script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Using an external PostgreSQL database

View File

@@ -1,140 +0,0 @@
# Deploying the Nominatim Python frontend
Nominatim can be run as a Python-based
[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
and [Starlette](https://www.starlette.io/) as the ASGI framework.
This section gives a quick overview on how to configure Nginx to serve
Nominatim. Please refer to the documentation of
[Nginx](https://nginx.org/en/docs/) for background information on how
to configure it.
!!! Note
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
We further assume that your web server runs as user `www-data`. Older
versions of CentOS may still use the user name `apache`. You also need
to adapt the instructions in this case.
### Installing the required packages
The recommended way to deploy a Python ASGI application is to run
the ASGI runner [uvicorn](https://uvicorn.org/)
together with the [gunicorn](https://gunicorn.org/) HTTP server. We use
Falcon here as the web framework.
Create a virtual environment for the Python packages and install the necessary
dependencies:
``` sh
sudo apt install virtualenv
virtualenv /srv/nominatim-venv
/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
```
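For orientation, the following is the general shape of a Falcon ASGI application as served by the gunicorn/uvicorn stack set up below. It is a minimal, self-contained sketch to illustrate the deployment model, not Nominatim's actual frontend application:

```python
# app.py - minimal Falcon ASGI sketch (illustrative, not Nominatim's frontend)
import falcon.asgi

class StatusResource:
    async def on_get(self, req, resp):
        # Respond to GET /status with a small JSON document.
        resp.media = {'status': 0, 'message': 'OK'}

app = falcon.asgi.App()
app.add_route('/status', StatusResource())

# Served the same way as the Nominatim service below:
#   gunicorn -b unix:/run/nominatim.sock -w 4 \
#       -k uvicorn.workers.UvicornWorker app:app
```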
### Setting up Nominatim as a systemd job
Next you need to set up the service that runs the Nominatim frontend. This is
easiest done with a systemd job.
First you need to tell systemd to create a socket file to be used by
gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
``` systemd
[Unit]
Description=Gunicorn socket for Nominatim
[Socket]
ListenStream=/run/nominatim.sock
SocketUser=www-data
[Install]
WantedBy=multi-user.target
```
Now you can add the systemd service for Nominatim itself.
Create the following file `/etc/systemd/system/nominatim.service`:
``` systemd
[Unit]
Description=Nominatim running as a gunicorn application
After=network.target
Requires=nominatim.socket
[Service]
Type=simple
Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
User=www-data
Group=www-data
WorkingDirectory=/srv/nominatim-project
ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
ExecReload=/bin/kill -s HUP $MAINPID
StandardOutput=append:/var/log/gunicorn-nominatim.log
StandardError=inherit
PrivateTmp=true
TimeoutStopSec=5
KillMode=mixed
[Install]
WantedBy=multi-user.target
```
This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
its own Python process using
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
connections to the database to serve requests in parallel.
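For capacity planning, note that the worker count multiplies with the pool size: with `-w 4` and, for example, a pool size of 10, the frontend may hold up to 4 × 10 = 40 open database connections, so PostgreSQL's `max_connections` needs headroom for that.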
Make the new services known to systemd and start it:
``` sh
sudo systemctl daemon-reload
sudo systemctl enable nominatim.socket
sudo systemctl start nominatim.socket
sudo systemctl enable nominatim.service
sudo systemctl start nominatim.service
```
This sets the service up so that Nominatim is automatically started
on reboot.
### Configuring nginx
To make the service available to the world, you need to proxy it through
nginx. Add the following definition to the default configuration:
``` nginx
upstream nominatim_service {
server unix:/run/nominatim.sock fail_timeout=0;
}
server {
listen 80;
listen [::]:80;
root /var/www/html;
index /search;
location / {
proxy_set_header Host $http_host;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_redirect off;
proxy_pass http://nominatim_service;
}
}
```
Reload nginx with
```
sudo systemctl reload nginx
```
and you should be able to see the status of your server under
`http://localhost/status`.
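A scripted version of that check, assuming the nginx proxy above listens on port 80 of the local machine (plain standard library, nothing Nominatim-specific):

```python
import urllib.request

# Query the /status endpoint through the nginx proxy configured above.
with urllib.request.urlopen('http://localhost/status') as resp:
    print(resp.status, resp.read().decode())  # expected: 200 OK
```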

View File

@@ -1,4 +1,4 @@
# Deploying Nominatim using the PHP frontend
# Deploying Nominatim
The Nominatim API is implemented as a PHP application. The `website/` directory
in the project directory contains the configured website. You can serve this
@@ -8,13 +8,13 @@ PHP scripts.
This section gives a quick overview on how to configure Apache and Nginx to
serve Nominatim. It is not meant as a full system administration guide on how
to run a web service. Please refer to the documentation of
[Apache](https://httpd.apache.org/docs/current/) and
[Apache](http://httpd.apache.org/docs/current/) and
[Nginx](https://nginx.org/en/docs/)
for background information on configuring the services.
!!! Note
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project` and you have installed Nominatim
Throughout this page, we assume that your Nominatim project directory is
located in `/srv/nominatim-project` and that you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.

View File

@@ -37,6 +37,40 @@ nominatim import --continue indexing
Otherwise it's best to start the full setup from the beginning.
### PHP "open_basedir restriction in effect" warnings
PHP Warning: file_get_contents(): open_basedir restriction in effect.
You need to adjust the
[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
setting in your PHP configuration (`php.ini` file). By default this setting may
look like this:
open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/
Either add reported directories to the list or disable this setting temporarily
by adding ";" at the beginning of the line. Don't forget to enable this setting
again once you are done with the PHP command line operations.
### PHP timezone warnings
The Apache log may contain lots of PHP warnings like this:
`PHP Warning: date_default_timezone_set() function.`
You should set the default time zone as instructed in the warning in
your `php.ini` file. Find the entry about timezone and set it to
something like this:
; Defines the default timezone used by the date functions
; https://php.net/date.timezone
date.timezone = 'America/Denver'
Or
```
echo "date.timezone = 'America/Denver'" > /etc/php.d/timezone.ini
```
### nominatim.so version mismatch
@@ -136,7 +170,7 @@ recreate `nominatim.so`. Try
cmake $main_Nominatim_path && make
```
### Setup fails with "DB Error: extension not found"
### Setup.php fails with "DB Error: extension not found"
Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
See the installation instructions for a full list of required packages.

View File

@@ -254,71 +254,26 @@ successfully.
nominatim admin --check-database
```
Now you can try out your installation by executing a simple query on the
command line:
Now you can try out your installation by running:
``` sh
nominatim search --query Berlin
```
or, when you have a reverse-only installation:
``` sh
nominatim reverse --lat 51 --lon 45
```
If you want to run Nominatim as a service, you need to make a choice between
running the modern Python frontend and the legacy PHP frontend.
Make sure you have installed the right packages as per
[Installation](Installation.md#software).
#### Testing the Python frontend
To run the test server against the Python frontend, you must choose a
web framework to use, either starlette or falcon. Make sure the appropriate
packages are installed. Then run
``` sh
```sh
nominatim serve
```
or, if you prefer to use Starlette instead of Falcon as webserver,
This runs a small test server normally used for development. You can use it
to verify that your installation is working. Go to
`http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
``` sh
nominatim serve --engine starlette
```
Note that search queries are not supported for reverse-only imports. You can run a
reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
To run Nominatim via webservers like Apache or nginx, please read the
[Deployment chapter](Deployment.md).
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the Python frontend](Deployment-Python.md).
## Adding search through category phrases
#### Testing the PHP frontend
You can run a small test server with the PHP frontend like this:
```sh
nominatim serve --engine php
```
Go to `http://localhost:8088/status.php` and you should see the message `OK`.
You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
or, for reverse-only installations a reverse query,
e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
Do not use this test server in production.
To run Nominatim via webservers like Apache or nginx, please continue reading
[Deploy the PHP frontend](Deployment-PHP.md).
## Enabling search by category phrases
To be able to search for places by their type using
If you want to be able to search for places by their type through
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:

View File

@@ -6,6 +6,7 @@ the following operating systems:
* [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
* [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
* [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
These OS-specific instructions can also be found in executable form
in the `vagrant/` directory.
@@ -35,7 +36,6 @@ For compiling:
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
* [nlohmann/json](https://json.nlohmann.me/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
@@ -44,35 +44,23 @@ For running Nominatim:
* [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
* [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
* [Python 3](https://www.python.org/) (3.7+)
* [Python 3](https://www.python.org/) (3.6+)
* [Psycopg2](https://www.psycopg.org) (2.7+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
* [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
* PHP-cgi (for running queries from the command line)
For running continuous updates:
* [pyosmium](https://osmcode.org/pyosmium/)
For running the Python frontend:
* one of the following web frameworks:
* [falcon](https://falconframework.org/) (3.0+)
* [starlette](https://www.starlette.io/)
* [uvicorn](https://www.uvicorn.org/)
For running the legacy PHP frontend:
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
For dependencies for running tests and building documentation, see
the [Development section](../develop/Development-Environment.md).
@@ -87,7 +75,7 @@ Take into account that the OSM database is growing fast.
Fast disks are essential. Using NVME disks is recommended.
Even on a well configured machine the import of a full planet takes
around 2 days. When using traditional SSDs, 4-5 days are more realistic.
around 2 days. On traditional spinning disks, 7-8 days are more realistic.
## Tuning the PostgreSQL database
@@ -119,6 +107,15 @@ you might consider setting:
and even reduce `autovacuum_work_mem` further. This will reduce the amount
of memory that autovacuum takes away from the import process.
For the initial import, you should also set:
fsync = off
full_page_writes = off
Don't forget to re-enable them after the initial import or you risk database
corruption.
## Downloading and building Nominatim
### Downloading the latest release

View File

@@ -60,13 +60,16 @@ to finish the recomputation.
## Removing large deleted objects
Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
To run this command you will need to pass a PostgreSQL time interval. For example, to
delete any objects that have been deleted more than a month ago, you would run:
`nominatim admin --clean-deleted '1 month'`
There is currently no command to do that. You can use the following SQL
query to force a deletion on all objects that have been deleted more than
a certain timespan ago (here: 1 month):
```sql
SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
and age(p.indexed_date) > '1 month'::interval
```

View File

@@ -15,24 +15,13 @@ breaking changes. **Please read them before running the migration.**
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
## 4.2.0 -> 4.3.0
## 4.2.2 -> 4.2.3
### New indexes for reverse lookup
### Update interpolation functions
The reverse lookup algorithm has changed slightly to improve performance.
This change needs a different index in the database. The required index
will be built automatically during migration. Until the new index is available,
the performance of the /reverse endpoint is significantly reduced. You should
therefore either remove traffic from the machine before attempting a
version update or create the index manually **before** starting the update
using the following SQL:
```sql
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
WHERE rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND linked_place_id is null AND osm_type = 'N';
```
When updating to this release, you need to run `nominatim refresh --functions`
after updating and before restarting updates. Otherwise you may see an error
`Splitting of Point geometries is unsupported` or similar.
## 4.0.0 -> 4.1.0

View File

@@ -59,6 +59,47 @@ imported multiple country extracts and want to keep them
up-to-date, [Advanced installations section](Advanced-Installations.md)
contains instructions to set up and update multiple country extracts.
#### Continuous updates
This is the easiest mode. Simply run the replication command without any
parameters:
nominatim replication
The update application keeps running forever and retrieves and applies
new updates from the server as they are published.
You can run this command as a simple systemd service. Create a service
description like that in `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
Description=Continuous updates of Nominatim
[Service]
WorkingDirectory=/srv/nominatim
ExecStart=nominatim replication
StandardOutput=append:/var/log/nominatim-updates.log
StandardError=append:/var/log/nominatim-updates.error.log
User=nominatim
Group=nominatim
Type=simple
[Install]
WantedBy=multi-user.target
```
Replace the `WorkingDirectory` with your project directory. Also adapt user
and group names as required.
Now activate the service and start the updates:
```
sudo systemctl daemon-reload
sudo systemctl enable nominatim-updates
sudo systemctl start nominatim-updates
```
#### One-time mode
When the `--once` parameter is given, then Nominatim will download exactly one
@@ -180,53 +221,3 @@ replication catch-up at whatever interval you desire.
updated source with daily updates), use the
continuous update mode. It ensures to re-request the newest update until it
is published.
#### Continuous updates
!!! danger
This mode is no longer recommended and will be removed in future
releases. systemd is much better
suited for running regular updates. Please refer to the setup
instructions for running one-time mode with systemd above.
This is the easiest mode. Simply run the replication command without any
parameters:
nominatim replication
The update application keeps running forever and retrieves and applies
new updates from the server as they are published.
You can run this command as a simple systemd service. Create a service
description like that in `/etc/systemd/system/nominatim-updates.service`:
```
[Unit]
Description=Continuous updates of Nominatim
[Service]
WorkingDirectory=/srv/nominatim
ExecStart=nominatim replication
StandardOutput=append:/var/log/nominatim-updates.log
StandardError=append:/var/log/nominatim-updates.error.log
User=nominatim
Group=nominatim
Type=simple
[Install]
WantedBy=multi-user.target
```
Replace the `WorkingDirectory` with your project directory. Also adapt user
and group names as required.
Now activate the service and start the updates:
```
sudo systemctl daemon-reload
sudo systemctl enable nominatim-updates
sudo systemctl start nominatim-updates
```

View File

@@ -2,17 +2,13 @@
Show all details about a single place saved in the database.
This API endpoint is meant for visual inspection of the data in the database,
mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
The parameters of the endpoint and the output may change occasionally between
versions of Nominatim. Do not rely on the output in scripts or applications.
!!! warning
The details endpoint at https://nominatim.openstreetmap.org
may not be used in scripts or bots at all.
The details page exists for debugging only. You may not use it in scripts
or to automatically query details about a result.
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
## Parameters
The details API supports the following two request formats:
@@ -39,90 +35,59 @@ for a place is different between Nominatim installation (servers) and
changes when data gets reimported. Therefore it cannot be used as
a permanent id and shouldn't be used in bug reports.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/details.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional optional parameters.
Additional optional parameters are explained below.
### Output format
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
* `json_callback=<string>`
When set, JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| pretty | 0 or 1 | 0 |
* `pretty=[0|1]`
`[PHP-only]` Add indentation to the output to make it more human-readable.
Add indentation to make it more human-readable. (Default: 0)
### Output details
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
* `addressdetails=[0|1]`
When set to 1, include a breakdown of the address into elements.
Include a breakdown of the address into elements. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| keywords | 0 or 1 | 0 |
* `keywords=[0|1]`
When set to 1, include a list of name keywords and address keywords
in the result.
Include a list of name keywords and address keywords (word ids). (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| linkedplaces | 0 or 1 | 1 |
* `linkedplaces=[0|1]`
Include details of places that are linked with this one. Places get linked
together when they are different forms of the same physical object. Nominatim
links two kinds of objects together: place nodes get linked with the
corresponding administrative boundaries. Waterway relations get linked together with their
members.
(Default: 1)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| hierarchy | 0 or 1 | 0 |
* `hierarchy=[0|1]`
Include details of places lower in the address hierarchy.
Include details of places lower in the address hierarchy. (Default: 0)
`[Python-only]` will only return properly parented places. These are address
or POI-like places that reuse the address of their parent street or place.
* `group_hierarchy=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| group_hierarchy | 0 or 1 | 0 |
For JSON output, the places will be grouped by type. (Default: 0)
When set to 1, the output of the address hierarchy will be
grouped by type.
* `polygon_geojson=[0|1]`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
Include geometry of result.
Include geometry of result. (Default: 0)
### Language of results
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
* `accept-language=<browser language string>`
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Preferred language order for showing results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
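To illustrate how these parameters combine, here is a minimal Python sketch
using only the standard library; the OSM way id is a placeholder and the
`User-Agent` must identify your application:

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    'osmtype': 'W',      # N, W or R
    'osmid': 38210407,   # placeholder id
    'addressdetails': 1,
    'keywords': 1,
})
req = urllib.request.Request(
    f'https://nominatim.openstreetmap.org/details?{params}',
    headers={'User-Agent': 'my-app/1.0'})
with urllib.request.urlopen(req) as resp:
    data = json.load(resp)
print(data.get('localname'))
```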
## Examples

View File

@@ -3,7 +3,7 @@
The lookup API allows querying the address and other details of one or
multiple OSM objects like node, way or relation.
## Endpoint
## Parameters
The lookup API has the following format:
@@ -15,129 +15,75 @@ The lookup API has the following format:
prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
can be queried at the same time.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional optional parameters.
Additional optional parameters are explained below.
### Output format
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format.
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
* `json_callback=<string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
### Output details
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
* `addressdetails=[0|1]`
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
Include a breakdown of the address into elements. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
* `extratags=[0|1]`
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
* `namedetails=[0|1]`
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
### Language of results
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers, on the contrary, always
send the currently chosen browser language.
* `accept-language=<browser language string>`
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
### Polygon output
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
* `polygon_threshold=0.0`
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the geometry.
geometry. Topology is preserved in the result. (Default: 0.0)
### Other
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
* `email=<valid email address>`
If you are making large numbers of requests, please include an appropriate email
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
* `debug=[0|1]`
Output assorted developer debug information. Data on internals of Nominatim's
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
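As a minimal sketch (Python standard library only, placeholder ids), a
lookup request could be issued like this:

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    'osm_ids': 'R146656,W104393803,N240109189',  # placeholder ids
    'format': 'jsonv2',
    'addressdetails': 1,
})
req = urllib.request.Request(
    f'https://nominatim.openstreetmap.org/lookup?{params}',
    headers={'User-Agent': 'my-app/1.0'})
with urllib.request.urlopen(req) as resp:
    for place in json.load(resp):
        print(place['display_name'])
```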
## Examples

View File

@@ -1,16 +1,8 @@
### Nominatim API
!!! Attention
The current version of Nominatim implements two different search frontends:
the old PHP frontend and the new Python frontend. They have a very similar
API but differ in some implementation details. These are marked in the
documentation as `[Python-only]` or `[PHP-only]`.
Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
`https://nominatim.openstreetmap.org` implements the **Python frontend**.
So users should refer to the **`[Python-only]`** comments.
This section describes the API V1 of the Nominatim web service. The
service offers the following endpoints:
Its API has the following endpoints for querying the data:
* __[/search](Search.md)__ - search OSM objects by name or type
* __[/reverse](Reverse.md)__ - search OSM object by their location
@@ -20,6 +12,3 @@ service offers the following endpoints:
back in Nominatim in case the deletion was accidental
* __/polygons__ - list of broken polygons detected by Nominatim
* __[/details](Details.md)__ - show internal details for an object (for debugging only)

View File

@@ -1,7 +1,6 @@
# Reverse Geocoding
Reverse geocoding generates an address from a coordinate given as
latitude and longitude.
Reverse geocoding generates an address from a latitude and longitude.
## How it works
@@ -19,7 +18,8 @@ The other issue to be aware of is that the closest OSM object may not always
have a similar enough address to the coordinate you were requesting. For
example, in dense city areas it may belong to a completely different street.
## Endpoint
## Parameters
The main format of the reverse API is
@@ -31,101 +31,57 @@ where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
projection. The API returns exactly one result or an error when the coordinate
is in an area with no OSM data coverage.
Additional parameters are accepted as listed below.
!!! danger "Deprecation warning"
!!! warning "Deprecation warning"
The reverse API used to allow address lookup for a single OSM object by
its OSM id (`[PHP-only]`). This use is considered deprecated.
Use the [Address Lookup API](Lookup.md) instead.
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
and will be removed in future versions.
## Parameters
This section lists additional parameters to further influence the output.
its OSM id. This use is now deprecated. Use the [Address Lookup API](Lookup.md)
instead.
### Output format
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format.
See [Place Output Formats](Output.md) for details on each format. (Default: xml)
* `json_callback=<string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
### Output details
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 1 |
* `addressdetails=[0|1]`
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
Include a breakdown of the address into elements. (Default: 1)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
* `extratags=[0|1]`
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
* `namedetails=[0|1]`
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
### Language of results
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
* `accept-language=<browser language string>`
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Preferred language order for showing search results, overrides the value
specified in the "Accept-Language" HTTP header.
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers, on the contrary, always
send the currently chosen browser language.
### Result limitation
* `zoom=[0-18]`
### Result restriction
| Parameter | Value | Default |
|-----------| ----- | ------- |
| zoom | 0-18 | 18 |
Level of detail required for the address. This is a number that
Level of detail required for the address. Default: 18. This is a number that
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
like Leaflet.js, Openlayers etc.
In terms of address details the zoom levels are as follows:
@@ -136,81 +92,41 @@ In terms of address details the zoom levels are as follows:
5 | state
8 | county
10 | city
12 | town / borough
13 | village / suburb
14 | neighbourhood
15 | any settlement
14 | suburb
16 | major streets
17 | major and minor streets
18 | building
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states etc.) and administrative boundaries.
The `poi` layer selects all points of interest. This includes classic points
of interest like restaurants, shops, hotels but also less obvious features
like recycling bins, guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
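For illustration, a reverse query restricted to natural features might be
built like this (coordinate chosen arbitrarily):

```python
import urllib.parse

params = urllib.parse.urlencode({
    'lat': 47.421, 'lon': 10.985,  # arbitrary example coordinate
    'layer': 'natural',
    'format': 'jsonv2',
})
print(f'https://nominatim.openstreetmap.org/reverse?{params}')
```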
### Polygon output
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
* `polygon_threshold=0.0`
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the geometry.
geometry. Topology is preserved in the result. (Default: 0.0)
### Other
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
* `email=<valid email address>`
If you are making large numbers of requests, please include an appropriate email
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
If you are making a large number of requests, please include an appropriate email
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
* `debug=[0|1]`
Output assorted developer debug information. Data on internals of Nominatim's
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
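Putting the parameters together, a complete reverse request might look like
this minimal Python sketch (standard library only; coordinate arbitrary):

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    'lat': 52.5487, 'lon': -1.8164,  # arbitrary example coordinate
    'format': 'jsonv2',
    'zoom': 18,
    'addressdetails': 1,
})
req = urllib.request.Request(
    f'https://nominatim.openstreetmap.org/reverse?{params}',
    headers={'User-Agent': 'my-app/1.0'})  # identify yourself per usage policy
with urllib.request.urlopen(req) as resp:
    print(json.load(resp)['display_name'])
```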
## Examples

View File

@@ -8,12 +8,12 @@ The search query may also contain
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
This can be used to narrow down the kind of objects to be returned.
!!! note
!!! warning
Special phrases are not suitable to query all objects of a certain type in an
area. Nominatim will always just return a collection of the best matches. To
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
## Endpoint
## Parameters
The search API has the following format:
@@ -21,62 +21,35 @@ The search API has the following format:
https://nominatim.openstreetmap.org/search?<params>
```
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/search.php`. This is now deprecated
and will be removed in future versions.
The search term may be specified with two different sets of parameters:
The query term can be given in two different forms: free-form or structured.
* `q=<query>`
### Free-form query
Free-form query string to search for.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
| Parameter | Value |
|-----------| ----- |
| q | Free-form query string to search for |
In this form, the query can be unstructured.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
* `street=<housenumber> <streetname>`
* `city=<city>`
* `county=<county>`
* `state=<state>`
* `country=<country>`
* `postalcode=<postalcode>`
The free-form query may also contain special phrases to describe the type of
place to be returned or a coordinate to search close to a position.
Alternative query string format split into several parameters for structured requests.
Structured requests are faster but are less robust against alternative
OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
### Structured query
| Parameter | Value |
|----------- | ----- |
| amenity | name and/or type of POI |
| street | housenumber and streetname |
| city | city |
| county | county |
| state | state |
| country | country |
| postalcode | postal code |
The structured form of the search query allows looking up an address
that is already split into its components. Each parameter represents a field
of the address. All parameters are optional. You should only use the ones
that are relevant for the address you want to geocode.
!!! Attention
Cannot be combined with the `q=<query>` parameter. Newer versions of
the API will return an error if you do so. Older versions simply return
unexpected results.
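As a sketch of the structured form (values purely illustrative, using only
the Python standard library):

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    'street': '135 Pilkington Avenue',  # illustrative address
    'city': 'Birmingham',
    'country': 'United Kingdom',
    'format': 'jsonv2',
})
req = urllib.request.Request(
    f'https://nominatim.openstreetmap.org/search?{params}',
    headers={'User-Agent': 'my-app/1.0'})
with urllib.request.urlopen(req) as resp:
    for place in json.load(resp):
        print(place['display_name'])
```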
## Parameters
The following parameters can be used to further restrict the search and
change the output. They are usable for both forms of the search query.
Both query forms accept the additional parameters listed below.
### Output format
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
* `format=[xml|json|jsonv2|geojson|geocodejson]`
See [Place Output Formats](Output.md) for details on each format.
See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
!!! note
The Nominatim service at
@@ -84,150 +57,52 @@ See [Place Output Formats](Output.md) for details on each format.
has a different default behaviour for historical reasons. When the
`format` parameter is omitted, the request will be forwarded to the Web UI.
* `json_callback=<string>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| json_callback | function name | _unset_ |
When given, JSON output will be wrapped in a callback function with
the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
information.
Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| limit | number | 10 |
Limit the maximum number of returned results. Cannot be more than 40.
Nominatim may decide to return fewer results than given, if additional
results do not sufficiently match the query.
### Output details
| Parameter | Value | Default |
|-----------| ----- | ------- |
| addressdetails | 0 or 1 | 0 |
* `addressdetails=[0|1]`
When set to 1, include a breakdown of the address into elements.
The exact content of the address breakdown depends on the output format.
!!! tip
If you are interested in a stable classification of address categories
(suburb, city, state, etc), have a look at the `geocodejson` format.
All other formats return classifications according to OSM tagging.
There is a much larger set of categories and they are not always consistent,
which makes them very hard to work with.
Include a breakdown of the address into elements. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| extratags | 0 or 1 | 0 |
* `extratags=[0|1]`
When set to 1, the response includes any additional information in the result
that is available in the database, e.g. wikipedia link, opening hours.
Include additional information in the result if available,
e.g. wikipedia link, opening hours. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| namedetails | 0 or 1 | 0 |
* `namedetails=[0|1]`
When set to 1, include a full list of names for the result. These may include
language variants, older names, references and brand.
Include a list of alternative names in the results. These may include
language variants, references, operator and brand. (Default: 0)
### Language of results
| Parameter | Value | Default |
|-----------| ----- | ------- |
| accept-language | browser language string | content of "Accept-Language" HTTP header |
* `accept-language=<browser language string>`
Preferred language order for showing search results. This may either be
a simple comma-separated list of language codes or have the same format
as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Preferred language order for showing search results, overrides the value
specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
Either use a standard RFC2616 accept-language string or a simple
comma-separated list of language codes.
!!! tip
First-time users of Nominatim tend to be confused that they get different
results when using Nominatim in the browser versus in a command-line tool
like wget or curl. The command-line tools
usually don't send any Accept-Language header, prompting Nominatim
to show results in the local language. Browsers, on the contrary, always
send the currently chosen browser language.
### Result limitation
### Result restriction
* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
There are two ways to influence the results. *Filters* exclude certain
kinds of results completely. *Boost parameters* only change the order of the
results and thus give a preference to some results over others.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| countrycodes | comma-separated list of country codes | _unset_ |
Filter that limits the search results to one or more countries.
The country code must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
Limit search results to one or more countries. `<countrycode>` must be the
[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
e.g. `gb` for the United Kingdom, `de` for Germany.
Each place in Nominatim is assigned to one country code based
on OSM country boundaries. In rare cases a place may not be in any country
at all, for example, when it is in international waters. These places are
also excluded when the filter is set.
at all, for example, in international waters.
!!! Note
This parameter should not be confused with the 'country' parameter of
the structured query. The 'country' parameter contains a search term
and will be handled with some fuzziness. The `countrycodes` parameter
is a hard filter and as such should be preferred. Having both parameters
in the same query will work. If the parameters contradict each other,
the search will come up empty.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
**`[Python-only]`**
The layer filter allows selecting places by theme.
The `address` layer contains all places that make up an address:
address points with house numbers, streets, inhabited places (suburbs, villages,
cities, states etc.) and administrative boundaries.
The `poi` layer selects all points of interest. This includes classic POIs like
restaurants, shops, hotels but also less obvious features like recycling bins,
guideposts or benches.
The `railway` layer includes railway infrastructure like tracks.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database.
The `natural` layer collects features like rivers, lakes and mountains while
the `manmade` layer functions as a catch-all for features not covered by the
other layers.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
The featureType parameter allows a more fine-grained selection of places
from the address layer. Results can be restricted to places that make up
the 'state', 'country' or 'city' part of an address. A featureType of
settlement selects any human inhabited feature from 'state' down to
'neighbourhood'.
When featureType is set, then results are automatically restricted
to the address layer (see above).
!!! tip
Instead of using the featureType filters `country`, `state` or `city`,
you can also use a structured query without the finer-grained parameters
amenity or street.
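For example, a search for city-level results only could be built like this
(query term arbitrary):

```python
import urllib.parse

params = urllib.parse.urlencode({
    'q': 'Berlin',          # arbitrary query
    'featureType': 'city',  # implies the address layer
    'format': 'jsonv2',
})
print(f'https://nominatim.openstreetmap.org/search?{params}')
```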
| Parameter | Value | Default |
|-----------| ----- | ------- |
| exclude_place_ids | comma-separated list of place ids | _unset_ |
* `exclude_place_ids=<place_id>[,<place_id>][,<place_id>]`
If you do not want certain OSM objects to appear in the search
result, give a comma-separated list of the `place_id`s you want to skip.
@@ -235,212 +110,180 @@ This can be used to retrieve additional search results. For example, if a
previous query only returned a few results, then including those here would
cause the search to return other, less accurate, matches (if possible).
| Parameter | Value | Default |
|-----------| ----- | ------- |
| viewbox | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
Boost parameter which focuses the search on the given area.
Any two corner points of the box are accepted as long as they make a proper
box. `x` is longitude, `y` is latitude.
* `limit=<integer>`
| Parameter | Value | Default |
|-----------| ----- | ------- |
| bounded | 0 or 1 | 0 |
Limit the number of returned results. (Default: 10, Maximum: 50)
When set to 1, it turns the 'viewbox' parameter (see above) into
a filter parameter, excluding any results outside the viewbox.
When `bounded=1` is given and the viewbox is small enough, then an amenity-only
search is allowed. Give the special keyword for the amenity in square
* `viewbox=<x1>,<y1>,<x2>,<y2>`
The preferred area to find search results. Any two corner points of the box
are accepted as long as they span a real box. `x` is longitude,
`y` is latitude.
* `bounded=[0|1]`
When a viewbox is given, restrict the result to items contained within that
viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
only search is allowed. Give the special keyword for the amenity in square
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
There is no guarantee that the result returns all objects in the area.
There is no guarantee that the result is complete. (Default: 0)
### Polygon output
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_geojson | 0 or 1 | 0 |
| polygon_kml | 0 or 1 | 0 |
| polygon_svg | 0 or 1 | 0 |
| polygon_text | 0 or 1 | 0 |
* `polygon_geojson=1`
* `polygon_kml=1`
* `polygon_svg=1`
* `polygon_text=1`
Add the full geometry of the place to the result output. Output formats
in GeoJSON, KML, SVG or WKT are supported. Only one of these
options can be used at a time.
Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
options can be used at a time. (Default: 0)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| polygon_threshold | floating-point number | 0.0 |
* `polygon_threshold=0.0`
When one of the polygon_* outputs is chosen, return a simplified version
of the output geometry. The parameter describes the
Return a simplified version of the output geometry. The parameter is the
tolerance in degrees with which the geometry may differ from the original
geometry. Topology is preserved in the geometry.
geometry. Topology is preserved in the result. (Default: 0.0)
### Other
| Parameter | Value | Default |
|-----------| ----- | ------- |
| email | valid email address | _unset_ |
* `email=<valid email address>`
If you are making large numbers of requests, please include an appropriate email
address to identify your requests. See Nominatim's
[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
| Parameter | Value | Default |
|-----------| ----- | ------- |
| dedupe | 0 or 1 | 1 |
* `dedupe=[0|1]`
Sometimes you have several objects in OSM identifying the same place or
object in reality. The simplest case is a street being split into many
different OSM ways due to different characteristics. Nominatim will
attempt to detect such duplicates and only return one match. Setting
this parameter to 0 disables this deduplication mechanism and
ensures that all results are returned.
attempt to detect such duplicates and only return one match unless
this parameter is set to 0. (Default: 1)
| Parameter | Value | Default |
|-----------| ----- | ------- |
| debug | 0 or 1 | 0 |
* `debug=[0|1]`
Output assorted developer debug information. Data on internals of Nominatim's
"search loop" logic, and SQL queries. The output is HTML format.
This overrides the specified machine readable format.
"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
This overrides the specified machine readable format. (Default: 0)
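As a final sketch combining filter and boost parameters (viewbox coordinates
arbitrary), an amenity-only search could look like this:

```python
import json
import urllib.parse
import urllib.request

params = urllib.parse.urlencode({
    'q': '[pub]',                          # amenity-only search
    'viewbox': '-1.83,52.56,-1.80,52.54',  # two corner points, lon/lat
    'bounded': 1,                          # turn the viewbox into a filter
    'limit': 5,
    'format': 'jsonv2',
})
req = urllib.request.Request(
    f'https://nominatim.openstreetmap.org/search?{params}',
    headers={'User-Agent': 'my-app/1.0'})
with urllib.request.urlopen(req) as resp:
    for place in json.load(resp):
        print(place['display_name'])
```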
## Examples
##### XML with KML polygon
##### XML with kml polygon
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
```xml
<?xml version="1.0" encoding="UTF-8" ?>
<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
querystring="135 pilkington avenue, birmingham"
more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&amp;polygon_kml=1&amp;addressdetails=1&amp;limit=20&amp;exclude_place_ids=125279639&amp;format=xml"
exclude_place_ids="125279639">
<place place_id="125279639"
osm_type="way"
osm_id="90394480"
lat="52.5487921"
lon="-1.8164308"
boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
place_rank="30"
address_rank="30"
display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
class="building"
type="residential"
importance="9.999999994736442e-08">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<hamlet>Maney</hamlet>
<town>Sutton Coldfield</town>
<village>Wylde Green</village>
<city>Birmingham</city>
<ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
<state_district>West Midlands Combined Authority</state_district>
<state>England</state>
<ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
<postcode>B72 1LH</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
<searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
<place
place_id="1620612" osm_type="node" osm_id="452010817"
boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
lat="52.5487429714954" lon="-1.81602098644987"
display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
class="place" type="house">
<geokml>
<Polygon>
<outerBoundaryIs>
<LinearRing>
<coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
</LinearRing>
</outerBoundaryIs>
</Polygon>
</geokml>
<house_number>135</house_number>
<road>Pilkington Avenue</road>
<village>Wylde Green</village>
<town>Sutton Coldfield</town>
<city>City of Birmingham</city>
<county>West Midlands (county)</county>
<postcode>B72</postcode>
<country>United Kingdom</country>
<country_code>gb</country_code>
</place>
</searchresults>
```
##### JSON with SVG polygon
[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
```json
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"historic": "Kommandantenhaus",
"house_number": "1",
"neighbourhood": "Friedrichswerder",
"postcode": "10117",
"road": "Unter den Linden",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170798",
"52.5173311",
"13.3975116",
"13.3981577"
],
"class": "historic",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
"importance": 0.8135042058306902,
"lat": "52.51720765",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
"lon": "13.397834399325466",
"osm_id": 15976890,
"osm_type": "way",
"place_id": 108681845,
"svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
"type": "house"
}
]
{
"address": {
"city": "Berlin",
"city_district": "Mitte",
"construction": "Unter den Linden",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"house_number": "1",
"neighbourhood": "Scheunenviertel",
"postcode": "10117",
"public_building": "Kommandantenhaus",
"state": "Berlin",
"suburb": "Mitte"
},
"boundingbox": [
"52.5170783996582",
"52.5173187255859",
"13.3975105285645",
"13.3981599807739"
],
"class": "amenity",
"display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
"importance": 0.73606775332943,
"lat": "52.51719785",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.3978352028938",
"osm_id": "15976890",
"osm_type": "way",
"place_id": "30848715",
"svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
"type": "public_building"
}
```
##### JSON with address details
[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
```json
[
{
"address": {
"ISO3166-2-lvl4": "DE-BE",
"borough": "Mitte",
"city": "Berlin",
"country": "Deutschland",
"country_code": "de",
"neighbourhood": "Sprengelkiez",
"postcode": "13347",
"road": "Lindower Straße",
"shop": "Ditsch",
"suburb": "Wedding"
},
"addresstype": "shop",
"boundingbox": [
"52.5427201",
"52.5427654",
"13.3668619",
"13.3669442"
],
"category": "shop",
"display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
"importance": 9.99999999995449e-06,
"lat": "52.54274275",
"licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
"lon": "13.36690305710228",
"name": "Ditsch",
"osm_id": 437595031,
"osm_type": "way",
"place_id": 204751033,
"place_rank": 30,
"type": "bakery"
}
]
{
"address": {
"bakery": "B\u00e4cker Kamps",
"city_district": "Mitte",
"continent": "European Union",
"country": "Deutschland",
"country_code": "de",
"footway": "Bahnsteig U6",
"neighbourhood": "Sprengelkiez",
"postcode": "13353",
"state": "Berlin",
"suburb": "Wedding"
},
"boundingbox": [
"52.5460929870605",
"52.5460968017578",
"13.3591794967651",
"13.3591804504395"
],
"class": "shop",
"display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
"icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
"importance": 0.201,
"lat": "52.5460941",
"licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
"lon": "13.35918",
"osm_id": "317179427",
"osm_type": "node",
"place_id": "1453068",
"type": "bakery"
}
```
##### GeoJSON

View File

@@ -1,50 +1,35 @@
# Status
Report on the state of the service and database. Useful for checking if the
service is up and running. The JSON output also reports
Useful for checking if the service and database are running. The JSON output also shows
when the database was last updated.
## Endpoint
The status API has the following format:
```
https://nominatim.openstreetmap.org/status
```
!!! danger "Deprecation warning"
The API can also be used with the URL
`https://nominatim.openstreetmap.org/status.php`. This is now deprecated
and will be removed in future versions.
## Parameters
The status endpoint takes a single optional parameter:
| Parameter | Value | Default |
|-----------| ----- | ------- |
| format | one of: `text`, `json` | 'text' |
Selects the output format. See below.
* `format=[text|json]` (defaults to 'text')
## Output
#### Text format
When everything is okay, a status code 200 is returned and a simple message: `OK`
```
https://nominatim.openstreetmap.org/status.php
```
On error it will return HTTP status code 500 and print a detailed error message, e.g.
will return HTTP status code 200 and print `OK`.
On error it will return HTTP status code 500 and print a message, e.g.
`ERROR: Database connection failed`.
#### JSON format
Always returns HTTP code 200 when the status call could be executed.
```
https://nominatim.openstreetmap.org/status.php?format=json
```
On success a JSON dictionary with the following structure is returned:
will return HTTP code 200 and a structure
```json
{
@@ -60,8 +45,8 @@ The `software_version` field contains the version of Nominatim used to serve
the API. The `database_version` field contains the version of the data format
in the database.
On error, it will return a shorter JSON dictionary with the error message
and status only, e.g.
On error, it will also return HTTP status code 200 and a structure with error
code and message, e.g.
```json
{
@@ -69,3 +54,14 @@ and status only, e.g.
"message": "Database connection failed"
}
```
Possible status codes are
| | message | notes |
| --- | ------------------------------ | ----------------------------------------------------------------- |
| 700 | "No database" | connection failed |
| 701 | "Module failed" | database could not load nominatim.so |
| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
| 703 | "Query failed" | test query against a database table failed |
| 704 | "No value" | test query worked but returned no results |
| 705 | "Import date is not available" | No import dates were returned (enabling replication can fix this) |
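A health check built on the JSON format therefore has to inspect the
`status` field rather than the HTTP code; a minimal sketch:

```python
import json
import urllib.request

with urllib.request.urlopen('https://nominatim.openstreetmap.org/status?format=json') as resp:
    state = json.load(resp)
if state['status'] == 0:
    print('OK')
else:
    print(f"error {state['status']}: {state['message']}")
```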

View File

@@ -1,439 +1,149 @@
## Configuring the Import
In the very first step of a Nominatim import, OSM data is loaded into the
database. Nominatim uses [osm2pgsql](https://osm2pgsql.org) for this task.
It comes with a [flex style](https://osm2pgsql.org/doc/manual.html#the-flex-output)
specifically tailored to filter and convert OSM data into Nominatim's
internal data representation.
Which OSM objects are added to the database and which of the tags are used
can be configured via the import style configuration file. This
is a JSON file which contains a list of rules which are matched against every
tag of every object and then assign the tag its specific role.
There are a number of default configurations for the flex style which
result in geocoding databases of different detail. The
[Import section](../admin/Import.md#filtering-imported-data) explains
these default configurations in detail.
The style to use is given by the `NOMINATIM_IMPORT_STYLE` configuration
option. There are a number of default styles, which are explained in detail
in the [Import section](../admin/Import.md#filtering-imported-data). These
standard styles may be referenced by their name.
You can also create your own custom style. Put the style file into your
project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
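For example, a project-local `.env` could contain (file name hypothetical):

```
NOMINATIM_IMPORT_STYLE=my-style.lua
```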
It is always recommended to start with one of the standard styles and customize
those. You find the standard styles under the name `import-<stylename>.lua`
those. You find the standard styles under the name `import-<stylename>.style`
in the standard Nominatim configuration path (usually `/etc/nominatim` or
`/usr/local/etc/nominatim`).
The remainder of the page describes how the flex style works and how to
customize it.
The remainder of the page describes the format of the file.
### The `flex-base.lua` module
### Configuration Rules
The core of Nominatim's flex import configuration is the `flex-base` module.
It defines the table layout used by Nominatim and provides standard
implementations for the import callbacks that make it easy to customize
how OSM tags are used by Nominatim.
Every custom style should include this module to make sure that the correct
tables are created. Thus start your custom style as follows:
``` lua
local flex = require('flex-base')
```
A single rule looks like this:
```json
{
"keys" : ["key1", "key2", ...],
"values" : {
"value1" : "prop",
"value2" : "prop1,prop2"
}
}
```
The following sections explain how the module can be customized.
### Changing the recognized tags
If you just want to change which OSM tags are recognized during import,
then there are a number of convenience functions to set the tag lists used
during the processing.
!!! warning
There are no built-in defaults for the tag lists, so all the functions
need to be called from your style script to fully process the data.
Make sure you start from one of the default styles and only modify
the data you are interested in. You can also derive your style from an
existing style by importing the appropriate module, e.g.
`local flex = require('import-street')`.
Many of the following functions take _key match lists_. These lists can
contain three kinds of strings to match against tag keys:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
#### `set_main_tags()` - principal tags
If a principal or main tag is found on an OSM object, then the object
is included in Nominatim's search index. A single object may also have
multiple main tags. In that case, the object will be included multiple
times in the index, once for each main tag.
The flex script distinguishes between four types of main tags:
* __always__: a main tag that is used unconditionally
* __named__: consider this main tag only, if the object has a proper name
(a reference is not enough, see below).
* __named_with_key__: consider this main tag only, when the object has
a proper name with a domain prefix. For example, if the main tag is
`bridge=yes`, then it will only be added as an extra row, if there is
a tag `bridge:name[:XXX]` for the same object. If this property is set,
all other names that are not domain-specific are ignored.
* __fallback__: use this main tag only, if there is no other main tag.
Fallback always implies `named`, i.e. fallbacks are only tried for
named objects.
The `set_main_tags()` function takes exactly one table parameter which
defines the keys and key/value combinations to include and the kind of
main tag. Each lua table key defines an OSM tag key. The value may
be a string defining the kind of main key as described above. Then the tag will
be considered a main tag for any possible value. To further restrict
which values are acceptable, give a table with the permitted values
and their kind of main tag. If the table contains a simple value without
key, then this is used as default for values that are not listed.
!!! example
``` lua
local flex = require('import-full')
flex.set_main_tags{
boundary = {administrative = 'named'},
highway = {'always', street_lamp = 'named'},
landuse = 'fallback'
}
```
In this example an object with a `boundary` tag will only be included
when it has a value of `administrative`. Objects with `highway` tags are
always included. However when the value is `street_lamp` then the object
must have a name, too. With any other value, the object is included
independently of the name. Finally, if a `landuse` tag is present then
it will be used independently of the concrete value if neither boundary
nor highway tags were found and the object is named.
#### `set_prefilters()` - ignoring tags
Pre-filtering of tags allows ignoring them in any further processing.
Thus pre-filtering takes precedence over any other tag processing. This is
useful when some specific key/value combinations need to be excluded from
processing. When tags are filtered, they may either be deleted completely
or moved to `extratags`. Extra tags are saved with the object and returned
to the user when requested, but are not used otherwise.
`set_prefilters()` takes a table with four optional fields:
* __delete_keys__ is a _key match list_ for tags that should be deleted
* __delete_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are deleted.
* __extra_keys__ is a _key match list_ for tags which should be saved into
extratags
* __extra_tags__ contains a table of tag keys pointing to a list of tag
values. Tags with matching key/value pairs are moved to extratags.
A key list may contain three kinds of strings:
A string that ends in an asterisk `*` is a prefix match and accordingly matches
against any key that starts with the given string (minus the `*`).
A suffix match can be defined similarly with a string that starts with a `*`.
Any other string is matched exactly against tag keys.
!!! example
``` lua
local flex = require('import-full')
flex.set_prefilters{
delete_keys = {'source', 'source:*'},
extra_tags = {amenity = {'yes', 'no'}}
}
flex.set_main_tags{
amenity = 'always'
}
```
In this example any tags `source` and tags that begin with `source:` are
deleted before any other processing is done. Getting rid of frequent tags
this way can speed up the import.
Tags with `amenity=yes` or `amenity=no` are moved to extratags. Later
all tags with an `amenity` key are made a main tag. This effectively means
that Nominatim will use all amenity tags except for those with value
yes and no.
#### `set_name_tags()` - defining names
The flex script distinguishes between two kinds of names:
* __main__: the primary names make an object fully searchable.
Main tags of type _named_ will only cause the object to be included when
such a primary name is present. Primary names are usually those found
in the `name` tag and its variants.
* __extra__: extra names are still added to the search index but they are
alone not sufficient to make an object named.
`set_name_tags()` takes a table with two optional fields `main` and `extra`.
They take _key match lists_ for main and extra names respectively.
!!! example
``` lua
local flex = require('flex-base')
flex.set_main_tags{highway = {traffic_light = 'named'}}
flex.set_name_tags{main = {'name', 'name:*'},
extra = {'ref'}
}
```
This example creates a search index over traffic lights but will
only include those that have a common name and not those which just
have some reference ID from the city.
#### `set_address_tags()` - defining address parts
Address tags will be used to build up the address of an object.
`set_address_tags()` takes a table with arbitrary fields pointing to
_key match lists_. Two fields have a special meaning:
* __main__: defines
the tags that make a full address object out of the OSM object. This
is usually the housenumber or variants thereof. If a main address tag
appears, then the object will always be included, if necessary with a
fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:`
this will be stripped.
* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped.
All other fields will be handled as summary fields. If a key matches the
key match list, then its value will be added to the address tags with the
name of the field as key. If multiple tags match, then an arbitrary one
wins.
Country tags are handled slightly differently. Only tags with a two-letter code
are accepted, all other values are discarded.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
main = {'addr:housenumber'},
extra = {'addr:*'},
postcode = {'postal_code', 'postcode', 'addr:postcode'},
country = {'country-code', 'ISO3166-1'}
}
```
In this example all tags which begin with `addr:` will be saved in
the address tag list. If one of the tags is `addr:housenumber`, the
object will fall back to be entered as a `place=house` in the database
unless another matching main tag is found.
Tags with the keys `country-code` and `ISO3166-1` are saved with their
value under `country` in the address tag list. The same thing happens
to postcodes: they will always be saved under the key `postcode`, thus
normalizing the multitude of keys that are used in the OSM database.
#### `set_unused_handling()` - processing remaining tags
This function defines what to do with tags that remain after all tags
have been classified using the functions above. There are two ways in
which the function can be used:
`set_unused_handling(delete_keys = ..., delete_tags = ...)` deletes all
keys that match the descriptions in the parameters and moves all remaining
tags into the extratags list.
`set_unused_handling(extra_keys = ..., extra_tags = ...)` moves all tags
matching the parameters into the extratags list and then deletes the remaining
tags. For the format of the parameters see the description in `set_prefilters()`
above.
!!! example
``` lua
local flex = require('import-full')
flex.set_address_tags{
    main = {'addr:housenumber'},
    extra = {'addr:*', 'tiger:county'}
}
flex.set_unused_handling{delete_keys = {'tiger:*'}}
```
In this example all remaining tags except those beginning with `tiger:`
are moved to the extratags list. Note that it is not possible to
delete the tiger tags with `set_prefilters()` already, because that
would remove `tiger:county` before the address tags are processed.
### Customizing osm2pgsql callbacks
osm2pgsql expects the flex style to implement three callbacks, one process
function per OSM type. If you want to implement special handling for
certain OSM types, you can override the default implementations provided
by the flex-base module.
#### Changing the relation types to be handled
The default script only allows relations of type `multipolygon`, `boundary`
and `waterway`. To add other relation types, set `RELATION_TYPES` for
the type to the kind of geometry that should be created. The following
kinds of geometries can be used:
* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
the relation. If the ways do not form a valid area, then the object is
silently discarded.
* __relation_as_multiline__ creates a (Multi)LineString from the ways in
the relations. Ways are combined as much as possible without any regards
to their order in the relation.
!!! Example
``` lua
local flex = require('import-full')
flex.RELATION_TYPES['site'] = flex.relation_as_multipolygon
```
With this line, relations of `type=site` will be included in the index
according to the main tags found. This only works when the site relation
resolves to a valid area. Nodes in the site relation are not part of the
geometry.
#### Adding additional logic to processing functions
The default processing functions are also exported by the flex-base module
as `process_node`, `process_way` and `process_relation`. These can be used
to implement your own processing functions with some additional processing
logic.
!!! Example
``` lua
local flex = require('import-full')
function osm2pgsql.process_relation(object)
if object.tags.boundary ~= 'administrative' or object.tags.admin_level ~= '2' then
flex.process_relation(object)
end
end
```
This example discards all country-level boundaries and uses standard
handling for everything else. This can be useful if you want to use
your own custom country boundaries.
### Customizing the main processing function
The main processing function of the flex style can be found in the function
`process_tags`. This function is called for all OSM object kinds and is
responsible for filtering the tags and writing out the rows into PostgreSQL.
!!! Example
``` lua
local flex = require('import-full')
local original_process_tags = flex.process_tags
function flex.process_tags(o)
if o.object.tags.highway ~= nil and o.object.tags.access == 'no' then
return
end
original_process_tags(o)
end
```
This example shows the simplest customization of the process_tags function.
It simply adds some additional processing before running the original code.
To do that, first save the original function and then overwrite process_tags
in the module. In this example all highways that are not accessible
by anyone will be ignored.
#### The `Place` class
The `process_tags` function receives a Lua object of `Place` type which comes
with some handy functions to collect the data necessary for geocoding and
writing it into the place table. Always use this object to fill the table.
The Place class has some attributes which you may access read-only:
* __object__ is the original OSM object data handed in by osm2pgsql
* __admin_level__ is the content of the admin_level tag, parsed into an
integer and normalized to a value between 0 and 15
* __has_name__ is a boolean indicating if the object has a full name
* __names__ is a table with the collected list of name tags
* __address__ is a table with the collected list of address tags
* __extratags__ is a table with the collected list of additional tags to save
There are a number of functions to fill these fields. All functions expect
a table parameter with fields as indicated in the description.
Many of these functions expect match functions which are described in detail
further below.
* __delete{match=...}__ removes all tags that match the match function given
in _match_.
* __grab_extratags{match=...}__ moves all tags that match the match function
given in _match_ into extratags. Returns the number of tags moved.
* __clean{delete=..., extra=...}__ deletes all tags that match _delete_ and
moves the ones that match _extra_ into extratags.
* __grab_address_parts{groups=...}__ moves matching tags into the address table.
_groups_ must be a group match function. Tags of the group `main` and
`extra` are added to the address table as is but with `addr:` and `is_in:`
prefixes removed from the tag key. All other groups are added with the
group name as key and the value from the tag. Multiple values of the same
group overwrite each other. The function returns the number of tags saved
from the main group.
* __grab_main_parts{groups=...}__ moves matching tags into the name table.
_groups_ must be a group match function. If a tag of the group `main` is
present, the object will be marked as having a name. Tags of group `house`
produce a fallback to `place=house`. This fallback is returned by the function
if present.
There are two functions to write a row into the place table. Both functions
expect the main tag (key and value) for the row and then use the collected
information from the name, address, extratags etc. fields to complete the row.
They also have a boolean parameter `save_extra_mains` which defines how any
unprocessed tags are handled: when `true`, the tags will be saved as extratags,
when `false`, they will simply be discarded.
* __write_row(key, value, save_extra_mains)__ creates a new table row from
the current state of the Place object.
* __write_place(key, value, mtype, save_extra_mains)__ creates a new row
conditionally. When value is nil, the function will attempt to look up the
value in the object tags. If value is still nil or mtype is nil, the row
is ignored. An mtype of `always` will always write out the row, an mtype
of `named` only when the object has a full name. When mtype
is `named_with_key`, the function checks for a domain name, i.e. a name
tag prefixed with the name of the main key. Only if at least one is found will
the row be written. The names are replaced with the domain names found.
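The following minimal sketch shows how these functions might be combined in a
custom processing step. The tag choices are purely illustrative, and the
matchers are built with the convenience functions described below:
``` lua
local flex = require('flex-base')
-- illustrative matcher for source tags (see 'Match functions' below)
local delete_match = flex.tag_match{keys = {'source', 'source:*'}}
-- illustrative group matcher for address tags
local addr_groups = flex.tag_group{main = {'addr:housenumber'},
                                   extra = {'addr:*'}}
local function example_process(place)
    place:delete{match = delete_match}
    place:grab_address_parts{groups = addr_groups}
    -- write a row for the amenity main tag, but only for named objects;
    -- remaining unprocessed tags are discarded (save_extra_mains = false)
    place:write_place('amenity', nil, 'named', false)
end
```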
#### Match functions
The Place functions usually expect either a _match function_ or a
_group match function_ to find the tags to apply their function to.
The __match function__ is a Lua function which takes two parameters,
key and value, and returns a boolean to indicate that a tag matches. The
flex-base module has a convenience function `tag_match()` to create such a
function. It takes a table with two optional fields: `keys` takes a key match
list (see above), `tags` takes a table with keys that point to a list of
possible values, thus defining key/value matches.
The __group match function__ is a Lua function which also takes two parameters,
key and value, and returns a string indicating to which group or type they
belong. The convenience function `tag_group()` can be used to create such a
function. It expects a table where the group names are the keys and the values
are key match lists.
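Here is a short sketch of both kinds of functions; the tag lists are made up
for illustration:
``` lua
local flex = require('flex-base')
-- match function: true for all source tags and for attribution=osm
local is_metadata = flex.tag_match{keys = {'source', 'source:*'},
                                   tags = {attribution = {'osm'}}}
-- is_metadata('source:date', 'survey')  --> true (prefix match on key)
-- group match function: sorts keys into named groups
local addr_group = flex.tag_group{main = {'addr:housenumber'},
                                  extra = {'is_in:*'}}
-- addr_group('addr:housenumber', '4')  --> 'main'
-- addr_group('is_in:city', 'Berlin')   --> 'extra'
```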
### Using the gazetteer output of osm2pgsql
Nominatim still allows you to configure the gazetteer output to remain
backwards compatible with older imports. It will be automatically used
when the style file name ends in `.style`. For documentation of the
old import style, please refer to the documentation of older releases
of Nominatim. Do not use the gazetteer output for new imports. There is no
guarantee that new versions of Nominatim are fully compatible with the
gazetteer output.
A style file consists of a list of rules. A rule first defines a list of keys
to apply the rule to. This is always a list of strings. The string may have
four forms. An empty string matches against any key. A string that ends in an
asterisk `*` is a prefix match and accordingly matches against any key that
starts with the given string (minus the `*`). A suffix match can be defined
similarly with a string that starts with a `*`. Any other string constitutes
an exact match.
The second part of the rules defines a list of values and the properties that
apply to a successful match. Value strings may be either empty, which
means that they match any value, or describe an exact match. Prefix
or suffix matching of values is not possible.
For a rule to match, it has to find a valid combination of keys and values. The
resulting property is that of the matched values.
The rules in a configuration file are processed sequentially and the first
match for each tag wins.
A rule where key and value are the empty string is special. This defines the
fallback when none of the rules match. The fallback is always used as a last
resort when nothing else matches, no matter where the rule appears in the file.
Defining multiple fallback rules is not allowed; what happens in that case
is undefined.
### Tag Properties
One or more of the following properties may be given for each tag:
* `main`
A principal tag. A new row will be added for the object with key and value
as `class` and `type`.
* `with_name`
When the tag is a principal tag (`main` property set): only really add a new
row, if there is any name tag found (a reference tag is not sufficient, see
below).
* `with_name_key`
When the tag is a principal tag (`main` property set): only really add a new
row, if there is also a name tag that matches the key of the principal tag.
For example, if the main tag is `bridge=yes`, then it will only be added as
an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
If this property is set, all other names that are not domain-specific are
ignored.
* `fallback`
When the tag is a principal tag (`main` property set): only really add a new
row, when no other principal tags for this object have been found. Only one
fallback tag can win for an object.
* `operator`
When the tag is a principal tag (`main` property set): also include the
`operator` tag in the list of names. This is a special construct for an
outdated tagging practice in OSM. Fuel stations and chain restaurants
in particular used to have the name of the chain tagged as `operator`.
These days the chain can be more commonly found in the `brand` tag but
there is still enough old data around to warrant this special case.
* `name`
Add tag to the list of names.
* `ref`
Add tag to the list of names as a reference. At the moment this only means
that the object is not considered to be named for `with_name`.
* `address`
Add tag to the list of address tags. If the tag starts with `addr:` or
`is_in:`, then this prefix is cut off before adding it to the list.
* `postcode`
Add the value as a postcode to the address tags. If multiple tags are
candidate for postcodes, one wins out and the others are dropped.
* `country`
Add the value as a country code to the address tags. The value must be a
two letter country code, otherwise it is ignored. If there are multiple
tags that match, then one wins out and the others are dropped.
* `house`
If no principal tags can be found for the object, still add the object with
`class`=`place` and `type`=`house`. Use this for address nodes that have no
other function.
* `interpolation`
Add this object as an address interpolation (appears as `class`=`place` and
`type`=`houses` in the database).
* `extra`
Add tag to the list of extra tags.
* `skip`
Skip the tag completely. Useful when a custom default fallback is defined
or to define exceptions to rules.
A rule can define as many of these properties for one match as it likes. For
example, if the property is `"main,extra"` then the tag will open a new row
but also have the tag appear in the list of extra tags.
### Changing the Style of Existing Databases

View File

@@ -1,55 +0,0 @@
A Nominatim database can be converted into an SQLite database and used as
a read-only source for geocoding queries. This section describes how to
create and use an SQLite database.
!!! danger
This feature is in an experimental state at the moment. Use at your own
risk.
## Installing prerequisites
To use a SQLite database, you need to install:
* SQLite (>= 3.30)
* Spatialite (> 5.0.0)
On Ubuntu/Debian, you can run:
sudo apt install sqlite3 libsqlite3-mod-spatialite libspatialite7
## Creating a new SQLite database
Nominatim cannot import directly into an SQLite database. Instead you have to
first create a geocoding database in PostgreSQL by running a
[regular Nominatim import](../admin/Import.md).
Once this is done, the database can be converted to SQLite with
nominatim convert -o mydb.sqlite
This will create a database where all geocoding functions are available.
Depending on what functions you need, the database can be made smaller:
* `--without-reverse` omits indexes only needed for reverse geocoding
* `--without-search` omits tables and indexes used for forward search
* `--without-details` leaves out extra information only available in the
details API
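The options can be combined. For example, a database intended purely for
reverse geocoding could be created with:
    nominatim convert -o mydb.sqlite --without-search --without-details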
## Using an SQLite database
Once you have created the database, you can use it by simply pointing the
database DSN to the SQLite file:
NOMINATIM_DATABASE_DSN=sqlite:dbname=mydb.sqlite
Please note that SQLite support is only available for the Python frontend. To
use the test server with an SQLite database, you therefore need to switch
the frontend engine:
nominatim serve --engine falcon
You need to install falcon or starlette for this, depending on which engine
you choose.
The CLI query commands and the library interface already use the new Python
frontend and therefore work right out of the box.
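For example, the library can be pointed at the converted database through the
`environ` parameter. This is a sketch; `mydb.sqlite` is the file created above:
``` python
from pathlib import Path
import nominatim.api as napi
# point the DSN at the SQLite file instead of a PostgreSQL database
api = napi.NominatimAPI(Path('.'),
                        environ={'NOMINATIM_DATABASE_DSN': 'sqlite:dbname=mydb.sqlite'})
print(api.status())
```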

View File

@@ -91,7 +91,7 @@ The option is only used by the Legacy tokenizer and ignored otherwise.
| -------------- | --------------------------------------------------- |
| **Description:** | Tokenizer used for normalizing and parsing queries and names |
| **Format:** | string |
| **Default:** | icu |
| **Default:** | legacy |
| **After Changes:** | cannot be changed after import |
Sets the tokenizer type to use for the import. For more information on
@@ -148,6 +148,29 @@ Setting this option to 'yes' means that Nominatim skips reindexing of contained
objects when the area becomes too large.
#### NOMINATIM_UPDATE_FORWARD_DEPENDENCIES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Forward geometry changes to dependent objects |
| **Format:** | bool |
| **Default:** | no |
| **Comment:** | EXPERT ONLY. Must not be enabled after import. |
The geometry of OSM ways and relations may change when a node that is part
of the object is moved around. These changes are not propagated by default.
The geometry of ways/relations is only updated the next time the object
itself is touched. When this option is enabled, dependent objects will
be marked for update when one of their member objects changes.
Enabling this option may slow down updates significantly.
!!! warning
If you want to enable this option, it must be set already on import.
Do not enable this option on an existing database that was imported with
NOMINATIM_UPDATE_FORWARD_DEPENDENCIES=no.
Updates will become unusably slow.
#### NOMINATIM_LANGUAGES
| Summary | |
@@ -552,8 +575,6 @@ used.
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This feature is currently undocumented and potentially broken.
@@ -566,7 +587,6 @@ This feature is currently undocumented and potentially broken.
| **Format:** | integer |
| **Default:** | 500 |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
This setting defines the threshold over which a name is no longer considered
as rare. When searching for places with rare names, only the name is used
@@ -607,88 +627,6 @@ with a single query.
Setting this parameter to 0 disables polygon output completely.
#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Disable search for elements that are not in the country grid |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
| **Comment:** | PHP frontend only |
Enable to search elements just within countries.
When enabled, if, despite not finding a point within the static grid of countries, it
finds a geometry of a region, do not return the geometry.
Return "Unable to geocode" instead.
#### NOMINATIM_SERVE_LEGACY_URLS
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable serving via URLs with a .php suffix |
| **Format:** | boolean |
| **Default:** | yes |
| **Comment:** | Python frontend only |
When enabled, then endpoints are reachable as `/<name>` as well as `/<name>.php`.
This can be useful when you want to be backwards-compatible with previous
versions of Nominatim.
#### NOMINATIM_API_POOL_SIZE
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Number of parallel database connections per worker |
| **Format:** | number |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
Sets the maximum number of database connections available for a single instance
of Nominatim. When configuring the maximum number of connections that your
PostgreSQL database can handle, you need at least
`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
For configuring the number of workers, refer to the section about
[Deploying the Python frontend](../admin/Deployment-Python.md).
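For example, with the default pool size of 10 and a frontend running four
worker processes, PostgreSQL must accept at least 40 concurrent connections.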
#### NOMINATIM_QUERY_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for SQL queries to the database |
| **Format:** | number (seconds) |
| **Default:** | 10 |
| **Comment:** | Python frontend only |
When this timeout is set, then all SQL queries that run longer than the
specified number of seconds will be cancelled and the user receives a
timeout exception. Users of the API see a 503 HTTP error.
The timeout does not apply when using the
[low-level DB access](../library/Low-Level-DB-Access.md)
of the library. A timeout can be set manually there, if required.
#### NOMINATIM_REQUEST_TIMEOUT
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Timeout for search queries |
| **Format:** | number (seconds) |
| **Default:** | 60 |
| **Comment:** | Python frontend only |
When this timeout is set, a search query will finish sending queries
to the database after the timeout has passed and immediately return the
results gathered so far.
Note that under high load you may observe that users receive different results
than usual without seeing an error. This may cause some confusion.
### Logging Settings
#### NOMINATIM_LOG_DB
@@ -732,20 +670,3 @@ given in seconds and corresponds to the time the query took executing in PHP.
type contains the name of the endpoint used.
Can be used at the same time as NOMINATIM_LOG_DB.
#### NOMINATIM_DEBUG_SQL
| Summary | |
| -------------- | --------------------------------------------------- |
| **Description:** | Enable printing of raw SQL by SQLAlchemy |
| **Format:** | boolean |
| **Default:** | no |
| **Comment:** | **For developers only.** |
This setting enables
[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
by SQLAlchemy. This can be helpful when debugging problems with internal
query handling. It should only be used together with the CLI query functions.
Enabling it for server mode may have unintended consequences. Use the `debug`
parameter instead, which prints information on how the search is executed
including SQL statements.

View File

@@ -102,7 +102,7 @@ Here is an example configuration file:
``` yaml
normalization:
- ":: lower ()"
- "ß > 'ss'" # German szet is unambiguously equal to double ss
- "ß > 'ss'" # German szet is unimbigiously equal to double ss
transliteration:
- !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
- ":: Ascii ()"
@@ -128,7 +128,7 @@ The configuration file contains four sections:
The normalization and transliteration sections each define a set of
ICU rules that are applied to the names.
The **normalization** rules are applied after sanitation. They should remove
any information that is not relevant for search at all. Usual rules to be
applied here are: lower-casing, removing of special characters, cleanup of
spaces.
@@ -176,66 +176,52 @@ The following is a list of sanitizers that are shipped with Nominatim.
##### split-name-list
::: nominatim.tokenizer.sanitizers.split_name_list
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### strip-brace-terms
::: nominatim.tokenizer.sanitizers.strip_brace_terms
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### tag-analyzer-by-language
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### clean-housenumbers
::: nominatim.tokenizer.sanitizers.clean_housenumbers
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### clean-postcodes
::: nominatim.tokenizer.sanitizers.clean_postcodes
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### clean-tiger-tags
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
options:
selection:
members: False
rendering:
heading_level: 6
docstring_section_style: spacy
##### delete-tags
::: nominatim.tokenizer.sanitizers.delete_tags
options:
members: False
heading_level: 6
docstring_section_style: spacy
##### tag-japanese
::: nominatim.tokenizer.sanitizers.tag_japanese
options:
members: False
heading_level: 6
docstring_section_style: spacy
#### Token Analysis
@@ -394,7 +380,7 @@ The analyzer cannot be customized.
##### Postcode token analyzer
The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
a 'lookup' variant of the token, which produces variants with optional
spaces. Use together with the clean-postcodes sanitizer.
The analyzer cannot be customized.

View File

@@ -36,19 +36,12 @@ It has the following additional requirements:
* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
* [pytest](https://pytest.org)
* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
For testing the Python search frontend, you need to install extra dependencies
depending on your choice of webserver framework:
* [httpx](https://www.python-httpx.org/) (starlette only)
* [asgi-lifespan](https://github.com/florimondmanca/asgi-lifespan) (starlette only)
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
### Installing prerequisites on Ubuntu/Debian
@@ -62,10 +55,8 @@ To install all necessary packages run:
sudo apt install php-cgi phpunit php-codesniffer \
python3-pip python3-setuptools python3-dev
pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
types-ujson types-requests types-Pygments typing-extensions\
httpx asgi-lifespan
pip3 install --user behave mkdocs mkdocstrings pytest pylint \
mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil
```
The `mkdocs` executable will be located in `.local/bin`. You may have to add

View File

@@ -53,7 +53,8 @@ the function.
### Sanitizer configuration
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
options:
rendering:
show_source: no
heading_level: 6
### The main filter function of the sanitizer
@@ -61,10 +62,12 @@ the function.
The filter function receives a single object of type `ProcessInfo`
which has three members:
* `place: PlaceInfo`: read-only information about the place being processed.
* `place`: read-only information about the place being processed.
See PlaceInfo below.
* `names: List[PlaceName]`: The current list of names for the place.
* `address: List[PlaceName]`: The current list of address names for the place.
* `names`: The current list of names for the place. Each name is a
PlaceName object.
* `address`: The current list of address names for the place. Each name
is a PlaceName object.
While the `place` member is provided for information only, the `names` and
`address` lists are meant to be manipulated by the sanitizer. It may add and
@@ -74,14 +77,16 @@ adding extra attributes) or completely replace the list with a different one.
#### PlaceInfo - information about the place
::: nominatim.data.place_info.PlaceInfo
options:
rendering:
show_source: no
heading_level: 6
#### PlaceName - extended naming information
::: nominatim.data.place_name.PlaceName
options:
rendering:
show_source: no
heading_level: 6
@@ -129,7 +134,7 @@ sanitizers:
!!! warning
This example is just a simplified showcase of how to create a sanitizer.
It is not really ready for real-world use: while the sanitizer would
correctly transform `West 5th Street` into `5th Street`, it would also
shorten a simple `North Street` to `Street`.
For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
@@ -140,12 +145,14 @@ They can be found in the directory
## Custom token analysis module
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
options:
rendering:
show_source: no
heading_level: 6
::: nominatim.tokenizer.token_analysis.base.Analyzer
options:
rendering:
show_source: no
heading_level: 6
### Example: Creating acronym variants for long names

View File

@@ -10,7 +10,7 @@ There are two kind of tests in this test suite. There are functional tests
which test the API interface using a BDD test framework and there are unit
tests for specific PHP functions.
This test directory is structured as follows:
```
-+- bdd Functional API tests
@@ -84,8 +84,6 @@ The tests can be configured with a set of environment variables (`behave -D key=
* `TEST_DB` - name of test database (db tests)
* `API_TEST_DB` - name of the database containing the API test data (api tests)
* `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
* `API_ENGINE` - webframe to use for running search queries, same values as
`nominatim serve --engine` parameter
* `DB_HOST` - (optional) hostname of database host
* `DB_PORT` - (optional) port of database on host
* `DB_USER` - (optional) username of database login
@@ -122,7 +120,7 @@ and comprises the following data:
API tests should only be testing the functionality of the website PHP code.
Most tests should be formulated as BDD DB creation tests (see below) instead.
#### Code Coverage (PHP engine only)
#### Code Coverage
The API tests also support code coverage tests. You need to install
[PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).
@@ -155,3 +153,7 @@ needs superuser rights for postgres.
These tests check that data is imported correctly into the place table. They
use the same template database as the DB Creation tests, so the same remarks apply.
Note that most testing of the gazetteer output of osm2pgsql is done in the tests
of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
the osm2pgsql and Nominatim code.

View File

@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
and implement the abstract functions defined there.
::: nominatim.tokenizer.base.AbstractTokenizer
options:
heading_level: 6
rendering:
heading_level: 4
### Python Analyzer Class
::: nominatim.tokenizer.base.AbstractAnalyzer
options:
heading_level: 6
rendering:
heading_level: 4
### PL/pgSQL Functions
@@ -189,28 +189,6 @@ a house number token text. If a place has multiple house numbers they must
be listed with a semicolon as delimiter. Must be NULL when the place has no
house numbers.
```sql
FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
```
Return true if this is an object that should be parented against a street.
Only relevant for objects with address rank 30.
```sql
FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
```
Return true if there are street names to match against for finding the
parent of the object.
```sql
FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
```
Return true if there are place names to match against for finding the
parent of the object.
```sql
FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
```

View File

@@ -18,7 +18,7 @@ elseif (has 'addr:place'?) then (yes)
**with same name**;
kill
else (no)
:add addr:place to address;
:**Use closest place**\n**rank 16 to 25**;
kill
endif

File diff suppressed because one or more lines are too long

(binary image changed: 9.8 KiB before, 9.8 KiB after)

View File

@@ -1,11 +1,3 @@
.toctree-l3 {
display: none!important
}
.wy-nav-content {
max-width: 900px!important
}
table {
margin-bottom: 12pt
}
@@ -20,17 +12,9 @@ th {
.doc-object h6 {
margin-bottom: 0.8em;
font-size: 130%;
font-size: 120%;
}
.doc-object {
margin-bottom: 1.3em;
}
.doc-children .doc-contents {
margin-left: 3em;
}
.md-footer__inner {
display: none;
}

View File

@@ -1,15 +1,10 @@
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
address and to generate synthetic addresses of OSM points (reverse geocoding).
It has also limited capability to search features by their type
(pubs, hotels, churches, etc).
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
This guide comes in five parts:
This guide comes in four parts:
* __[API reference](api/Overview.md)__ for users of Nominatim
* __[Administration Guide](admin/Installation.md)__ for those who want
to install their own Nominatim server
* __[Customization Guide](customize/Overview.md)__ for those who want to
adapt their own installation to their special requirements
* __[Library Guide](library/Getting-Started.md)__ for Python developers who
want to use Nominatim as a library in their project
* __[Developer's Guide](develop/overview.md)__ for developers of the software

View File

@@ -1,31 +0,0 @@
# Configuration
When using Nominatim through the library, it can be configured in exactly
the same way as when running as a service. This means that you should have
created a [project directory](../admin/Import.md#creating-the-project-directory)
which contains all files belonging to the Nominatim instance. It can also contain
an `.env` file with configuration options. Setting configuration parameters
via environment variables works as well.
Configuration options are resolved in the following order:
* from the OS environment (or the dictionary given in `environ`;
see NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__)
* from the .env file in the project directory of the installation
* from the default installation in the configuration directory
For more information on configuration via dotenv and a list of possible
configuration parameters, see the [Configuration page](../customize/Settings.md).
## `Configuration` class
::: nominatim.config.Configuration
options:
members:
- get_bool
- get_int
- get_str_list
- get_path
heading_level: 6
show_signature_annotations: True

View File

@@ -1,248 +0,0 @@
# Getting Started
The Nominatim search frontend can directly be used as a Python library in
scripts and applications. When you have imported your own Nominatim database,
then it is no longer necessary to run a full web service for it and access
the database through HTTP requests. There are
also fewer constraints on the kinds of data that can be accessed. The library
gives access to more detailed information about the objects saved
in the database.
!!! danger
The library interface is currently in an experimental stage. There might
be some smaller adjustments to the public interface until the next version.
The library also lacks a proper installation routine, so some manipulation
of the PYTHONPATH is required. At the moment, use is only recommended for
developers with some experience in Python.
## Installation
To use the Nominatim library, you need access to a local Nominatim database.
Follow the [installation](../admin/Installation.md) and
[import](../admin/Import.md) instructions to set up your database.
It is not yet possible to install it in the usual way via pip or inside a
virtualenv. To get access to the library you need to set an appropriate
`PYTHONPATH`. With the default installation, the python library can be found
under `/usr/local/share/nominatim/lib-python`. If you have installed
Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
You can also point the `PYTHONPATH` to the Nominatim source code.
### A simple search example
To query the Nominatim database you need to first set up a connection. This
is done by creating a Nominatim API object. This object exposes all the
search functions of Nominatim that are also known from its web API.
This code snippet implements a simple search for the town of 'Brugge':
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query)
results = asyncio.run(search('Brugge'))
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge')
if not results:
print('Cannot find Brugge')
else:
print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
```
The Nominatim library is designed around
[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
provides you with an interface of coroutines.
If you have many requests to make, coroutines can speed up your applications
significantly.
For smaller scripts there is also a synchronous wrapper around the API. By
using `NominatimAPI`, you get exactly the same interface using classic functions.
The examples in this chapter will always showcase both
implementations. The documentation itself will usually refer only to
'Nominatim API class' when both flavours are meant. If a functionality is
available only for the synchronous or asynchronous version, this will be
explicitly mentioned.
### Defining which database to use
The [Configuration](../admin/Import.md#configuration-setup-in-env)
section explains how Nominatim is configured using the
[dotenv](https://github.com/theskumar/python-dotenv) library.
The same configuration mechanism is used with the
Nominatim API library. You should therefore be sure you are familiar with
the section.
The constructor of the 'Nominatim API class' takes one mandatory parameter:
the path to the [project directory](../admin/Import.md#creating-the-project-directory).
You should have set up this directory as part of the Nominatim import.
Any configuration found in the `.env` file in this directory will
automatically be used.
You may also configure Nominatim by setting environment variables.
Normally, Nominatim will check the operating system environment. This can be
overridden by giving the constructor a dictionary of configuration parameters.
Let us look up 'Brugge' in the special database named 'belgium' instead of the
standard 'nominatim' database:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
async def search(query):
api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
return await api.search(query)
results = asyncio.run(search('Brugge'))
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
config_params = {
'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
}
api = napi.NominatimAPI(Path('.'), environ=config_params)
results = api.search('Brugge')
```
### Presenting results to humans
All search functions return the raw results from the database. There is no
full human-readable label. To create such a label, you need two things:
* the address details of the place
* adapt the result to the language you wish to use for display
Again searching for 'Brugge', this time with a nicely formatted result:
!!! example
=== "NominatimAPIAsync"
``` python
from pathlib import Path
import asyncio
import nominatim.api as napi
async def search(query):
api = napi.NominatimAPIAsync(Path('.'))
return await api.search(query, address_details=True)
results = asyncio.run(search('Brugge'))
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
=== "NominatimAPI"
``` python
from pathlib import Path
import nominatim.api as napi
api = napi.NominatimAPI(Path('.'))
results = api.search('Brugge', address_details=True)
locale = napi.Locales(['fr', 'en'])
for i, result in enumerate(results):
address_parts = result.address_rows.localize(locale)
print(f"{i + 1}. {', '.join(address_parts)}")
```
To request information about the address of a result, add the optional
parameter 'address_details' to your search:
``` python
>>> results = api.search('Brugge', address_details=True)
```
An additional field `address_rows` will be set in the results that are
returned. It contains a list of all places that make up the address of the
place. For simplicity, this includes the name and house number of the place
itself. With the names in this list it is possible to create a human-readable
description of the result. To do that, you first need to decide in which
language the results should be presented. As with the names in the result
itself, the places in `address_rows` contain all possible name translations
for each row.
The library has a helper class `Locales` which helps extracting a name of a
place in the preferred language. It takes a single parameter with a list
of language codes in the order of preference. So
``` python
locale = napi.Locales(['fr', 'en'])
```
creates a helper object that returns the name preferably in French. If that is
not possible, it tries English and eventually falls back to the default `name`
or `ref`.
The `Locales` object can be applied to a name dictionary to return the best-matching
name out of it:
``` python
>>> print(locale.display_name(results[0].names))
'Bruges'
```
The `address_rows` field has a helper function to apply the function to all
its members and save the result in the `local_name` field. It also returns
all the localized names as a convenient simple list. This list can be used
to create a human-readable output:
``` python
>>> address_parts = results[0].address_rows.localize(locale)
>>> print(', '.join(address_parts))
Bruges, Flandre-Occidentale, Flandre, Belgique
```
This is a fairly simple way to create a human-readable description. The
place information in `address_rows` contains further information about each
place. For example, which OSM `admin_level` was used, what category the place
belongs to or what rank Nominatim has assigned. Use this to adapt the output
to local address formats.
For more information on address rows, see
[detailed address description](Result-Handling.md#detailed-address-description).

View File

@@ -1,62 +0,0 @@
# Input Parameter Types
This page describes in more detail some of the input parameter types used
in the query functions of the API object.
## Place identification
The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
require references to places in the database. Below the possible
types for place identification are listed. All types are dataclasses.
### PlaceID
::: nominatim.api.PlaceID
options:
heading_level: 6
### OsmID
::: nominatim.api.OsmID
options:
heading_level: 6
## Geometry types
::: nominatim.api.GeometryFormat
options:
heading_level: 6
members_order: source
## Geometry input
### Point
::: nominatim.api.Point
options:
heading_level: 6
show_signature_annotations: True
### Bbox
::: nominatim.api.Bbox
options:
heading_level: 6
show_signature_annotations: True
members_order: source
group_by_category: False
## Layers
Layers allow restricting the search result to thematic groups. This is
orthogonal to restriction by address ranks, which groups places by their
geographic extent.
::: nominatim.api.DataLayer
options:
heading_level: 6
members_order: source
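As a hypothetical illustration, layers are meant to be combined as flags and
handed to the search functions; the `layers` parameter and the query in the
comment are assumptions for this sketch:
``` python
import nominatim.api as napi
# restrict results to points of interest and addresses
poi_layers = napi.DataLayer.POI | napi.DataLayer.ADDRESS
# e.g. api.search('pharmacy, Berlin', layers=poi_layers)
```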

View File

@@ -1,56 +0,0 @@
# Low-level connections
The `NominatimAPIAsync` class allows direct access to the underlying
database connection to explore the raw data. Nominatim uses
[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
refer to the documentation of the library to understand how to write SQL.
To get access to a search connection, use the `begin()` function of your
API object. This returns a `SearchConnection` object described below
wrapped in a context manager. Its
`t` property has definitions for all Nominatim search tables. For an
overview of available tables, refer to the
[Development Layout](../develop/Database-Layout.md) in the development
chapter. Note that only tables that are needed for search are accessible
as SQLAlchemy tables.
!!! warning
The database layout is not part of the API definition and may change
without notice. If you play with the low-level access functions, you
need to be prepared for such changes.
Here is a simple example, which prints how many places are available in
the placex table:
``` python
import asyncio
from pathlib import Path
import sqlalchemy as sa
from nominatim.api import NominatimAPIAsync
async def print_table_size():
api = NominatimAPIAsync(Path('.'))
async with api.begin() as conn:
cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
print(f'placex table has {cnt} rows.')
asyncio.run(print_table_size())
```
!!! warning
Low-level connections may only be used to read data from the database.
Do not use it to add or modify data or you might break Nominatim's
normal functions.
## SearchConnection class
::: nominatim.api.SearchConnection
options:
members:
- scalar
- execute
- get_class_table
- get_db_property
- get_property
heading_level: 6

View File

@@ -1,36 +0,0 @@
# The Nominatim API classes
The API classes are the core objects of the search library. Always instantiate
one of these classes first. The API classes are **not threadsafe**. You need
to instantiate a separate instance for each thread.
### NominatimAPI
::: nominatim.api.NominatimAPI
options:
members:
- __init__
- config
- close
- status
- details
- lookup
- reverse
- search
- search_address
- search_category
heading_level: 6
group_by_category: False
### NominatimAPIAsync
::: nominatim.api.NominatimAPIAsync
options:
members:
- __init__
- setup_database
- close
- begin
heading_level: 6
group_by_category: False

View File

@@ -1,58 +0,0 @@
# Result handling
The search functions of the Nominatim API always return a result object
with the raw information about the place that is available in the
database. This section discusses data types used in the results and utility
functions that allow further processing of the results.
## Result fields
### Sources
Nominatim takes the result data from multiple sources. The `source_table` field
in the result describes from which source the result was retrieved.
::: nominatim.api.SourceTable
options:
heading_level: 6
members_order: source
### Detailed address description
When the `address_details` parameter is set, then functions return not
only information about the result place but also about the places that
make up its address. This information is almost always required when you
want to present the user with a human-readable description of the result.
See also [Localization](#localization) below.
The address details are available in the `address_rows` field as an ordered
list of `AddressLine` objects with the country information last. The list also
contains the result place itself and some artificial entries, for example
for the house number or the country code. This makes processing and creating
a full address easier.
::: nominatim.api.AddressLine
options:
heading_level: 6
members_order: source
### Detailed search terms
The `details` function can return detailed information about which search terms
may be used to find a place, when the `keywords` parameter is set. Search
terms are split into terms for the name of the place and search terms for
its address.
::: nominatim.api.WordInfo
options:
heading_level: 6
## Localization
Results are always returned with the full list of available names.
### Locale
::: nominatim.api.Locales
options:
heading_level: 6

View File

@@ -1,9 +1,5 @@
site_name: Nominatim Manual
theme:
name: material
features:
- navigation.tabs
copyright: Copyright &copy; Nominatim developer community
site_name: Nominatim 4.2.4
theme: readthedocs
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
site_url: https://nominatim.org
repo_url: https://github.com/openstreetmap/Nominatim
@@ -22,8 +18,7 @@ nav:
- 'Basic Installation': 'admin/Installation.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
- 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
- 'Deploy' : 'admin/Deployment.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Maintenance' : 'admin/Maintenance.md'
@@ -40,14 +35,6 @@ nav:
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
- 'External data: Postcodes': 'customize/Postcodes.md'
- 'Conversion to SQLite': 'customize/SQLite.md'
- 'Library Guide':
- 'Getting Started': 'library/Getting-Started.md'
- 'Nominatim API class': 'library/NominatimAPI.md'
- 'Configuration': 'library/Configuration.md'
- 'Input Parameter Types': 'library/Input-Parameter-Types.md'
- 'Result Handling': 'library/Result-Handling.md'
- 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
- 'Developers Guide':
- 'Architecture Overview' : 'develop/overview.md'
- 'Database Layout' : 'develop/Database-Layout.md'
@@ -58,14 +45,12 @@ nav:
- 'Testing' : 'develop/Testing.md'
- 'External Data Sources': 'develop/data-sources.md'
- 'Appendix':
- 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
- 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
- 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
markdown_extensions:
- codehilite
- admonition
- pymdownx.superfences
- pymdownx.tabbed:
alternate_style: true
- def_list
- toc:
permalink:
@@ -74,8 +59,7 @@ plugins:
- search
- mkdocstrings:
handlers:
python:
paths: ["${PROJECT_SOURCE_DIR}"]
options:
show_source: False
show_bases: False
python-legacy:
rendering:
show_source: false
show_signature_annotations: false

View File

@@ -38,25 +38,23 @@ class DB
// https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
try {
$this->connection = new \PDO($this->sDSN, null, null, $aConnOptions);
$conn = new \PDO($this->sDSN, null, null, $aConnOptions);
} catch (\PDOException $e) {
$sMsg = 'Failed to establish database connection:' . $e->getMessage();
throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
}
$this->connection->exec("SET DateStyle TO 'sql,european'");
$this->connection->exec("SET client_encoding TO 'utf-8'");
$conn->exec("SET DateStyle TO 'sql,european'");
$conn->exec("SET client_encoding TO 'utf-8'");
// Disable JIT and parallel workers. They interfere badly with search SQL.
$this->connection->exec('SET max_parallel_workers_per_gather TO 0');
if ($this->getPostgresVersion() >= 11) {
$this->connection->exec('SET jit_above_cost TO -1');
}
$conn->exec("UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost'");
$conn->exec("UPDATE pg_settings SET setting = 0 WHERE name = 'max_parallel_workers_per_gather'");
$iMaxExecution = ini_get('max_execution_time');
if ($iMaxExecution > 0) {
$this->connection->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
$conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
}
$this->connection = $conn;
return true;
}

View File

@@ -135,7 +135,7 @@ class Debug
public static function printSQL($sSQL)
{
echo '<p><tt><b>'.date('c').'</b> <font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
}
private static function outputVar($mVar, $sPreNL)
@@ -178,12 +178,11 @@ class Debug
}
if (is_string($mVar)) {
$sOut = "'$mVar'";
} else {
$sOut = (string)$mVar;
echo "'$mVar'";
return strlen($mVar) + 2;
}
echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
return strlen($sOut);
echo (string)$mVar;
return strlen((string)$mVar);
}
}

View File

@@ -874,7 +874,7 @@ class Geocode
$iCountWords = 0;
$sAddress = $aResult['langaddress'];
foreach ($aRecheckWords as $i => $sWord) {
if (grapheme_stripos($sAddress, $sWord)!==false) {
if (stripos($sAddress, $sWord)!==false) {
$iCountWords++;
if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
$iCountWords += 0.1;

View File

@@ -86,7 +86,7 @@ class PlaceLookup
($this->bIncludePolygonAsSVG ? 1 : 0);
if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
if (CONST_PolygonOutput_MaximumTypes) {
userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
} else {
userError('Polygon output is disabled');
}
@@ -524,7 +524,12 @@ class PlaceLookup
// Get the bounding box and outline polygon
$sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ' ST_Y(closest_point) as centrelat,';
$sSQL .= ' ST_X(closest_point) as centrelon,';
} else {
$sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
}
$sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
$sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
if ($this->bIncludePolygonAsGeoJSON) {
@@ -539,21 +544,19 @@ class PlaceLookup
if ($this->bIncludePolygonAsText) {
$sSQL .= ',ST_AsText(geometry) as astext';
}
$sSQL .= ' FROM (SELECT place_id';
if ($fLonReverse != null && $fLatReverse != null) {
$sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sSQL .=' ELSE centroid END AS centroid';
$sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
$sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
$sFrom .=' ELSE centroid END AS closest_point';
$sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
} else {
$sSQL .= ',centroid';
$sFrom = ' from placex where place_id = '.$iPlaceID;
}
if ($this->fPolygonSimplificationThreshold > 0) {
$sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry';
$sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
} else {
$sSQL .= ',geometry';
$sSQL .= $sFrom;
}
$sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx';
$aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');

View File

@@ -40,10 +40,10 @@ class ReverseGeocode
9 => 12,
10 => 17, // City
11 => 17,
12 => 18, // Town
13 => 19, // Village
14 => 22, // Neighbourhood
15 => 25, // Locality
12 => 18, // Town / Village
13 => 18,
14 => 22, // Suburb
15 => 22,
16 => 26, // major street
17 => 27, // minor street
18 => 30, // or >, Building
@@ -85,11 +85,6 @@ class ReverseGeocode
protected function lookupLargeArea($sPointSQL, $iMaxRank)
{
$sCountryCode = $this->getCountryCode($sPointSQL);
if (CONST_Search_WithinCountries and $sCountryCode == null) {
return null;
}
if ($iMaxRank > 4) {
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
if ($aPlace) {
@@ -99,12 +94,12 @@ class ReverseGeocode
// If no polygon which contains the searchpoint is found,
// searches in the country_osm_grid table for a polygon.
return $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
return $this->lookupInCountry($sPointSQL, $iMaxRank);
}
protected function getCountryCode($sPointSQL)
protected function lookupInCountry($sPointSQL, $iMaxRank)
{
Debug::newFunction('getCountryCode');
Debug::newFunction('lookupInCountry');
// searches for polygon in table country_osm_grid which contains the searchpoint
// and searches for the nearest place node to the searchpoint in this polygon
$sSQL = 'SELECT country_code FROM country_osm_grid';
@@ -116,12 +111,8 @@ class ReverseGeocode
null,
'Could not determine country polygon containing the point.'
);
return $sCountryCode;
}
Debug::printVar('Country code', $sCountryCode);
protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
{
Debug::newFunction('lookupInCountry');
if ($sCountryCode) {
if ($iMaxRank > 4) {
// look for place nodes with the given country code
@@ -131,13 +122,12 @@ class ReverseGeocode
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
$sSQL .= ' AND country_code = \''.$sCountryCode.'\'';
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
$sSQL .= ' AND rank_search between 5 and ' .min(25, $iMaxRank);
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND class = \'place\' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' and indexed_status = 0 and linked_place_id is null';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ') as a ';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, 1.8)) p ';
$sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' LIMIT 1';
@@ -226,18 +216,23 @@ class ReverseGeocode
$sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
$sSQL .= ' FROM placex';
$sSQL .= ' WHERE osm_type = \'N\'';
// using rank_search because of a better differentiation
// for place nodes at rank_address 16
$sSQL .= ' AND rank_search > '.$iRankSearch;
$sSQL .= ' AND rank_search <= '.$iMaxRank;
$sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
$sSQL .= ' AND rank_search < 26 '; // needed to select right index
$sSQL .= ' AND rank_address > 0';
$sSQL .= ' AND class = \'place\'';
$sSQL .= ' AND type != \'postcode\'';
$sSQL .= ' AND name IS NOT NULL ';
$sSQL .= ' AND indexed_status = 0 AND linked_place_id is null';
$sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' limit 100) as a';
$sSQL .= ' WHERE ST_Contains((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, reverse_place_diameter('.$iRankSearch.'::smallint))';
$sSQL .= ' ORDER BY distance ASC,';
$sSQL .= ' rank_address DESC';
$sSQL .= ' limit 500) as a';
$sSQL .= ' WHERE ST_CONTAINS((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
$sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
$sSQL .= ' ORDER BY rank_search DESC, distance ASC';
$sSQL .= ' ORDER BY distance ASC, rank_search DESC';
$sSQL .= ' LIMIT 1';
Debug::printSQL($sSQL);

lib-php/admin/export.php (new file, 190 lines)

@@ -0,0 +1,190 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
@define('CONST_LibDir', dirname(dirname(__FILE__)));
// Script to extract structured city and street data
// from a running nominatim instance as CSV data
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/ParameterParser.php');
ini_set('memory_limit', '800M');
$aCMDOptions = array(
'Export addresses as CSV file from a Nominatim database',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
"\nAddress ranks: continent, country, state, county, city, suburb, street, path",
'Additional output types: postcode, placeid (placeid for each object)',
"\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
'can be merged into one column by simply using a comma-separated list.',
"\nDefault output-type: street",
'Default output format: street;suburb;city;county;state;country'
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
$aRankmap = array(
'continent' => 1,
'country' => 4,
'state' => 8,
'county' => 12,
'city' => 16,
'suburb' => 20,
'street' => 26,
'path' => 27
);
$oDB = new Nominatim\DB();
$oDB->connect();
if (isset($aCMDResult['output-type'])) {
if (!isset($aRankmap[$aCMDResult['output-type']])) {
fail('unknown output-type: '.$aCMDResult['output-type']);
}
$iOutputRank = $aRankmap[$aCMDResult['output-type']];
} else {
$iOutputRank = $aRankmap['street'];
}
// Preferred language
$oParams = new Nominatim\ParameterParser();
if (!isset($aCMDResult['language'])) {
$aCMDResult['language'] = 'xx';
}
$aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
// output formatting: build up a lookup table that maps address ranks to columns
$aColumnMapping = array();
$iNumCol = 0;
if (!isset($aCMDResult['output-format'])) {
$aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
}
foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
$bHasData = false;
foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
if ($sRank == 'postcode' || $sRank == 'placeid') {
$aColumnMapping[$sRank] = $iNumCol;
$bHasData = true;
} elseif (isset($aRankmap[$sRank])) {
$iRank = $aRankmap[$sRank];
if ($iRank <= $iOutputRank) {
$aColumnMapping[(string)$iRank] = $iNumCol;
$bHasData = true;
}
}
}
if ($bHasData) {
$iNumCol++;
}
}
// build the query for objects
$sPlacexSQL = 'select min(place_id) as place_id, ';
$sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
$sPlacexSQL .= 'country_code as cc, ';
$sPlacexSQL .= 'postcode, ';
// get the address places excluding postcodes
$sPlacexSQL .= 'array(select address_place_id from place_addressline a';
$sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
$sPlacexSQL .= ' and address_place_id != placex.place_id';
$sPlacexSQL .= ' and not cached_rank_address in (5,11)';
$sPlacexSQL .= ' and cached_rank_address > 2 order by cached_rank_address)';
$sPlacexSQL .= ' as address';
$sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
$sPlacexSQL .= ' and rank_address = '.$iOutputRank;
if (isset($aCMDResult['restrict-to-country'])) {
$sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
}
// restriction to parent place id
$sParentId = false;
$sOsmType = false;
if (isset($aCMDResult['restrict-to-osm-node'])) {
$sOsmType = 'N';
$sOsmId = $aCMDResult['restrict-to-osm-node'];
}
if (isset($aCMDResult['restrict-to-osm-way'])) {
$sOsmType = 'W';
$sOsmId = $aCMDResult['restrict-to-osm-way'];
}
if (isset($aCMDResult['restrict-to-osm-relation'])) {
$sOsmType = 'R';
$sOsmId = $aCMDResult['restrict-to-osm-relation'];
}
if ($sOsmType) {
$sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
$sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
if (!$sParentId) {
fail('Could not find place '.$sOsmType.' '.$sOsmId);
}
}
if ($sParentId) {
$sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
}
$sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
// Iterate over placeids
// to get further hierarchical information
//var_dump($sPlacexSQL);
$oResults = $oDB->getQueryStatement($sPlacexSQL);
$fOutstream = fopen('php://output', 'w');
while ($aRow = $oResults->fetch()) {
$iPlaceID = $aRow['place_id'];
$sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
$sSQL .= ' WHERE isaddress';
$sSQL .= ' order by rank_address desc,isaddress desc';
$aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
$aOutput = array_fill(0, $iNumCol, '');
// output address parts
foreach ($aAddressLines as $aAddress) {
if (isset($aColumnMapping[$aAddress['rank_address']])) {
$aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
}
}
// output postcode
if (isset($aColumnMapping['postcode'])) {
if ($aCMDResult['output-all-postcodes']) {
$sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
$sSQL .= 'on px.place_id = pa.address_place_id ';
$sSQL .= 'where pa.cached_rank_address in (5,11) ';
$sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
$sSQL .= 'group by postcode order by count(*) desc limit 1';
$sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
$aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
} else {
$aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
}
}
if (isset($aColumnMapping['placeid'])) {
$aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
}
fputcsv($fOutstream, $aOutput);
}
fclose($fOutstream);
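The output-format rules from the option help (semicolon-separated columns; a comma merges several ranks into one column) boil down to the mapping below, re-sketched in Python for illustration (function and variable names are assumptions, not part of this script):
import re

RANKS = {'continent': 1, 'country': 4, 'state': 8, 'county': 12,
         'city': 16, 'suburb': 20, 'street': 26, 'path': 27}

def build_column_mapping(output_format: str, output_rank: int) -> dict:
    # Maps an address rank (or 'postcode'/'placeid') to its output column.
    mapping, col = {}, 0
    for column in re.split(r'\s*;\s*', output_format):
        has_data = False
        for rank in re.split(r'\s*,\s*', column):
            if rank in ('postcode', 'placeid'):
                mapping[rank] = col
                has_data = True
            elif rank in RANKS and RANKS[rank] <= output_rank:
                mapping[RANKS[rank]] = col
                has_data = True
        if has_data:
            col += 1  # columns without any usable rank are dropped
    return mapping

print(build_column_mapping('street;suburb,city;country', RANKS['street']))
# {26: 0, 20: 1, 16: 1, 4: 2}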

lib-php/admin/warm.php (new file, 114 lines)

@@ -0,0 +1,114 @@
<?php
/**
* SPDX-License-Identifier: GPL-2.0-only
*
* This file is part of Nominatim. (https://nominatim.org)
*
* Copyright (C) 2022 by the Nominatim developer community.
* For a full list of authors see the git log.
*/
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/log.php');
require_once(CONST_LibDir.'/PlaceLookup.php');
require_once(CONST_LibDir.'/ReverseGeocode.php');
ini_set('memory_limit', '800M');
$aCMDOptions = array(
'Tools to warm nominatim db',
array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
loadSettings($aResult['project-dir'] ?? getcwd());
@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
@define('CONST_Log_DB', getSettingBool('LOG_DB'));
@define('CONST_Log_File', getSetting('LOG_FILE', false));
@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
require_once(CONST_LibDir.'/Geocode.php');
$oDB = new Nominatim\DB();
$oDB->connect();
$bVerbose = $aResult['verbose'];
function print_results($aResults, $bVerbose)
{
if ($bVerbose) {
if ($aResults && count($aResults)) {
echo $aResults[0]['langaddress']."\n";
} else {
echo "<not found>\n";
}
} else {
echo '.';
}
}
if (!$aResult['search-only']) {
$oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
$oReverseGeocode->setZoom(20);
$oPlaceLookup = new Nominatim\PlaceLookup($oDB);
$oPlaceLookup->setIncludeAddressDetails(true);
$oPlaceLookup->setLanguagePreference(array('en'));
echo 'Warm reverse: ';
if ($bVerbose) {
echo "\n";
}
for ($i = 0; $i < 1000; $i++) {
$fLat = rand(-9000, 9000) / 100;
$fLon = rand(-18000, 18000) / 100;
if ($bVerbose) {
echo "$fLat, $fLon = ";
}
$oLookup = $oReverseGeocode->lookup($fLat, $fLon);
$aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
print_results($aSearchResults, $bVerbose);
}
echo "\n";
}
if (!$aResult['reverse-only']) {
$oGeocode = new Nominatim\Geocode($oDB);
echo 'Warm search: ';
if ($bVerbose) {
echo "\n";
}
$oTokenizer = new \Nominatim\Tokenizer($oDB);
$aWords = $oTokenizer->mostFrequentWords(1000);
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
foreach ($aWords as $sWord) {
if ($bVerbose) {
echo "$sWord = ";
}
$oGeocode->setLanguagePreference(array('en'));
$oGeocode->setQuery($sWord);
$aSearchResults = $oGeocode->lookup();
print_results($aSearchResults, $bVerbose);
}
echo "\n";
}
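The warm-up strategy is deliberately simple: random reverse lookups over the whole globe plus searches for the most frequent words. A rough Python equivalent against a running HTTP endpoint (the URL and request parameters are assumptions; the script above talks to the database directly):
import random
import urllib.request

BASE = 'http://localhost:8080'  # assumed local Nominatim instance

for _ in range(1000):
    # Same coordinate distribution as the PHP loop above.
    lat = random.randint(-9000, 9000) / 100
    lon = random.randint(-18000, 18000) / 100
    with urllib.request.urlopen(f'{BASE}/reverse?lat={lat}&lon={lon}&format=jsonv2') as resp:
        resp.read()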


@@ -36,9 +36,6 @@ if (empty($aPlace)) {
$aFilteredPlaces['properties']['geocoding']['osm_id'] = $aPlace['osm_id'];
}
$aFilteredPlaces['properties']['geocoding']['osm_key'] = $aPlace['class'];
$aFilteredPlaces['properties']['geocoding']['osm_value'] = $aPlace['type'];
$aFilteredPlaces['properties']['geocoding']['type'] = addressRankToGeocodeJsonType($aPlace['rank_address']);
$aFilteredPlaces['properties']['geocoding']['accuracy'] = (int) $fDistance;


@@ -23,7 +23,7 @@ $aLangPrefOrder = $oParams->getPreferredLanguages();
$sPlaceId = $oParams->getString('place_id');
$sOsmType = $oParams->getSet('osmtype', array('N', 'W', 'R'));
$iOsmId = $oParams->getInt('osmid', 0);
$iOsmId = $oParams->getInt('osmid', -1);
$sClass = $oParams->getString('class');
$bIncludeKeywords = $oParams->getBool('keywords', false);
@@ -38,7 +38,7 @@ $oDB->connect();
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
if ($sOsmType && $iOsmId !== 0) {
if ($sOsmType && $iOsmId > 0) {
$sSQL = 'SELECT place_id FROM placex WHERE osm_type = :type AND osm_id = :id';
$aSQLParams = array(':type' => $sOsmType, ':id' => $iOsmId);
// osm_type and osm_id are not unique enough


@@ -187,7 +187,6 @@ BEGIN
-- --- Return the record for the base entry.
current_rank_address := 1000;
FOR location IN
SELECT placex.place_id, osm_type, osm_id, name,
coalesce(extratags->'linked_place', extratags->'place') as place_type,
@@ -262,7 +261,7 @@ BEGIN
-- If the place had a postcode assigned, take this one only
-- into consideration when it is an area and the place does not have
-- a postcode itself.
IF location.fromarea AND location_isaddress
IF location.fromarea AND location.isaddress
AND (place.address is null or not place.address ? 'postcode')
THEN
place.postcode := null; -- remove the less exact postcode


@@ -62,6 +62,10 @@ BEGIN
WHILE langs[i] IS NOT NULL LOOP
wiki_article := extratags->(case when langs[i] in ('english','country') THEN 'wikipedia' ELSE 'wikipedia:'||langs[i] END);
IF wiki_article is not null THEN
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/wiki/',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3}).wikipedia.org/w/index.php\\?title=',E'\\2:');
wiki_article := regexp_replace(wiki_article,E'^(.*?)/([a-z]{2,3})/wiki/',E'\\2:');
--wiki_article := regexp_replace(wiki_article,E'^(.*?)([a-z]{2,3})[=:]',E'\\2:');
wiki_article := replace(wiki_article,' ','_');
IF strpos(wiki_article, ':') IN (3,4) THEN
wiki_article_language := lower(trim(split_part(wiki_article, ':', 1)));
@@ -130,7 +134,7 @@ BEGIN
-- Still nothing? Fall back to a default.
IF result.importance is null THEN
result.importance := 0.40001 - (rank_search::float / 75);
result.importance := 0.75001 - (rank_search::float / 40);
END IF;
{% if 'secondary_importance' in db.tables %}
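A quick numeric check of the two fallback formulas above (plain Python): both reach roughly zero at the maximum search rank of 30, but they weight mid-range ranks quite differently.
for rank_search in (4, 16, 30):
    a = 0.40001 - rank_search / 75
    b = 0.75001 - rank_search / 40
    print(f'{rank_search:2d}  {a:.5f}  {b:.5f}')
#  4  0.34668  0.65001
# 16  0.18668  0.35001
# 30  0.00001  0.00001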


@@ -164,7 +164,7 @@ DECLARE
newend INTEGER;
moddiff SMALLINT;
linegeo GEOMETRY;
splitpoint FLOAT;
splitline GEOMETRY;
sectiongeo GEOMETRY;
postcode TEXT;
stepmod SMALLINT;
@@ -223,27 +223,15 @@ BEGIN
FROM placex, generate_series(1, array_upper(waynodes, 1)) nodeidpos
WHERE osm_type = 'N' and osm_id = waynodes[nodeidpos]::BIGINT
and address is not NULL and address ? 'housenumber'
and ST_Distance(NEW.linegeo, geometry) < 0.0005
ORDER BY nodeidpos
LOOP
{% if debug %}RAISE WARNING 'processing point % (%)', nextnode.hnr, ST_AsText(nextnode.geometry);{% endif %}
IF linegeo is null THEN
linegeo := NEW.linegeo;
ELSE
splitpoint := ST_LineLocatePoint(linegeo, nextnode.geometry);
IF splitpoint = 0 THEN
-- Corner case where the splitpoint falls on the first point
-- and thus would not return a geometry. Skip that section.
sectiongeo := NULL;
ELSEIF splitpoint = 1 THEN
-- Point is at the end of the line.
sectiongeo := linegeo;
linegeo := NULL;
ELSE
-- Split the line.
sectiongeo := ST_LineSubstring(linegeo, 0, splitpoint);
linegeo := ST_LineSubstring(linegeo, splitpoint, 1);
END IF;
splitline := ST_Split(ST_Snap(linegeo, nextnode.geometry, 0.0005), nextnode.geometry);
sectiongeo := ST_GeometryN(splitline, 1);
linegeo := ST_GeometryN(splitline, 2);
END IF;
IF prevnode.hnr is not null
@@ -251,9 +239,6 @@ BEGIN
-- regularly mapped housenumbers.
-- (Conveniently also fails if one of the house numbers is not a number.)
and abs(prevnode.hnr - nextnode.hnr) > NEW.step
-- If the interpolation geometry is broken or two nodes are at the
-- same place, then splitting might produce a point. Ignore that.
and ST_GeometryType(sectiongeo) = 'ST_LineString'
THEN
IF prevnode.hnr < nextnode.hnr THEN
startnumber := prevnode.hnr;
@@ -315,12 +300,12 @@ BEGIN
NEW.address, postcode,
NEW.country_code, NEW.geometry_sector, 0);
END IF;
END IF;
-- early break if we are out of line string,
-- might happen when a line string loops back on itself
IF linegeo is null or ST_GeometryType(linegeo) != 'ST_LineString' THEN
RETURN NEW;
-- early break if we are out of line string,
-- might happen when a line string loops back on itself
IF ST_GeometryType(linegeo) != 'ST_LineString' THEN
RETURN NEW;
END IF;
END IF;
prevnode := nextnode;
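The interpolation rewrite above replaces the ST_Snap/ST_Split approach with ST_LineLocatePoint/ST_LineSubstring and explicitly handles the corner cases where the split point falls on an end of the line. A rough Shapely sketch of the same idea (an illustration only; the real logic lives in this SQL trigger):
from shapely.geometry import LineString, Point
from shapely.ops import substring

def split_at(line: LineString, node: Point):
    pos = line.project(node, normalized=True)  # ST_LineLocatePoint
    if pos == 0:
        return None, line   # split point on the first point: skip the section
    if pos == 1:
        return line, None   # split point at the end: line fully consumed
    return (substring(line, 0, pos, normalized=True),   # ST_LineSubstring
            substring(line, pos, 1, normalized=True))

section, rest = split_at(LineString([(0, 0), (10, 0)]), Point(4, 0.3))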


@@ -37,7 +37,7 @@ BEGIN
-- Remove the place from the list of places to be deleted
DELETE FROM place_to_be_deleted pdel
WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id
and pdel.class = NEW.class and pdel.type = NEW.type;
and pdel.class = NEW.class;
-- Have we already done this place?
SELECT * INTO existing
@@ -296,9 +296,7 @@ BEGIN
extratags = NEW.extratags,
admin_level = NEW.admin_level,
indexed_status = 2,
geometry = CASE WHEN existingplacex.rank_address = 0
THEN simplify_large_polygons(NEW.geometry)
ELSE NEW.geometry END
geometry = NEW.geometry
WHERE place_id = existingplacex.place_id;
-- Invalidate linked places: they potentially get a new name and addresses.
@@ -365,3 +363,45 @@ BEGIN
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = place_to_be_deleted.osm_type
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;


@@ -2,7 +2,7 @@
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2024 by the Nominatim developer community.
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Trigger functions for the placex table.
@@ -88,18 +88,12 @@ BEGIN
-- Add all names from the place nodes that deviate from the name
-- in the relation with the prefix '_place_'. Deviation means that
-- either the value is different or a given key is missing completely
IF result.name is null THEN
SELECT hstore(array_agg('_place_' || key), array_agg(value))
INTO result.name
FROM each(location.name);
ELSE
SELECT hstore(array_agg('_place_' || key), array_agg(value)) INTO extra_names
FROM each(location.name - result.name);
{% if debug %}RAISE WARNING 'Extra names: %', extra_names;{% endif %}
SELECT hstore(array_agg('_place_' || key), array_agg(value)) INTO extra_names
FROM each(location.name - result.name);
{% if debug %}RAISE WARNING 'Extra names: %', extra_names;{% endif %}
IF extra_names is not null THEN
result.name := result.name || extra_names;
END IF;
IF extra_names is not null THEN
result.name := result.name || extra_names;
END IF;
{% if debug %}RAISE WARNING 'Final names: %', result.name;{% endif %}
@@ -119,14 +113,12 @@ CREATE OR REPLACE FUNCTION find_associated_street(poi_osm_type CHAR(1),
AS $$
DECLARE
location RECORD;
member JSONB;
parent RECORD;
result BIGINT;
distance FLOAT;
new_distance FLOAT;
waygeom GEOMETRY;
BEGIN
{% if db.middle_db_format == '1' %}
FOR location IN
SELECT members FROM planet_osm_rels
WHERE parts @> ARRAY[poi_osm_id]
@@ -163,40 +155,6 @@ BEGIN
END LOOP;
END LOOP;
{% else %}
FOR member IN
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(members)
WHERE planet_osm_member_ids(members, poi_osm_type::char(1)) && ARRAY[poi_osm_id]
and tags->>'type' = 'associatedStreet'
and value->>'role' = 'street'
LOOP
FOR parent IN
SELECT place_id, geometry
FROM placex
WHERE osm_type = (member->>'type')::char(1)
and osm_id = (member->>'ref')::bigint
and name is not null
and rank_search between 26 and 27
LOOP
-- Find the closest 'street' member.
-- Avoid distance computation for the frequent case where there is
-- only one street member.
IF waygeom is null THEN
result := parent.place_id;
waygeom := parent.geometry;
ELSE
distance := coalesce(distance, ST_Distance(waygeom, bbox));
new_distance := ST_Distance(parent.geometry, bbox);
IF new_distance < distance THEN
distance := new_distance;
result := parent.place_id;
waygeom := parent.geometry;
END IF;
END IF;
END LOOP;
END LOOP;
{% endif %}
RETURN result;
END;
$$
@@ -293,11 +251,7 @@ CREATE OR REPLACE FUNCTION find_linked_place(bnd placex)
RETURNS placex
AS $$
DECLARE
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_members JSONB;
{% endif %}
rel_member RECORD;
linked_placex placex%ROWTYPE;
bnd_name TEXT;
@@ -718,12 +672,6 @@ BEGIN
NEW.country_code := NULL;
END IF;
-- Simplify polygons with a very large memory footprint when they
-- do not take part in address computation.
IF NEW.rank_address = 0 THEN
NEW.geometry := simplify_large_polygons(NEW.geometry);
END IF;
END IF;
{% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}
@@ -795,11 +743,7 @@ CREATE OR REPLACE FUNCTION placex_update()
DECLARE
i INTEGER;
location RECORD;
{% if db.middle_db_format == '1' %}
relation_members TEXT[];
{% else %}
relation_member JSONB;
{% endif %}
geom GEOMETRY;
parent_address_level SMALLINT;
@@ -844,9 +788,6 @@ BEGIN
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search);
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
IF NEW.extratags = ''::hstore THEN
NEW.extratags := NULL;
END IF;
-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
-- the previous link status.
@@ -1021,7 +962,6 @@ BEGIN
-- waterway ways are linked when they are part of a relation and have the same class/type
IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN
{% if db.middle_db_format == '1' %}
FOR relation_members IN select members from planet_osm_rels r where r.id = NEW.osm_id and r.parts != array[]::bigint[]
LOOP
FOR i IN 1..array_upper(relation_members, 1) BY 2 LOOP
@@ -1040,29 +980,6 @@ BEGIN
END IF;
END LOOP;
END LOOP;
{% else %}
FOR relation_member IN
SELECT value FROM planet_osm_rels r, LATERAL jsonb_array_elements(r.members)
WHERE r.id = NEW.osm_id
LOOP
IF relation_member->>'role' IN ('', 'main_stream', 'side_stream')
and relation_member->>'type' = 'W'
THEN
{% if debug %}RAISE WARNING 'waterway parent %, child %', NEW.osm_id, relation_member;{% endif %}
FOR linked_node_id IN
SELECT place_id FROM placex
WHERE osm_type = 'W' and osm_id = (relation_member->>'ref')::bigint
and class = NEW.class and type in ('river', 'stream', 'canal', 'drain', 'ditch')
and (relation_member->>'role' != 'side_stream' or NEW.name->'name' = name->'name')
LOOP
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
{% if 'search_name' in db.tables %}
DELETE FROM search_name WHERE place_id = linked_node_id;
{% endif %}
END LOOP;
END IF;
END LOOP;
{% endif %}
{% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
END IF;
@@ -1079,7 +996,7 @@ BEGIN
{% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %}
NEW.parent_place_id := null;
is_place_address := not token_is_street_address(NEW.token_info);
is_place_address := coalesce(not NEW.address ? 'street' and NEW.address ? 'place', FALSE);
-- We have to find our parent road.
NEW.parent_place_id := find_parent_for_poi(NEW.osm_type, NEW.osm_id,
@@ -1096,7 +1013,7 @@ BEGIN
SELECT p.country_code, p.postcode, p.name FROM placex p
WHERE p.place_id = NEW.parent_place_id INTO location;
IF is_place_address and NEW.address ? 'place' THEN
IF is_place_address THEN
-- Check if the addr:place tag is part of the parent name
SELECT count(*) INTO i
FROM svals(location.name) AS pname WHERE pname = NEW.address->'place';
@@ -1203,7 +1120,7 @@ BEGIN
ELSE
-- No linked place? As a last resort check if the boundary is tagged with
-- a place type and adapt the rank address.
IF NEW.rank_address between 4 and 25 and NEW.extratags ? 'place' THEN
IF NEW.rank_address > 0 and NEW.extratags ? 'place' THEN
SELECT address_rank INTO place_address_level
FROM compute_place_rank(NEW.country_code, 'A', 'place',
NEW.extratags->'place', 0::SMALLINT, False, null);
@@ -1265,8 +1182,6 @@ BEGIN
END IF;
ELSEIF NEW.rank_address > 25 THEN
max_rank := 25;
ELSEIF NEW.class in ('place','boundary') and NEW.type in ('postcode','postal_code') THEN
max_rank := NEW.rank_search;
ELSE
max_rank := NEW.rank_address;
END IF;
@@ -1315,14 +1230,8 @@ BEGIN
{% endif %}
END IF;
IF NEW.postcode is null AND NEW.rank_search > 8
AND (NEW.rank_address > 0
OR ST_GeometryType(NEW.geometry) not in ('ST_LineString','ST_MultiLineString')
OR ST_Length(NEW.geometry) < 0.02)
THEN
NEW.postcode := get_nearest_postcode(NEW.country_code,
CASE WHEN NEW.rank_address > 25
THEN NEW.centroid ELSE NEW.geometry END);
IF NEW.postcode is null AND NEW.rank_search > 8 THEN
NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry);
END IF;
{% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %}


@@ -284,24 +284,3 @@ BEGIN
END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
rankings TEXT,
def_weight FLOAT)
RETURNS FLOAT
AS $$
DECLARE
rank JSON;
BEGIN
FOR rank IN
SELECT * FROM json_array_elements(rankings::JSON)
LOOP
IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
RETURN (rank->>0)::float;
END IF;
END LOOP;
RETURN def_weight;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
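In Python terms, weigh_search walks the JSON rankings in order and returns the first weight whose token list is fully contained in the search vector (a sketch, not part of the codebase):
import json

def weigh_search(search_vector, rankings, def_weight):
    for weight, tokens in json.loads(rankings):
        # Same test as the SQL: every token of the ranking must be present.
        if all(int(t) in search_vector for t in tokens):
            return float(weight)
    return def_weight

print(weigh_search([1, 5, 9], '[[0.5, [1, 5]], [0.9, [2]]]', 1.0))  # 0.5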


@@ -73,26 +73,6 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION get_rel_node_members(members JSONB, memberLabels TEXT[])
RETURNS SETOF BIGINT
AS $$
DECLARE
member JSONB;
BEGIN
FOR member IN SELECT * FROM jsonb_array_elements(members)
LOOP
IF member->>'type' = 'N' and member->>'role' = ANY(memberLabels) THEN
RETURN NEXT (member->>'ref')::bigint;
END IF;
END LOOP;
RETURN;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
-- Copy 'name' to or from the default language.
--
-- \param country_code Country code of the object being named.
@@ -293,8 +273,8 @@ BEGIN
END IF;
RETURN ST_Envelope(ST_Collect(
ST_Project(geom::geography, radius, 0.785398)::geometry,
ST_Project(geom::geography, radius, 3.9269908)::geometry));
ST_Project(geom, radius, 0.785398)::geometry,
ST_Project(geom, radius, 3.9269908)::geometry));
END;
$$
LANGUAGE plpgsql IMMUTABLE;
@@ -436,20 +416,6 @@ END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION simplify_large_polygons(geometry GEOMETRY)
RETURNS GEOMETRY
AS $$
BEGIN
IF ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')
and ST_MemSize(geometry) > 3000000
THEN
geometry := ST_SimplifyPreserveTopology(geometry, 0.0001);
END IF;
RETURN geometry;
END;
$$
LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION place_force_delete(placeid BIGINT)
RETURNS BOOLEAN
@@ -463,10 +429,9 @@ BEGIN
SELECT osm_type, osm_id, class, type FROM placex WHERE place_id = placeid INTO osmtype, osmid, pclass, ptype;
DELETE FROM import_polygon_delete where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
DELETE FROM import_polygon_error where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
-- force delete by directly entering it into the to-be-deleted table
INSERT INTO place_to_be_deleted (osm_type, osm_id, class, type, deferred)
VALUES(osmtype, osmid, pclass, ptype, false);
PERFORM flush_deleted_places();
-- force delete from place/placex by making it a very small geometry
UPDATE place set geometry = ST_SetSRID(ST_Point(0,0), 4326) where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
DELETE FROM place where osm_type = osmtype and osm_id = osmid and class = pclass and type = ptype;
RETURN TRUE;
END;
@@ -521,56 +486,3 @@ BEGIN
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION flush_deleted_places()
RETURNS INTEGER
AS $$
BEGIN
-- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-- delete from place table
ALTER TABLE place DISABLE TRIGGER place_before_delete;
DELETE FROM place USING place_to_be_deleted
WHERE place.osm_type = place_to_be_deleted.osm_type
and place.osm_id = place_to_be_deleted.osm_id
and place.class = place_to_be_deleted.class
and place.type = place_to_be_deleted.type
and not deferred;
ALTER TABLE place ENABLE TRIGGER place_before_delete;
-- Mark for delete in the placex table
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
and placex.osm_id = place_to_be_deleted.osm_id
and placex.class = place_to_be_deleted.class
and placex.type = place_to_be_deleted.type
and not deferred;
-- Mark for delete in interpolations
UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
WHERE place_to_be_deleted.osm_type = 'W'
and place_to_be_deleted.class = 'place'
and place_to_be_deleted.type = 'houses'
and location_property_osmline.osm_id = place_to_be_deleted.osm_id
and not deferred;
-- Clear todo list.
TRUNCATE TABLE place_to_be_deleted;
RETURN NULL;
END;
$$ LANGUAGE plpgsql;


@@ -23,10 +23,6 @@ CREATE INDEX IF NOT EXISTS idx_placex_parent_place_id
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry ON placex
USING GIST (geometry) {{db.tablespace.search_index}};
-- Index is needed during import but can be dropped as soon as a full
-- geometry index is in place. The partial index is almost as big as the full
-- index.
DROP INDEX IF EXISTS idx_placex_geometry_lower_rank_ways;
---
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
ON placex USING gist (geometry) {{db.tablespace.search_index}}
@@ -34,13 +30,6 @@ CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPolygon
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
---
-- used in reverse large area lookup
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
{{db.tablespace.search_index}}
WHERE rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND linked_place_id is null AND osm_type = 'N';
---
CREATE INDEX IF NOT EXISTS idx_osmline_parent_place_id
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}
WHERE parent_place_id is not null;


@@ -190,6 +190,7 @@ CREATE INDEX idx_placex_geometry_buildings ON placex
-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
-- - lookupPolygon()
CREATE INDEX idx_placex_geometry_placenode ON placex
USING {{postgres.spgist_geom}} (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26
@@ -298,15 +299,7 @@ CREATE TABLE IF NOT EXISTS wikipedia_redirect (
-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.
{% if db.middle_db_format == '1' %}
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts)
{{db.tablespace.address_index}}
WHERE tags @> ARRAY['associatedStreet'];
{% else %}
CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(planet_osm_member_ids(members, 'R'::character(1)))
WITH (fastupdate=off)
{{db.tablespace.address_index}};
{% endif %}
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet'];
-- Needed for lookups if a node is part of an interpolation.
CREATE INDEX IF NOT EXISTS idx_place_interpolations


@@ -41,17 +41,10 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null or info->>'place' is null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null and info->>'street' != '{}';
SELECT info->>'street' is not null;
$$ LANGUAGE SQL IMMUTABLE;


@@ -0,0 +1,40 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2022 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS word;
CREATE TABLE word (
word_id INTEGER,
word_token text NOT NULL,
type text NOT NULL,
word text,
info jsonb
) {{db.tablespace.search_data}};
CREATE INDEX idx_word_word_token ON word
USING BTREE (word_token) {{db.tablespace.search_index}};
-- Used when updating country names from the boundary relation.
CREATE INDEX idx_word_country_names ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'C';
-- Used when inserting new postcodes on updates.
CREATE INDEX idx_word_postcodes ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'P';
-- Used when inserting full words.
CREATE INDEX idx_word_full_word ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'W';
-- Used when inserting analyzed housenumbers (exclude old-style entries).
CREATE INDEX idx_word_housenumbers ON word
USING btree(word) {{db.tablespace.address_index}}
WHERE type = 'H' and word is not null;
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word start 1;
GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}";


@@ -41,17 +41,10 @@ AS $$
$$ LANGUAGE SQL IMMUTABLE STRICT;
CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null or info->>'place_search' is null;
$$ LANGUAGE SQL IMMUTABLE;
CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB)
RETURNS BOOLEAN
AS $$
SELECT info->>'street' is not null and info->>'street' != '{}';
SELECT info->>'street' is not null;
$$ LANGUAGE SQL IMMUTABLE;
@@ -347,7 +340,7 @@ BEGIN
END LOOP;
END IF;
-- consider parts before an opening bracket a full word as well
-- consider parts before an opening braket a full word as well
words := regexp_split_to_array(value, E'[(]');
IF array_upper(words, 1) > 1 THEN
s := make_standard_name(words[1]);


@@ -7,6 +7,6 @@ sys.path.append('@PROJECT_SOURCE_DIR@')
from nominatim.cli import get_set_parser
def get_parser():
parser = get_set_parser()
parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@')
return parser.parser


@@ -1,6 +1,6 @@
# just use the pgxs makefile
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "16" "15" "14" "13" "12" "11" "10" "9.6")
foreach(suffix ${PostgreSQL_ADDITIONAL_VERSIONS} "15" "14" "13" "12" "11" "10" "9.6")
list(APPEND PG_CONFIG_HINTS
"/usr/pgsql-${suffix}/bin")
endforeach()


@@ -11,11 +11,9 @@
#include "mb/pg_wchar.h"
#include <utfasciitable.h>
#if PG_MAJORVERSION_NUM > 15
#include "varatt.h"
#endif
#ifdef PG_MODULE_MAGIC
PG_MODULE_MAGIC;
#endif
Datum transliteration( PG_FUNCTION_ARGS );
Datum gettokenstring( PG_FUNCTION_ARGS );


@@ -1,38 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
The public interface of the Nominatim library.
Classes and functions defined in this file are considered stable. Always
import from this file, not from the source files directly.
"""
# See also https://github.com/PyCQA/pylint/issues/6006
# pylint: disable=useless-import-alias
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
from .connection import (SearchConnection as SearchConnection)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,
PlaceRef as PlaceRef,
Point as Point,
Bbox as Bbox,
GeometryFormat as GeometryFormat,
DataLayer as DataLayer)
from .results import (SourceTable as SourceTable,
AddressLine as AddressLine,
AddressLines as AddressLines,
WordInfo as WordInfo,
WordInfos as WordInfos,
DetailedResult as DetailedResult,
ReverseResult as ReverseResult,
ReverseResults as ReverseResults,
SearchResult as SearchResult,
SearchResults as SearchResults)
from .localization import (Locales as Locales)
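Given the stability promise in this module's docstring, client code should import only from nominatim.api; a minimal synchronous example (the project path is an assumption):
from pathlib import Path
from nominatim.api import NominatimAPI

api = NominatimAPI(Path('.'))   # project directory of the installation
print(api.status().message)     # 'OK' or a human-readable error
api.close()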


@@ -1,149 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
Awaitable, Callable, TypeVar
import asyncio
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log
T = TypeVar('T')
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
the table definitions. The underlying asynchronous SQLAlchemy
connection can be accessed with the 'connection' property.
The 't' property is the collection of Nominatim tables.
"""
def __init__(self, conn: AsyncConnection,
tables: SearchTables,
properties: Dict[str, Any]) -> None:
self.connection = conn
self.t = tables # pylint: disable=invalid-name
self._property_cache = properties
self._classtables: Optional[Set[str]] = None
self.query_timeout: Optional[int] = None
def set_query_timeout(self, timeout: Optional[int]) -> None:
""" Set the timeout after which a query over this connection
is cancelled.
"""
self.query_timeout = timeout
async def scalar(self, sql: sa.sql.base.Executable,
params: Union[Mapping[str, Any], None] = None
) -> Any:
""" Execute a 'scalar()' query on the connection.
"""
log().sql(self.connection, sql, params)
return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)
async def execute(self, sql: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None] = None
) -> 'sa.Result[Any]':
""" Execute a 'execute()' query on the connection.
"""
log().sql(self.connection, sql, params)
return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)
async def get_property(self, name: str, cached: bool = True) -> str:
""" Get a property from Nominatim's property table.
Property values are normally cached so that they are only
retrieved from the database when they are queried for the
first time with this function. Set 'cached' to False to force
reading the property from the database.
Raises a ValueError if the property does not exist.
"""
lookup_name = f'DBPROP:{name}'
if cached and lookup_name in self._property_cache:
return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
value = await self.connection.scalar(sql)
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
self._property_cache[lookup_name] = cast(str, value)
return cast(str, value)
async def get_db_property(self, name: str) -> Any:
""" Get a setting from the database. At the moment, only
'server_version', the version of the database software, can
be retrieved with this function.
Raises a ValueError if the property does not exist.
"""
if name != 'server_version':
raise ValueError(f"DB setting '{name}' not found in database.")
return self._property_cache['DB:server_version']
async def get_cached_value(self, group: str, name: str,
factory: Callable[[], Awaitable[T]]) -> T:
""" Access the cache for this Nominatim instance.
Each cache value needs to belong to a group and have a name.
This function is for internal API use only.
`factory` is an async callback function that produces
the value if it is not already cached.
Returns the cached value or the result of factory (also caching
the result).
"""
full_name = f'{group}:{name}'
if full_name in self._property_cache:
return cast(T, self._property_cache[full_name])
value = await factory()
self._property_cache[full_name] = value
return value
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists.
"""
if self._classtables is None:
res = await self.execute(sa.text("""SELECT tablename FROM pg_tables
WHERE tablename LIKE 'place_classtype_%'
"""))
self._classtables = {r[0] for r in res}
tablename = f"place_classtype_{cls}_{typ}"
if tablename not in self._classtables:
return None
if tablename in self.t.meta.tables:
return self.t.meta.tables[tablename]
return sa.Table(tablename, self.t.meta,
sa.Column('place_id', sa.BigInteger),
sa.Column('centroid', Geometry))


@@ -1,974 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of classes for API access via libraries.
"""
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
import asyncio
import sys
import contextlib
from pathlib import Path
import sqlalchemy as sa
import sqlalchemy.ext.asyncio as sa_asyncio
from nominatim.errors import UsageError
from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.async_core_library import PGCORE_LIB, PGCORE_ERROR
import nominatim.db.sqlite_functions
from nominatim.config import Configuration
from nominatim.api.connection import SearchConnection
from nominatim.api.status import get_status, StatusResult
from nominatim.api.lookup import get_detailed_place, get_simple_place
from nominatim.api.reverse import ReverseGeocoder
from nominatim.api.search import ForwardGeocoder, Phrase, PhraseType, make_query_analyzer
import nominatim.api.types as ntyp
from nominatim.api.results import DetailedResult, ReverseResult, SearchResults
class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
""" The main frontend to the Nominatim database implements the
functions for lookup, forward and reverse geocoding using
asynchronous functions.
This class shares most of the functions with its synchronous
version. There are some additional functions or parameters,
which are documented below.
"""
def __init__(self, project_dir: Path,
environ: Optional[Mapping[str, str]] = None,
loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
""" Initiate a new frontend object with synchronous API functions.
Parameters:
project_dir: Path to the
[project directory](../admin/Import.md#creating-the-project-directory)
of the local Nominatim installation.
environ: Mapping of [configuration parameters](../customize/Settings.md).
When set, replaces any configuration via environment variables.
Settings in this mapping also have precedence over any
parameters found in the `.env` file of the project directory.
loop: The asyncio event loop that will be used when calling
functions. Only needed, when a custom event loop is used
and the Python version is 3.9 or earlier.
"""
self.config = Configuration(project_dir, environ)
self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
if self.config.QUERY_TIMEOUT else None
self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
self.server_version = 0
if sys.version_info >= (3, 10):
self._engine_lock = asyncio.Lock()
else:
self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
self._engine: Optional[sa_asyncio.AsyncEngine] = None
self._tables: Optional[SearchTables] = None
self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
async def setup_database(self) -> None:
""" Set up the SQL engine and connections.
This function will be implicitly called when the database is
accessed for the first time. You may also call it explicitly to
avoid that the first call is delayed by the setup.
"""
async with self._engine_lock:
if self._engine:
return
extra_args: Dict[str, Any] = {'future': True,
'echo': self.config.get_bool('DEBUG_SQL')}
if self.config.get_int('API_POOL_SIZE') == 0:
extra_args['poolclass'] = sa.pool.NullPool
else:
extra_args['poolclass'] = sa.pool.AsyncAdaptedQueuePool
extra_args['max_overflow'] = 0
extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
if is_sqlite:
params = dict((p.split('=', 1)
for p in self.config.DATABASE_DSN[7:].split(';')))
dburl = sa.engine.URL.create('sqlite+aiosqlite',
database=params.get('dbname'))
if not ('NOMINATIM_DATABASE_RW' in self.config.environ
and self.config.get_bool('DATABASE_RW')) \
and not Path(params.get('dbname', '')).is_file():
raise UsageError(f"SQlite database '{params.get('dbname')}' does not exist.")
else:
dsn = self.config.get_database_params()
query = {k: v for k, v in dsn.items()
if k not in ('user', 'password', 'dbname', 'host', 'port')}
dburl = sa.engine.URL.create(
f'postgresql+{PGCORE_LIB}',
database=dsn.get('dbname'),
username=dsn.get('user'),
password=dsn.get('password'),
host=dsn.get('host'),
port=int(dsn['port']) if 'port' in dsn else None,
query=query)
engine = sa_asyncio.create_async_engine(dburl, **extra_args)
if is_sqlite:
server_version = 0
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
nominatim.db.sqlite_functions.install_custom_functions(dbapi_con)
cursor = dbapi_con.cursor()
cursor.execute("SELECT load_extension('mod_spatialite')")
cursor.execute('SELECT SetDecimalPrecision(7)')
dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
else:
try:
async with engine.begin() as conn:
result = await conn.scalar(sa.text('SHOW server_version_num'))
server_version = int(result)
if server_version >= 110000:
await conn.execute(sa.text("SET jit_above_cost TO '-1'"))
await conn.execute(sa.text(
"SET max_parallel_workers_per_gather TO '0'"))
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0
if server_version >= 110000:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
cursor.execute("SET jit_above_cost TO '-1'")
cursor.execute("SET max_parallel_workers_per_gather TO '0'")
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData()) # pylint: disable=no-member
self._engine = engine
async def close(self) -> None:
""" Close all active connections to the database. The NominatimAPIAsync
object remains usable after closing. If a new API function is
called, new connections are created.
"""
if self._engine is not None:
await self._engine.dispose()
@contextlib.asynccontextmanager
async def begin(self) -> AsyncIterator[SearchConnection]:
""" Create a new connection with automatic transaction handling.
This function may be used to get low-level access to the database.
Refer to the documentation of SQLAlchemy for details how to use
the connection object.
"""
if self._engine is None:
await self.setup_database()
assert self._engine is not None
assert self._tables is not None
async with self._engine.begin() as conn:
yield SearchConnection(conn, self._tables, self._property_cache)
async def status(self) -> StatusResult:
""" Return the status of the database.
"""
try:
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
status = await get_status(conn)
except (PGCORE_ERROR, sa.exc.OperationalError):
return StatusResult(700, 'Database connection failed')
return status
async def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
""" Get detailed information about a place in the database.
Returns None if there is no entry under the given ID.
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return await get_detailed_place(conn, place, details)
async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
""" Get simple information about a list of places.
Returns a list of place information for all IDs that were found.
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return SearchResults(filter(None,
[await get_simple_place(conn, p, details) for p in places]))
async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
""" Find a place by its coordinates. Also known as reverse geocoding.
Returns the closest result that can be found or None if
no place matches the given criteria.
"""
# The following negation handles NaN correctly. Don't change.
if not abs(coord[0]) <= 180 or not abs(coord[1]) <= 90:
# There are no results to be expected outside valid coordinates.
return None
details = ntyp.ReverseDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
geocoder = ReverseGeocoder(conn, details,
self.reverse_restrict_to_country_area)
return await geocoder.lookup(coord)
async def search(self, query: str, **params: Any) -> SearchResults:
""" Find a place by free-text search. Also known as forward geocoding.
"""
query = query.strip()
if not query:
raise UsageError('Nothing to search for.')
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
return await geocoder.lookup(phrases)
# pylint: disable=too-many-arguments,too-many-branches
async def search_address(self, amenity: Optional[str] = None,
street: Optional[str] = None,
city: Optional[str] = None,
county: Optional[str] = None,
state: Optional[str] = None,
country: Optional[str] = None,
postalcode: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an address using structured search.
"""
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
details = ntyp.SearchDetails.from_kwargs(params)
phrases: List[Phrase] = []
if amenity:
phrases.append(Phrase(PhraseType.AMENITY, amenity))
if street:
phrases.append(Phrase(PhraseType.STREET, street))
if city:
phrases.append(Phrase(PhraseType.CITY, city))
if county:
phrases.append(Phrase(PhraseType.COUNTY, county))
if state:
phrases.append(Phrase(PhraseType.STATE, state))
if postalcode:
phrases.append(Phrase(PhraseType.POSTCODE, postalcode))
if country:
phrases.append(Phrase(PhraseType.COUNTRY, country))
if not phrases:
raise UsageError('Nothing to search for.')
if amenity or street:
details.restrict_min_max_rank(26, 30)
elif city:
details.restrict_min_max_rank(13, 25)
elif county:
details.restrict_min_max_rank(10, 12)
elif state:
details.restrict_min_max_rank(5, 9)
elif postalcode:
details.restrict_min_max_rank(5, 11)
else:
details.restrict_min_max_rank(4, 4)
if 'layers' not in params:
details.layers = ntyp.DataLayer.ADDRESS
if amenity:
details.layers |= ntyp.DataLayer.POI
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup(phrases)
async def search_category(self, categories: List[Tuple[str, str]],
near_query: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an object of a certain category near another place.
The near place may either be given as an unstructured search
query in itself or as coordinates.
"""
if not categories:
return SearchResults()
details = ntyp.SearchDetails.from_kwargs(params)
async with self.begin() as conn:
conn.set_query_timeout(self.query_timeout)
if near_query:
phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
else:
phrases = []
if details.keywords:
await make_query_analyzer(conn)
geocoder = ForwardGeocoder(conn, details,
self.config.get_int('REQUEST_TIMEOUT') \
if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup_pois(categories, phrases)
class NominatimAPI:
""" This class provides a thin synchronous wrapper around the asynchronous
Nominatim functions. It creates its own event loop and runs each
synchronous function call to completion using that loop.
"""
def __init__(self, project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> None:
""" Initiate a new frontend object with synchronous API functions.
Parameters:
project_dir: Path to the
[project directory](../admin/Import.md#creating-the-project-directory)
of the local Nominatim installation.
environ: Mapping of [configuration parameters](../customize/Settings.md).
When set, replaces any configuration via environment variables.
Settings in this mapping also have precedence over any
parameters found in the `.env` file of the project directory.
"""
self._loop = asyncio.new_event_loop()
self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
def close(self) -> None:
""" Close all active connections to the database.
This function also closes the asynchronous worker loop making
the NominatimAPI object unusable.
"""
self._loop.run_until_complete(self._async_api.close())
self._loop.close()
@property
def config(self) -> Configuration:
""" Provide read-only access to the [configuration](#Configuration)
used by the API.
"""
return self._async_api.config
def status(self) -> StatusResult:
""" Return the status of the database as a dataclass object
with the fields described below.
Returns:
status(int): A status code as described on the status page.
message(str): Either 'OK' or a human-readable message of the
problem encountered.
software_version(tuple): A tuple with the version of the
Nominatim library consisting of (major, minor, patch, db-patch)
version.
database_version(tuple): A tuple with the version of the library
which was used for the import or last migration.
Also consists of (major, minor, patch, db-patch).
data_updated(datetime): Timestamp with the age of the data.
"""
return self._loop.run_until_complete(self._async_api.status())
def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
""" Get detailed information about a place in the database.
The result is a dataclass object with the fields described below
or `None` if the place could not be found in the database.
Parameters:
place: Description of the place to look up. See
[Place identification](Input-Parameter-Types.md#place-identification)
for the various ways to reference a place.
Other parameters:
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
parent_place_id (Optional[int]): Internal ID of the parent of this
place. Only meaningful for POI-like objects (places with a
rank_address of 30).
linked_place_id (Optional[int]): Internal ID of the place this object
links to. When this ID is set then there is no guarantee that
the rest of the result information is complete.
admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
for administrative boundary objects.
indexed_date (datetime): Timestamp when the place was last updated.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.details(place, **params))
def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults:
""" Get simple information about a list of places.
Returns a list of place information for all IDs that were found.
Each result is a dataclass with the fields detailed below.
Parameters:
places: List of descriptions of the place to look up. See
[Place identification](Input-Parameter-Types.md#place-identification)
for the various ways to reference a place.
Other parameters:
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.lookup(places, **params))
def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
""" Find a place by its coordinates. Also known as reverse geocoding.
Returns the closest result that can be found or `None` if
no place matches the given criteria. The result is a dataclass
with the fields as detailed below.
Parameters:
coord: Coordinate to lookup the place for as a Point
or a tuple (x, y). Must be in WGS84 projection.
Other parameters:
max_rank (int): Highest address rank to return. Can be used to
restrict search to streets or settlements.
layers (enum): Defines the kind of data to take into account.
See description of layers below. (Default: addresses and POIs)
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
distance (Optional[float]): Distance in degree from the input point.
"""
return self._loop.run_until_complete(self._async_api.reverse(coord, **params))
def search(self, query: str, **params: Any) -> SearchResults:
""" Find a place by free-text search. Also known as forward geocoding.
Parameters:
query: Free-form text query searching for a place.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to those within
the given distance in degrees of the `near` point. Ignored when
`near` is not set.
categories (list[tuple]): Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place. An empty list (the default) disables this filter.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search(query, **params))
# pylint: disable=too-many-arguments
def search_address(self, amenity: Optional[str] = None,
street: Optional[str] = None,
city: Optional[str] = None,
county: Optional[str] = None,
state: Optional[str] = None,
country: Optional[str] = None,
postalcode: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an address using structured search.
Parameters:
amenity: Name of a POI.
street: Street and optionally housenumber of the address. If the address
does not have a street, then the place the housenumber refers to.
city: Postal city of the address.
county: County equivalent of the address. Does not exist in all
jurisdictions.
state: State or province of the address.
country: Country with its full name or its ISO 3166-1 alpha-2 country code.
Do not use together with the country_code filter.
postalcode: Post code or ZIP for the place.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter. Do not use when the country parameter
is used.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to those within
the given distance in degrees of the `near` point. Ignored when
`near` is not set.
categories (list[tuple]): Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place. An empty list (the default) disables this filter.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_address(amenity, street, city, county,
state, country, postalcode, **params))
def search_category(self, categories: List[Tuple[str, str]],
near_query: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an object of a certain category near another place.
The near place may either be given as an unstructured search
query in itself or as a geographic area through the
viewbox or near parameters.
Parameters:
categories: Restrict search to places of the given
categories. The category is the main OSM tag assigned to each
place.
near_query: Optional free-text query to define the area to
restrict the search to.
Other parameters:
max_results (int): Maximum number of results to return. The
actual number of results may be less. (Default: 10)
min_rank (int): Lowest permissible rank for the result.
For addressable places this is the minimum
[address rank](../customize/Ranking.md#address-rank). For all
other places the [search rank](../customize/Ranking.md#search-rank)
is used.
max_rank (int): Highest permissible rank for the result. See min_rank above.
layers (enum): Defines the kind of data to take into account.
See [layers section](Input-Parameter-Types.md#layers) for details.
(Default: addresses and POIs)
countries (list[str]): Restrict search to countries with the given
ISO 3166-1 alpha-2 country code. An empty list (the default)
disables this filter.
excluded (list[int]): A list of internal IDs of places to exclude
from the search.
viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
bounded_viewbox (bool): Consider the bounding box given in `viewbox`
as a filter and return only results within the bounding box.
near (Optional[Point]): Focus search around the given point and
return results ordered by distance to the given point.
near_radius (Optional[float]): Restrict results to those within
the given distance in degrees of the `near` point. Ignored when
`near` is not set.
geometry_output (enum): Add the full geometry of the place to the result.
Multiple formats may be selected. Note that geometries can become
quite large. (Default: none)
geometry_simplification (float): Simplification factor to use on
the geometries before returning them. The factor expresses
the tolerance in degrees from which the geometry may differ.
Topology is preserved. (Default: 0.0)
address_details (bool): Add detailed information about the places
that make up the address of the requested object. (Default: False)
linked_places (bool): Add detailed information about the places
that link to the result. (Default: False)
parented_places (bool): Add detailed information about all places
for which the requested object is a parent, i.e. all places for
which the object provides the address details.
Only POI places can have parents. (Default: False)
keywords (bool): Add detailed information about the search terms
used for this place.
Returns:
source_table (enum): Data source of the place. See below for possible values.
category (tuple): A tuple of two strings with the primary OSM tag
and value.
centroid (Point): Point position of the place.
place_id (Optional[int]): Internal ID of the place. This ID may differ
for the same place between different installations.
osm_object (Optional[tuple]): OSM type and ID of the place, if available.
names (Optional[dict]): Dictionary of names of the place. Keys are
usually the corresponding OSM tag keys.
address (Optional[dict]): Dictionary of address parts directly
attributed to the place. Keys are usually the corresponding
OSM tag keys with the `addr:` prefix removed.
extratags (Optional[dict]): Dictionary of additional attributes for
the place. Usually OSM tag keys and values.
housenumber (Optional[str]): House number of the place, normalised
for lookup. To get the house number in its original spelling,
use `address['housenumber']`.
postcode (Optional[str]): Computed postcode for the place. To get
directly attributed postcodes, use `address['postcode']` instead.
wikipedia (Optional[str]): Reference to a wikipedia site for the place.
The string has the format <language code>:<wikipedia title>.
rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
importance (Optional[float]): Relative importance of the place. This is a measure
of how likely it is that the place will be searched for.
country_code (Optional[str]): Country the feature is in as
ISO 3166-1 alpha-2 country code.
address_rows (Optional[AddressLines]): List of places that make up the
computed address. `None` when `address_details` parameter was False.
linked_rows (Optional[AddressLines]): List of places that link to the object.
`None` when `linked_places` parameter was False.
parented_rows (Optional[AddressLines]): List of direct children of the place.
`None` when `parented_places` parameter was False.
name_keywords (Optional[WordInfos]): List of search words for the name of
the place. `None` when `keywords` parameter is set to False.
address_keywords (Optional[WordInfos]): List of search words for the address of
the place. `None` when `keywords` parameter is set to False.
bbox (Bbox): Bounding box of the full geometry of the place.
If the place is a single point, then the size of the bounding
box is guessed according to the type of place.
geometry (dict): Dictionary containing the full geometry of the place
in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_category(categories, near_query, **params))
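# A matching synchronous sketch, again assuming a hypothetical project
# directory; status(), search() and reverse() map directly onto the
# asynchronous calls documented above.
def _demo_sync_usage() -> None:
    api = NominatimAPI(Path('/srv/nominatim-project'))
    try:
        print(api.status().message)
        for place in api.search('Unter den Linden 1, Berlin'):
            print(place.category, place.centroid)
        hit = api.reverse((13.388, 52.517))  # (lon, lat) in WGS84
        if hit is not None:
            print(hit.rank_address, hit.distance)
    finally:
        api.close()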


@@ -1,97 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper functions for localizing names of results.
"""
from typing import Mapping, List, Optional
import re
class Locales:
""" Helper class for localization of names.
It takes a list of language prefixes in their order of preferred
usage.
"""
def __init__(self, langs: Optional[List[str]] = None):
self.languages = langs or []
self.name_tags: List[str] = []
# Build the list of supported tags. It is currently hard-coded.
self._add_lang_tags('name')
self._add_tags('name', 'brand')
self._add_lang_tags('official_name', 'short_name')
self._add_tags('official_name', 'short_name', 'ref')
def __bool__(self) -> bool:
return len(self.languages) > 0
def _add_tags(self, *tags: str) -> None:
for tag in tags:
self.name_tags.append(tag)
self.name_tags.append(f"_place_{tag}")
def _add_lang_tags(self, *tags: str) -> None:
for tag in tags:
for lang in self.languages:
self.name_tags.append(f"{tag}:{lang}")
self.name_tags.append(f"_place_{tag}:{lang}")
def display_name(self, names: Optional[Mapping[str, str]]) -> str:
""" Return the best matching name from a dictionary of names
containing different name variants.
If 'names' is None or empty, an empty string is returned. If no
appropriate localization is found, the first name is returned.
"""
if not names:
return ''
if len(names) > 1:
for tag in self.name_tags:
if tag in names:
return names[tag]
# Nothing? Return any of the other names as a default.
return next(iter(names.values()))
@staticmethod
def from_accept_languages(langstr: str) -> 'Locales':
""" Create a localization object from a language list in the
format of HTTP accept-languages header.
The function tries to be forgiving of format errors by first splitting
the string into comma-separated parts and then parsing each
description separately. Badly formatted parts are then ignored.
"""
# split string into languages
candidates = []
for desc in langstr.split(','):
m = re.fullmatch(r'\s*([a-z_-]+)(?:;\s*q\s*=\s*([01](?:\.\d+)?))?\s*',
desc, flags=re.I)
if m:
candidates.append((m[1], float(m[2] or 1.0)))
# sort the results by the weight of each language (preserving order).
candidates.sort(reverse=True, key=lambda e: e[1])
# If a language has a region variant, also add the language without
# the variant, but only if it isn't already in the list, so as not to
# mess up the weighting.
languages = []
for lid, _ in candidates:
languages.append(lid)
parts = lid.split('-', 1)
if len(parts) > 1 and all(c[0] != parts[0] for c in candidates):
languages.append(parts[0])
return Locales(languages)
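# A short usage sketch of the class above; the name dictionary is an
# assumption for illustration.
def _demo_locales() -> str:
    locales = Locales.from_accept_languages('de-CH,de;q=0.9,en;q=0.5')
    # languages == ['de-CH', 'de', 'en']: the region variant is expanded
    # returns 'Köln' because German name tags rank first in the tag list
    return locales.display_name({'name:de': 'Köln', 'name:en': 'Cologne'})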


@@ -1,433 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for specialised logging with HTML output.
"""
from typing import Any, Iterator, Optional, List, Tuple, cast, Union, Mapping, Sequence
from contextvars import ContextVar
import datetime as dt
import textwrap
import io
import re
import html
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
try:
from pygments import highlight
from pygments.lexers import PythonLexer, PostgresLexer
from pygments.formatters import HtmlFormatter
CODE_HIGHLIGHT = True
except ModuleNotFoundError:
CODE_HIGHLIGHT = False
def _debug_name(res: Any) -> str:
if res.names:
return cast(str, res.names.get('name', next(iter(res.names.values()))))
return f"Hnr {res.housenumber}" if res.housenumber is not None else '[NONE]'
class BaseLogger:
""" Interface for logging function.
The base implementation does nothing. Overwrite the functions
in derived classes which implement logging functionality.
"""
def get_buffer(self) -> str:
""" Return the current content of the log buffer.
"""
return ''
def function(self, func: str, **kwargs: Any) -> None:
""" Start a new debug chapter for the given function and its parameters.
"""
def section(self, heading: str) -> None:
""" Start a new section with the given title.
"""
def comment(self, text: str) -> None:
""" Add a simple comment to the debug output.
"""
def var_dump(self, heading: str, var: Any) -> None:
""" Print the content of the variable to the debug output prefixed by
the given heading.
"""
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
""" Print the table generated by the generator function.
"""
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
""" Print a list of search results generated by the generator function.
"""
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
""" Print the SQL for the given statement.
"""
def format_sql(self, conn: AsyncConnection, statement: 'sa.Executable',
extra_params: Union[Mapping[str, Any],
Sequence[Mapping[str, Any]], None]) -> str:
""" Return the compiled version of the statement.
"""
compiled = cast('sa.ClauseElement', statement).compile(conn.sync_engine)
params = dict(compiled.params)
if isinstance(extra_params, Mapping):
for k, v in extra_params.items():
if hasattr(v, 'to_wkt'):
params[k] = v.to_wkt()
elif isinstance(v, (int, float)):
params[k] = v
else:
params[k] = str(v)
elif isinstance(extra_params, Sequence) and extra_params:
for k in extra_params[0]:
params[k] = f':{k}'
sqlstr = str(compiled)
if conn.dialect.name == 'postgresql':
if sa.__version__.startswith('1'):
try:
sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
return sqlstr
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
assert conn.dialect.name == 'sqlite'
# params in positional order
pparams = (repr(params.get(name, None)) for name in compiled.positiontup) # type: ignore
sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', '?', sqlstr)
sqlstr = re.sub(r"\?", lambda m: next(pparams), sqlstr)
return sqlstr
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def _timestamp(self) -> None:
self._write(f'<p class="timestamp">[{dt.datetime.now()}]</p>')
def get_buffer(self) -> str:
return HTML_HEADER + self.buffer.getvalue() + HTML_FOOTER
def function(self, func: str, **kwargs: Any) -> None:
self._timestamp()
self._write(f"<h1>Debug output for {func}()</h1>\n<p>Parameters:<dl>")
for name, value in kwargs.items():
self._write(f'<dt>{name}</dt><dd>{self._python_var(value)}</dd>')
self._write('</dl></p>')
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"<h2>{heading}</h2>")
def comment(self, text: str) -> None:
self._timestamp()
self._write(f"<p>{text}</p>")
def var_dump(self, heading: str, var: Any) -> None:
self._timestamp()
if callable(var):
var = var()
self._write(f'<h5>{heading}</h5>{self._python_var(var)}')
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
self._timestamp()
head = next(rows)
assert head
self._write(f'<table><thead><tr><th colspan="{len(head)}">{heading}</th></tr><tr>')
for cell in head:
self._write(f'<th>{cell}</th>')
self._write('</tr></thead><tbody>')
for row in rows:
if row is not None:
self._write('<tr>')
for cell in row:
self._write(f'<td>{cell}</td>')
self._write('</tr>')
self._write('</tbody></table>')
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
""" Print a list of search results generated by the generator function.
"""
self._timestamp()
def format_osm(osm_object: Optional[Tuple[str, int]]) -> str:
if not osm_object:
return '-'
t, i = osm_object
if t == 'N':
fullt = 'node'
elif t == 'W':
fullt = 'way'
elif t == 'R':
fullt = 'relation'
else:
return f'{t}{i}'
return f'<a href="https://www.openstreetmap.org/{fullt}/{i}">{t}{i}</a>'
self._write(f'<h5>{heading}</h5><p><dl>')
total = 0
for rank, res in results:
self._write(f'<dt>[{rank:.3f}]</dt> <dd>{res.source_table.name}(')
self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
self._write(f"rank={res.rank_address}, ")
self._write(f"osm={format_osm(res.osm_object)}, ")
self._write(f'cc={res.country_code}, ')
self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
total += 1
self._write(f'</dl><b>TOTAL:</b> {total}</p>')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = self.format_sql(conn, statement, params)
if CODE_HIGHLIGHT:
sqlstr = highlight(sqlstr, PostgresLexer(),
HtmlFormatter(nowrap=True, lineseparator='<br />'))
self._write(f'<div class="highlight"><code class="lang-sql">{sqlstr}</code></div>')
else:
self._write(f'<code class="lang-sql">{html.escape(sqlstr)}</code>')
def _python_var(self, var: Any) -> str:
if CODE_HIGHLIGHT:
fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
return f'<code class="lang-python">{html.escape(str(var))}</code>'
def _write(self, text: str) -> None:
""" Add the raw text to the debug output.
"""
self.buffer.write(text)
class TextLogger(BaseLogger):
""" Logger creating output suitable for the console.
"""
def __init__(self) -> None:
self.buffer = io.StringIO()
def _timestamp(self) -> None:
self._write(f'[{dt.datetime.now()}]\n')
def get_buffer(self) -> str:
return self.buffer.getvalue()
def function(self, func: str, **kwargs: Any) -> None:
self._write(f"#### Debug output for {func}()\n\nParameters:\n")
for name, value in kwargs.items():
self._write(f' {name}: {self._python_var(value)}\n')
self._write('\n')
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"\n# {heading}\n\n")
def comment(self, text: str) -> None:
self._write(f"{text}\n")
def var_dump(self, heading: str, var: Any) -> None:
if callable(var):
var = var()
self._write(f'{heading}:\n {self._python_var(var)}\n\n')
def table_dump(self, heading: str, rows: Iterator[Optional[List[Any]]]) -> None:
self._write(f'{heading}:\n')
data = [list(map(self._python_var, row)) if row else None for row in rows]
assert data[0] is not None
num_cols = len(data[0])
maxlens = [max(len(d[i]) for d in data if d) for i in range(num_cols)]
tablewidth = sum(maxlens) + 3 * num_cols + 1
row_format = '| ' + ' | '.join(f'{{:<{l}}}' for l in maxlens) + ' |\n'
self._write('-'*tablewidth + '\n')
self._write(row_format.format(*data[0]))
self._write('-'*tablewidth + '\n')
for row in data[1:]:
if row:
self._write(row_format.format(*row))
else:
self._write('-'*tablewidth + '\n')
if data[-1]:
self._write('-'*tablewidth + '\n')
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
self._write(f'[{rank:.3f}] {res.source_table.name}(')
self._write(f"{_debug_name(res)}, type=({','.join(res.category)}), ")
self._write(f"rank={res.rank_address}, ")
self._write(f"osm={''.join(map(str, res.osm_object or []))}, ")
self._write(f'cc={res.country_code}, ')
self._write(f'importance={res.importance or -1:.5f})\n')
total += 1
self._write(f'TOTAL: {total}\n\n')
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")
def _python_var(self, var: Any) -> str:
return str(var)
def _write(self, text: str) -> None:
self.buffer.write(text)
logger: ContextVar[BaseLogger] = ContextVar('logger', default=BaseLogger())
def set_log_output(fmt: str) -> None:
""" Enable collecting debug information.
"""
if fmt == 'html':
logger.set(HTMLLogger())
elif fmt == 'text':
logger.set(TextLogger())
else:
logger.set(BaseLogger())
def log() -> BaseLogger:
""" Return the logger for the current context.
"""
return logger.get()
def get_and_disable() -> str:
""" Return the current content of the debug buffer and disable logging.
"""
buf = logger.get().get_buffer()
logger.set(BaseLogger())
return buf
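# A short usage sketch: enable text logging for the current context,
# emit some debug output and collect the buffer.
def _demo_logging() -> str:
    set_log_output('text')
    log().section('Demo section')
    log().var_dump('Demo variable', {'query': 'Berlin'})
    return get_and_disable()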
HTML_HEADER: str = """<!DOCTYPE html>
<html>
<head>
<title>Nominatim - Debug</title>
<style>
""" + \
(HtmlFormatter(nobackground=True).get_style_defs('.highlight') if CODE_HIGHLIGHT else '') +\
"""
h2 { font-size: x-large }
dl {
padding-left: 10pt;
font-family: monospace
}
dt {
float: left;
font-weight: bold;
margin-right: 0.5em
}
dt::after { content: ": "; }
dd::after {
clear: left;
display: block
}
.lang-sql {
color: #555;
font-size: small
}
h5 {
border: solid lightgrey 0.1pt;
margin-bottom: 0;
background-color: #f7f7f7
}
h5 + .highlight {
padding: 3pt;
border: solid lightgrey 0.1pt
}
table, th, tbody {
border: thin solid;
border-collapse: collapse;
}
td {
border-right: thin solid;
padding-left: 3pt;
padding-right: 3pt;
}
.timestamp {
font-size: 0.8em;
color: darkblue;
width: calc(100% - 5pt);
text-align: right;
position: absolute;
left: 0;
margin-top: -5px;
}
</style>
</head>
<body>
"""
HTML_FOOTER: str = "</body></html>"


@@ -1,251 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of place lookup by ID.
"""
from typing import Optional, Callable, Tuple, Type
import datetime as dt
import sqlalchemy as sa
from nominatim.typing import SaColumn, SaRow, SaSelect
from nominatim.api.connection import SearchConnection
import nominatim.api.types as ntyp
import nominatim.api.results as nres
from nominatim.api.logging import log
RowFunc = Callable[[Optional[SaRow], Type[nres.BaseResultT]], Optional[nres.BaseResultT]]
GeomFunc = Callable[[SaSelect, SaColumn], SaSelect]
async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the placex table and return the
base information.
"""
log().section("Find in placex table")
t = conn.t.placex
sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.admin_level,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia, t.c.indexed_date,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id,
t.c.geometry.ST_Expand(0).label('bbox'),
t.c.centroid)
if isinstance(place, ntyp.PlaceID):
sql = sql.where(t.c.place_id == place.place_id)
elif isinstance(place, ntyp.OsmID):
sql = sql.where(t.c.osm_type == place.osm_type)\
.where(t.c.osm_id == place.osm_id)
if place.osm_class:
sql = sql.where(t.c.class_ == place.osm_class)
else:
sql = sql.order_by(t.c.class_)
sql = sql.limit(1)
else:
return None
return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the osmline table and return the
base information.
"""
log().section("Find in interpolation table")
t = conn.t.osmline
sql = sa.select(t.c.place_id, t.c.osm_id, t.c.parent_place_id,
t.c.indexed_date, t.c.startnumber, t.c.endnumber,
t.c.step, t.c.address, t.c.postcode, t.c.country_code,
t.c.linegeo.ST_Centroid().label('centroid'))
if isinstance(place, ntyp.PlaceID):
sql = sql.where(t.c.place_id == place.place_id)
elif isinstance(place, ntyp.OsmID) and place.osm_type == 'W':
# There may be multiple interpolations for a single way.
# If 'class' contains a number, return the one that belongs to that number.
sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
if place.osm_class and place.osm_class.isdigit():
sql = sql.order_by(sa.func.greatest(0,
int(place.osm_class) - t.c.endnumber,
t.c.startnumber - int(place.osm_class)))
else:
return None
return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
async def find_in_tiger(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the table of Tiger addresses and return
the base information. Only lookup by place ID is supported.
"""
if not isinstance(place, ntyp.PlaceID):
return None
log().section("Find in TIGER table")
t = conn.t.tiger
parent = conn.t.placex
sql = sa.select(t.c.place_id, t.c.parent_place_id,
parent.c.osm_type, parent.c.osm_id,
t.c.startnumber, t.c.endnumber, t.c.step,
t.c.postcode,
t.c.linegeo.ST_Centroid().label('centroid'))\
.where(t.c.place_id == place.place_id)\
.join(parent, t.c.parent_place_id == parent.c.place_id, isouter=True)
return (await conn.execute(add_geometries(sql, t.c.linegeo))).one_or_none()
async def find_in_postcode(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc) -> Optional[SaRow]:
""" Search for the given place in the postcode table and return the
base information. Only lookup by place ID is supported.
"""
if not isinstance(place, ntyp.PlaceID):
return None
log().section("Find in postcode table")
t = conn.t.postcode
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.rank_search, t.c.rank_address,
t.c.indexed_date, t.c.postcode, t.c.country_code,
t.c.geometry.label('centroid')) \
.where(t.c.place_id == place.place_id)
return (await conn.execute(add_geometries(sql, t.c.geometry))).one_or_none()
async def find_in_all_tables(conn: SearchConnection, place: ntyp.PlaceRef,
add_geometries: GeomFunc
) -> Tuple[Optional[SaRow], RowFunc[nres.BaseResultT]]:
""" Search for the given place in all data tables
and return the base information.
"""
row = await find_in_placex(conn, place, add_geometries)
log().var_dump('Result (placex)', row)
if row is not None:
return row, nres.create_from_placex_row
row = await find_in_osmline(conn, place, add_geometries)
log().var_dump('Result (osmline)', row)
if row is not None:
return row, nres.create_from_osmline_row
row = await find_in_postcode(conn, place, add_geometries)
log().var_dump('Result (postcode)', row)
if row is not None:
return row, nres.create_from_postcode_row
row = await find_in_tiger(conn, place, add_geometries)
log().var_dump('Result (tiger)', row)
return row, nres.create_from_tiger_row
async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
details: ntyp.LookupDetails) -> Optional[nres.DetailedResult]:
""" Retrieve a place with additional details from the database.
"""
log().function('get_detailed_place', place=place, details=details)
if details.geometry_output and details.geometry_output != ntyp.GeometryFormat.GEOJSON:
raise ValueError("lookup only supports geojosn polygon output.")
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_AsGeoJSON(
sa.case((sa.func.ST_NPoints(column) > 5000,
sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
else_=column), 7).label('geometry_geojson'))
else:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
row_func: RowFunc[nres.DetailedResult]
row, row_func = await find_in_all_tables(conn, place, _add_geometry)
if row is None:
return None
result = row_func(row, nres.DetailedResult)
assert result is not None
# add missing details
if 'type' in result.geometry:
result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
result.geometry['type'])
indexed_date = getattr(row, 'indexed_date', None)
if indexed_date is not None:
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
await nres.add_result_details(conn, [result], details)
return result
async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
details: ntyp.LookupDetails) -> Optional[nres.SearchResult]:
""" Retrieve a place as a simple search result from the database.
"""
log().function('get_simple_place', place=place, details=details)
def _add_geometry(sql: SaSelect, col: SaColumn) -> SaSelect:
if not details.geometry_output:
return sql
out = []
if details.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & ntyp.GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & ntyp.GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & ntyp.GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
row_func: RowFunc[nres.SearchResult]
row, row_func = await find_in_all_tables(conn, place, _add_geometry)
if row is None:
return None
result = row_func(row, nres.SearchResult)
assert result is not None
# add missing details
if hasattr(row, 'bbox'):
result.bbox = ntyp.Bbox.from_wkb(row.bbox)
await nres.add_result_details(conn, [result], details)
return result
GEOMETRY_TYPE_MAP = {
'POINT': 'ST_Point',
'MULTIPOINT': 'ST_MultiPoint',
'LINESTRING': 'ST_LineString',
'MULTILINESTRING': 'ST_MultiLineString',
'POLYGON': 'ST_Polygon',
'MULTIPOLYGON': 'ST_MultiPolygon',
'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
}
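# A usage sketch for the functions above, assuming an open SearchConnection;
# the OSM way id is a made-up example and the OsmID/LookupDetails
# constructor arguments are assumptions based on their documented fields.
async def _demo_lookup(conn: SearchConnection) -> None:
    place = ntyp.OsmID('W', 243794557)
    details = ntyp.LookupDetails(address_details=True)
    result = await get_detailed_place(conn, place, details)
    if result is not None:
        print(result.place_id, result.rank_address)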


@@ -1,56 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Helper classes and functions for formatting results into API responses.
"""
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping
from collections import defaultdict
T = TypeVar('T') # pylint: disable=invalid-name
FormatFunc = Callable[[T, Mapping[str, Any]], str]
class FormatDispatcher:
""" Helper class to conveniently create formatting functions in
a module using decorators.
"""
def __init__(self) -> None:
self.format_functions: Dict[Type[Any], Dict[str, FormatFunc[Any]]] = defaultdict(dict)
def format_func(self, result_class: Type[T],
fmt: str) -> Callable[[FormatFunc[T]], FormatFunc[T]]:
""" Decorator for a function that formats a given type of result into the
selected format.
"""
def decorator(func: FormatFunc[T]) -> FormatFunc[T]:
self.format_functions[result_class][fmt] = func
return func
return decorator
def list_formats(self, result_type: Type[Any]) -> List[str]:
""" Return a list of formats supported by this formatter.
"""
return list(self.format_functions[result_type].keys())
def supports_format(self, result_type: Type[Any], fmt: str) -> bool:
""" Check if the given format is supported by this formatter.
"""
return fmt in self.format_functions[result_type]
def format_result(self, result: Any, fmt: str, options: Mapping[str, Any]) -> str:
""" Convert the given result into a string using the given format.
The format is expected to be in the list returned by
`list_formats()`.
"""
return self.format_functions[type(result)][fmt](result, options)
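# A self-contained usage sketch: register a formatter for an invented
# result class and dispatch on it.
class _Greeting:
    def __init__(self, name: str) -> None:
        self.name = name

_dispatch = FormatDispatcher()

@_dispatch.format_func(_Greeting, 'plain')
def _format_greeting(result: _Greeting, _options: Mapping[str, Any]) -> str:
    return f'Hello, {result.name}!'

assert _dispatch.supports_format(_Greeting, 'plain')
assert _dispatch.format_result(_Greeting('world'), 'plain', {}) == 'Hello, world!'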


@@ -1,752 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Dataclasses for search results and helper functions to fill them.
Data classes are part of the public API while the functions are for
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast, Callable
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
from nominatim.typing import SaSelect, SaRow
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.localization import Locales
# This file defines complex result data classes.
# pylint: disable=too-many-instance-attributes
def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str]]:
""" Mix-in names from linked places, so that they show up
as standard names where necessary.
"""
if not names:
return None
out = {}
for k, v in names.items():
if k.startswith('_place_'):
outkey = k[7:]
out[k if outkey in names else outkey] = v
else:
out[k] = v
return out
class SourceTable(enum.Enum):
""" The `SourceTable` type lists the possible sources a result can have.
"""
PLACEX = 1
""" The placex table is the main source for result usually containing
OSM data.
"""
OSMLINE = 2
""" The osmline table contains address interpolations from OSM data.
Interpolation addresses are always approximate. The OSM id in the
result refers to the OSM way with the interpolation line object.
"""
TIGER = 3
""" TIGER address data contains US addresses imported on the side,
see [Installing TIGER data](../customize/Tiger.md).
TIGER addresses are also interpolations. The addresses always refer
to a street from OSM data. The OSM id in the result refers to
that street.
"""
POSTCODE = 4
""" The postcode table contains artificial centroids for postcodes,
computed from the postcodes available with address points. Results
are always approximate.
"""
COUNTRY = 5
""" The country table provides a fallback, when country data is missing
in the OSM data.
"""
@dataclasses.dataclass
class AddressLine:
""" The `AddressLine` may contain the following fields about a related place
and its function as an address object. Most fields are optional.
Their presence depends on the kind and function of the address part.
"""
category: Tuple[str, str]
""" Main category of the place, described by a key-value pair.
"""
names: Dict[str, str]
""" All available names for the place including references, alternative
names and translations.
"""
fromarea: bool
""" If true, then the exact area of the place is known. Without area
information, Nominatim has to make an educated guess whether an address
belongs to one place or another.
"""
isaddress: bool
""" If true, this place should be considered for the final address display.
Nominatim will sometimes include more than one candidate for
the address in the list when it cannot reliably determine where the
place belongs. It will consider the names of all candidates when searching,
but when displaying the result, only the most likely candidate should
be shown.
"""
rank_address: int
""" [Address rank](../customize/Ranking.md#address-rank) of the place.
"""
distance: float
""" Distance in degrees between the result place and this address part.
"""
place_id: Optional[int] = None
""" Internal ID of the place.
"""
osm_object: Optional[Tuple[str, int]] = None
""" OSM type and ID of the place, if such an object exists.
"""
extratags: Optional[Dict[str, str]] = None
""" Any extra information available about the place. This is a dictionary
that usually contains OSM tag key-value pairs.
"""
admin_level: Optional[int] = None
""" The administrative level of a boundary as tagged in the input data.
This field is only meaningful for places of the category
(boundary, administrative).
"""
local_name: Optional[str] = None
""" Place holder for localization of this address part. See
[Localization](#localization) below.
"""
class AddressLines(List[AddressLine]):
""" Sequence of address lines order in descending order by their rank.
"""
def localize(self, locales: Locales) -> List[str]:
""" Set the local name of address parts according to the chosen
locale. Return the list of local names without duplicates.
Only address parts that are marked as isaddress are localized
and returned.
"""
label_parts: List[str] = []
for line in self:
if line.isaddress and line.names:
line.local_name = locales.display_name(line.names)
if not label_parts or label_parts[-1] != line.local_name:
label_parts.append(line.local_name)
return label_parts
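# Sketch of the deduplication above (hypothetical data): if a boundary
# and a place node in consecutive address lines both resolve to the
# local name 'Paris', the string is appended only once, because only
# the immediately preceding label part is compared.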
@dataclasses.dataclass
class WordInfo:
""" Each entry in the list of search terms contains the
following detailed information.
"""
word_id: int
""" Internal identifier for the word.
"""
word_token: str
""" Normalised and transliterated form of the word.
This form is used for searching.
"""
word: Optional[str] = None
""" Untransliterated form, if available.
"""
WordInfos = Sequence[WordInfo]
@dataclasses.dataclass
class BaseResult:
""" Data class collecting information common to all
types of search results.
"""
source_table: SourceTable
category: Tuple[str, str]
centroid: Point
place_id : Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
parent_place_id: Optional[int] = None
linked_place_id: Optional[int] = None
admin_level: int = 15
locale_name: Optional[str] = None
display_name: Optional[str] = None
names: Optional[Dict[str, str]] = None
address: Optional[Dict[str, str]] = None
extratags: Optional[Dict[str, str]] = None
housenumber: Optional[str] = None
postcode: Optional[str] = None
wikipedia: Optional[str] = None
rank_address: int = 30
rank_search: int = 30
importance: Optional[float] = None
country_code: Optional[str] = None
address_rows: Optional[AddressLines] = None
linked_rows: Optional[AddressLines] = None
parented_rows: Optional[AddressLines] = None
name_keywords: Optional[WordInfos] = None
address_keywords: Optional[WordInfos] = None
geometry: Dict[str, str] = dataclasses.field(default_factory=dict)
@property
def lat(self) -> float:
""" Get the latitude (or y) of the center point of the place.
"""
return self.centroid[1]
@property
def lon(self) -> float:
""" Get the longitude (or x) of the center point of the place.
"""
return self.centroid[0]
def calculated_importance(self) -> float:
""" Get a valid importance value. This is either the stored importance
of the place or an artificial value computed from the place's
search rank.
"""
return self.importance or (0.40001 - (self.rank_search/75.0))
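# Worked example: a place with rank_search 30 and no stored importance
# gets 0.40001 - 30/75 = 0.00001, while a country (rank_search 4) gets
# 0.40001 - 4/75 ≈ 0.34668. Places with a lower search rank (larger,
# more prominent areas) thus receive a larger artificial importance.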
def localize(self, locales: Locales) -> None:
""" Fill the locale_name and the display_name field for the
place and, if available, its address information.
"""
self.locale_name = locales.display_name(self.names)
if self.address_rows:
self.display_name = ', '.join(self.address_rows.localize(locales))
else:
self.display_name = self.locale_name
BaseResultT = TypeVar('BaseResultT', bound=BaseResult)
@dataclasses.dataclass
class DetailedResult(BaseResult):
""" A search result with more internal information from the database
added.
"""
indexed_date: Optional[dt.datetime] = None
@dataclasses.dataclass
class ReverseResult(BaseResult):
""" A search result for reverse geocoding.
"""
distance: Optional[float] = None
bbox: Optional[Bbox] = None
class ReverseResults(List[ReverseResult]):
""" Sequence of reverse lookup results ordered by distance.
May be empty when no result was found.
"""
@dataclasses.dataclass
class SearchResult(BaseResult):
""" A search result for forward geocoding.
"""
bbox: Optional[Bbox] = None
accuracy: float = 0.0
@property
def ranking(self) -> float:
""" Return the ranking, a combined measure of accuracy and importance.
"""
return (self.accuracy if self.accuracy is not None else 1) \
- self.calculated_importance()
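# Example: with an accuracy of 0.3 and a calculated importance of 0.2
# the ranking is 0.1. Smaller values indicate better results: a poor
# accuracy raises the ranking while a high importance lowers it.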
class SearchResults(List[SearchResult]):
""" Sequence of forward lookup results ordered by relevance.
May be empty when no result was found.
"""
def _filter_geometries(row: SaRow) -> Dict[str, str]:
return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
if k.startswith('geometry_')}
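# 'geometry_' is 9 characters long, so k[9:] strips the prefix. A row
# mapping like {'geometry_geojson': '{...}', 'place_id': 42}
# (hypothetical) yields {'geojson': '{...}'}.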
def create_from_placex_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the placex table. 'class_type' defines the type of result
to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.PLACEX,
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
parent_place_id = row.parent_place_id,
linked_place_id = getattr(row, 'linked_place_id', None),
admin_level = getattr(row, 'admin_level', 15),
names=_mingle_name_tags(row.name),
address=row.address,
extratags=row.extratags,
housenumber=row.housenumber,
postcode=row.postcode,
wikipedia=row.wikipedia,
rank_address=row.rank_address,
rank_search=row.rank_search,
importance=row.importance,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
def create_from_osmline_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the address interpolation table osmline. 'class_type' defines
the type of result to return. Returns None if the row is None.
If the row contains a housenumber, then the housenumber is filled out.
Otherwise the result contains the interpolation information in extratags.
"""
if row is None:
return None
hnr = getattr(row, 'housenumber', None)
res = class_type(source_table=SourceTable.OSMLINE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=('W', row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
address=row.address,
postcode=row.postcode,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
res.extratags = {'startnumber': str(row.startnumber),
'endnumber': str(row.endnumber),
'step': str(row.step)}
else:
res.housenumber = str(hnr)
return res
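# For a row without a concrete housenumber the result describes the
# whole interpolation line, e.g. (hypothetical values):
# extratags == {'startnumber': '2', 'endnumber': '10', 'step': '2'}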
def create_from_tiger_row(row: Optional[SaRow],
class_type: Type[BaseResultT],
osm_type: Optional[str] = None,
osm_id: Optional[int] = None) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the Tiger data interpolation table. 'class_type' defines
the type of result to return. Returns None if the row is None.
If the row contains a housenumber, then the housenumber is filled out.
Otherwise the result contains the interpolation information in extratags.
"""
if row is None:
return None
hnr = getattr(row, 'housenumber', None)
res = class_type(source_table=SourceTable.TIGER,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
postcode=row.postcode,
country_code='us',
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
if hnr is None:
res.extratags = {'startnumber': str(row.startnumber),
'endnumber': str(row.endnumber),
'step': str(row.step)}
else:
res.housenumber = str(hnr)
return res
def create_from_postcode_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the postcode table. 'class_type' defines
the type of result to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.POSTCODE,
place_id=row.place_id,
parent_place_id = row.parent_place_id,
category=('place', 'postcode'),
names={'ref': row.postcode},
rank_search=row.rank_search,
rank_address=row.rank_address,
country_code=row.country_code,
centroid=Point.from_wkb(row.centroid),
geometry=_filter_geometries(row))
def create_from_country_row(row: Optional[SaRow],
class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the fallback country tables. 'class_type' defines
the type of result to return. Returns None if the row is None.
"""
if row is None:
return None
return class_type(source_table=SourceTable.COUNTRY,
category=('place', 'country'),
centroid=Point.from_wkb(row.centroid),
names=row.name,
rank_address=4, rank_search=4,
country_code=row.country_code,
geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
details: LookupDetails) -> None:
""" Retrieve more details from the database according to the
parameters specified in 'details'.
"""
if results:
log().section('Query details for result')
if details.address_details:
log().comment('Query address details')
await complete_address_details(conn, results)
if details.linked_places:
log().comment('Query linked places')
for result in results:
await complete_linked_places(conn, result)
if details.parented_places:
log().comment('Query parent places')
for result in results:
await complete_parented_places(conn, result)
if details.keywords:
log().comment('Query keywords')
for result in results:
await complete_keywords(conn, result)
for result in results:
result.localize(details.locales)
def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
""" Create a new AddressLine from the results of a database query.
"""
extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
if 'linked_place' in extratags:
extratags['place'] = extratags['linked_place']
names = _mingle_name_tags(row.name) or {}
if getattr(row, 'housenumber', None) is not None:
names['housenumber'] = row.housenumber
if isaddress is None:
isaddress = getattr(row, 'isaddress', True)
return AddressLine(place_id=row.place_id,
osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
category=(getattr(row, 'class'), row.type),
names=names,
extratags=extratags,
admin_level=row.admin_level,
fromarea=row.fromarea,
isaddress=isaddress,
rank_address=row.rank_address,
distance=row.distance)
def _get_address_lookup_id(result: BaseResultT) -> int:
assert result.place_id
if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
return result.parent_place_id or result.place_id
return result.linked_place_id or result.place_id
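# Sketch of the branching above: a POI or house (placex, rank_search
# over 27) is resolved via the street it belongs to (parent_place_id),
# while a larger area follows linked_place_id when it was merged with
# another object; the place itself serves as fallback in both cases.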
async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
assert result.address_rows is not None
if result.category[0] not in ('boundary', 'place')\
or result.category[1] not in ('postal_code', 'postcode'):
postcode = result.postcode
if not postcode and result.address:
postcode = result.address.get('postcode')
if postcode and ',' not in postcode and ';' not in postcode:
result.address_rows.append(AddressLine(
category=('place', 'postcode'),
names={'ref': postcode},
fromarea=False, isaddress=True, rank_address=5,
distance=0.0))
if result.country_code:
async def _get_country_names() -> Optional[Dict[str, str]]:
t = conn.t.country_name
sql = sa.select(t.c.name, t.c.derived_name)\
.where(t.c.country_code == result.country_code)
for cres in await conn.execute(sql):
names = cast(Dict[str, str], cres[0])
if cres[1]:
names.update(cast(Dict[str, str], cres[1]))
return names
return None
country_names = await conn.get_cached_value('COUNTRY_NAME',
result.country_code,
_get_country_names)
if country_names:
result.address_rows.append(AddressLine(
category=('place', 'country'),
names=country_names,
fromarea=False, isaddress=True, rank_address=4,
distance=0.0))
result.address_rows.append(AddressLine(
category=('place', 'country_code'),
names={'ref': result.country_code}, extratags = {},
fromarea=True, isaddress=False, rank_address=4,
distance=0.0))
def _setup_address_details(result: BaseResultT) -> None:
""" Retrieve information about places that make up the address of the result.
"""
result.address_rows = AddressLines()
if result.names:
result.address_rows.append(AddressLine(
place_id=result.place_id,
osm_object=result.osm_object,
category=result.category,
names=result.names,
extratags=result.extratags or {},
admin_level=result.admin_level,
fromarea=True, isaddress=True,
rank_address=result.rank_address, distance=0.0))
if result.source_table == SourceTable.PLACEX and result.address:
housenumber = result.address.get('housenumber')\
or result.address.get('streetnumber')\
or result.address.get('conscriptionnumber')
elif result.housenumber:
housenumber = result.housenumber
else:
housenumber = None
if housenumber:
result.address_rows.append(AddressLine(
category=('place', 'house_number'),
names={'ref': housenumber},
fromarea=True, isaddress=True, rank_address=28, distance=0))
if result.address and '_unlisted_place' in result.address:
result.address_rows.append(AddressLine(
category=('place', 'locality'),
names={'name': result.address['_unlisted_place']},
fromarea=False, isaddress=True, rank_address=25, distance=0))
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
""" Retrieve information about places that make up the address of the result.
"""
for result in results:
_setup_address_details(result)
### Look up entries from the place_addressline table
lookup_ids = [{'pid': r.place_id,
'lid': _get_address_lookup_id(r),
'names': list(r.address.values()) if r.address else [],
'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
for r in results if r.place_id]
if not lookup_ids:
return
ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
t = conn.t.placex
taddr = conn.t.addressline
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level, taddr.c.fromarea,
sa.case((t.c.rank_address == 11, 5),
else_=t.c.rank_address).label('rank_address'),
taddr.c.distance, t.c.country_code, t.c.postcode)\
.join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
.join(t, taddr.c.address_place_id == t.c.place_id)\
.order_by('src_place_id')\
.order_by(sa.column('rank_address').desc())\
.order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
.order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
(taddr.c.isaddress, 0),
(sa.and_(taddr.c.fromarea,
t.c.geometry.ST_Contains(
sa.func.ST_GeomFromEWKT(
ltab.c.value['c'].as_string()))), 1),
else_=-1).desc())\
.order_by(taddr.c.fromarea.desc())\
.order_by(taddr.c.distance.desc())\
.order_by(t.c.rank_search.desc())
current_result = None
current_rank_address = -1
for row in await conn.execute(sql):
if current_result is None or row.src_place_id != current_result.place_id:
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
current_rank_address = -1
location_isaddress = row.rank_address != current_rank_address
if current_result.country_code is None and row.country_code:
current_result.country_code = row.country_code
if row.type in ('postcode', 'postal_code') and location_isaddress:
if not row.fromarea or \
(current_result.address and 'postcode' in current_result.address):
location_isaddress = False
else:
current_result.postcode = None
assert current_result.address_rows is not None
current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
current_rank_address = row.rank_address
for result in results:
await _finalize_entry(conn, result)
### Finally add the record for the parent entry where necessary.
parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
if parent_lookup_ids:
ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type, t.c.extratags,
t.c.admin_level,
t.c.rank_address)\
.where(t.c.place_id == ltab.c.value['lid'].as_integer())
for row in await conn.execute(sql):
current_result = next((r for r in results if r.place_id == row.src_place_id), None)
assert current_result is not None
assert current_result.address_rows is not None
current_result.address_rows.append(AddressLine(
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
names=row.name, extratags=row.extratags or {},
admin_level=row.admin_level,
fromarea=True, isaddress=True,
rank_address=row.rank_address, distance=0.0))
### Now sort everything
def mk_sort_key(place_id: Optional[int]) -> Callable[[AddressLine], Tuple[bool, int, bool]]:
return lambda a: (a.place_id != place_id, -a.rank_address, a.isaddress)
for result in results:
assert result.address_rows is not None
result.address_rows.sort(key=mk_sort_key(result.place_id))
def _placex_select_address_row(conn: SearchConnection,
centroid: Point) -> SaSelect:
t = conn.t.placex
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_.label('class'), t.c.type,
t.c.admin_level, t.c.housenumber,
t.c.geometry.is_area().label('fromarea'),
t.c.rank_address,
t.c.geometry.distance_spheroid(
sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that link to the result.
"""
result.linked_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return
sql = _placex_select_address_row(conn, result.centroid)\
.where(conn.t.placex.c.linked_place_id == result.place_id)
for row in await conn.execute(sql):
result.linked_rows.append(_result_row_to_address_row(row))
async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about the search terms used for this place.
Requires that the query analyzer was initialised to get access to
the word table.
"""
t = conn.t.search_name
sql = sa.select(t.c.name_vector, t.c.nameaddress_vector)\
.where(t.c.place_id == result.place_id)
result.name_keywords = []
result.address_keywords = []
t = conn.t.meta.tables['word']
sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
for name_tokens, address_tokens in await conn.execute(sql):
for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
result.name_keywords.append(WordInfo(*row))
for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
result.address_keywords.append(WordInfo(*row))
async def complete_parented_places(conn: SearchConnection, result: BaseResult) -> None:
""" Retrieve information about places that the result provides the
address for.
"""
result.parented_rows = AddressLines()
if result.source_table != SourceTable.PLACEX:
return
sql = _placex_select_address_row(conn, result.centroid)\
.where(conn.t.placex.c.parent_place_id == result.place_id)\
.where(conn.t.placex.c.rank_search == 30)
for row in await conn.execute(sql):
result.parented_rows.append(_result_row_to_address_row(row))

View File

@@ -1,590 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of reverse geocoding.
"""
from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
import functools
import sqlalchemy as sa
from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
SaBind, SaLambdaSelect
from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres
from nominatim.api.logging import log
from nominatim.api.types import AnyPoint, DataLayer, ReverseDetails, GeometryFormat, Bbox
from nominatim.db.sqlalchemy_types import Geometry
# In SQLAlchemy, expressions which compare with NULL need to be expressed with
# the equal sign.
# pylint: disable=singleton-comparison
RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.ReverseResult]]
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
def no_index(expr: SaColumn) -> SaColumn:
""" Wrap the given expression, so that the query planner will
refrain from using the expression for index lookup.
"""
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
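# The coalesce(NULL, ...) wrapper is opaque to the planner, so an
# expression like no_index(t.c.rank_address).between(26, 30) renders
# roughly as "coalesce(NULL, placex.rank_address) BETWEEN 26 AND 30"
# and will not be matched against an index on rank_address.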
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
""" Create a select statement with the columns relevant for reverse
results.
"""
if not use_wkt:
distance = t.c.distance
centroid = t.c.centroid
else:
distance = t.c.geometry.ST_Distance(WKT_PARAM)
centroid = sa.case((t.c.geometry.is_line_like(), t.c.geometry.ST_ClosestPoint(WKT_PARAM)),
else_=t.c.centroid).label('centroid')
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.importance, t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
centroid,
t.c.linked_place_id, t.c.admin_level,
distance.label('distance'),
t.c.geometry.ST_Expand(0).label('bbox'))
def _interpolated_housenumber(table: SaFromClause) -> SaLabel:
return sa.cast(table.c.startnumber
+ sa.func.round(((table.c.endnumber - table.c.startnumber) * table.c.position)
/ table.c.step) * table.c.step,
sa.Integer).label('housenumber')
def _interpolated_position(table: SaFromClause) -> SaLabel:
fac = sa.cast(table.c.step, sa.Float) / (table.c.endnumber - table.c.startnumber)
rounded_pos = sa.func.round(table.c.position / fac) * fac
return sa.case(
(table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
else_=table.c.linegeo.ST_LineInterpolatePoint(rounded_pos)).label('centroid')
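# Worked example (hypothetical row): with startnumber=2, endnumber=10,
# step=2 and position=0.5 the interpolated housenumber is
#   2 + round(((10 - 2) * 0.5) / 2) * 2 = 6
# and, with fac = 2/8 = 0.25, the snapped line position is
#   round(0.5 / 0.25) * 0.25 = 0.5.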
def _locate_interpolation(table: SaFromClause) -> SaLabel:
""" Given a position, locate the closest point on the line.
"""
return sa.case((table.c.linegeo.is_line_like(),
table.c.linegeo.ST_LineLocatePoint(WKT_PARAM)),
else_=0).label('position')
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
class ReverseGeocoder:
""" Class implementing the logic for looking up a place from a
coordinate.
"""
def __init__(self, conn: SearchConnection, params: ReverseDetails,
restrict_to_country_areas: bool = False) -> None:
self.conn = conn
self.params = params
self.restrict_to_country_areas = restrict_to_country_areas
self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
@property
def max_rank(self) -> int:
""" Return the maximum configured rank.
"""
return self.params.max_rank
def has_geometries(self) -> bool:
""" Check if any geometries are requested.
"""
return bool(self.params.geometry_output)
def layer_enabled(self, *layer: DataLayer) -> bool:
""" Return true when any of the given layer types are requested.
"""
return any(self.params.layers & l for l in layer)
def layer_disabled(self, *layer: DataLayer) -> bool:
""" Return true when none of the given layer types is requested.
"""
return not any(self.params.layers & l for l in layer)
def has_feature_layers(self) -> bool:
""" Return true if any layer other than ADDRESS or POI is requested.
"""
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
out = []
if self.params.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
if self.params.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if self.params.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if self.params.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if self.params.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
def _filter_by_layer(self, table: SaFromClause) -> SaColumn:
if self.layer_enabled(DataLayer.MANMADE):
exclude = []
if self.layer_disabled(DataLayer.RAILWAY):
exclude.append('railway')
if self.layer_disabled(DataLayer.NATURAL):
exclude.extend(('natural', 'water', 'waterway'))
return table.c.class_.not_in(tuple(exclude))
include = []
if self.layer_enabled(DataLayer.RAILWAY):
include.append('railway')
if self.layer_enabled(DataLayer.NATURAL):
include.extend(('natural', 'water', 'waterway'))
return table.c.class_.in_(tuple(include))
async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
""" Look up the closest rank 26+ place in the database, which
is closer than the given distance.
"""
t = self.conn.t.placex
# PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to properly estimate index use
# when used with prepared statements
diststr = sa.text(f"{distance}")
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
.where(t.c.geometry.within_distance(WKT_PARAM, diststr))
.where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
.order_by('distance')
.limit(1))
if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry)
restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
if self.layer_enabled(DataLayer.ADDRESS):
max_rank = min(29, self.max_rank)
restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
if self.max_rank == 30:
restrict.append(lambda: sa.func.IsAddressPoint(t))
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
t.c.class_.not_in(('place', 'building')),
sa.not_(t.c.geometry.is_line_like())))
if self.has_feature_layers():
restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
no_index(t.c.rank_address) == 0,
self._filter_by_layer(t)))
if not restrict:
return None
sql = sql.where(sa.or_(*restrict))
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
def _base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.geometry.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.where(sa.func.IsAddressPoint(t))\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_interpolation_for_street(self, parent_place_id: Optional[int],
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
sql = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.within_distance(WKT_PARAM, distance))\
.where(t.c.startnumber != None)\
.order_by('distance')\
.limit(1)
if parent_place_id is not None:
sql = sql.where(t.c.parent_place_id == parent_place_id)
inner = sql.subquery('ipol')
sql = sa.select(inner.c.place_id, inner.c.osm_id,
inner.c.parent_place_id, inner.c.address,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode, inner.c.country_code,
inner.c.distance)
if self.has_geometries():
sub = sql.subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger
def _base_query() -> SaSelect:
inner = sa.select(t,
t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
_locate_interpolation(t))\
.where(t.c.linegeo.within_distance(WKT_PARAM, 0.001))\
.where(t.c.parent_place_id == parent_place_id)\
.order_by('distance')\
.limit(1)\
.subquery('tiger')
return sa.select(inner.c.place_id,
inner.c.parent_place_id,
_interpolated_housenumber(inner),
_interpolated_position(inner),
inner.c.postcode,
inner.c.distance)
sql: SaLambdaSelect
if self.has_geometries():
sub = _base_query().subquery('geom')
sql = self._add_geometry_columns(sa.select(sub), sub.c.centroid)
else:
sql = sa.lambda_stmt(_base_query)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
async def lookup_street_poi(self) -> Tuple[Optional[SaRow], RowFunc]:
""" Find a street or POI/address for the given WKT point.
"""
log().section('Reverse lookup on street/address level')
distance = 0.006
parent_place_id = None
row = await self._find_closest_street_or_poi(distance)
row_func: RowFunc = nres.create_from_placex_row
log().var_dump('Result (street/building)', row)
# If the closest result was a street, but an address was requested,
# check for a housenumber nearby which is part of the street.
if row is not None:
if self.max_rank > 27 \
and self.layer_enabled(DataLayer.ADDRESS) \
and row.rank_address <= 27:
distance = 0.001
parent_place_id = row.place_id
log().comment('Find housenumber for street')
addr_row = await self._find_housenumber_for_street(parent_place_id)
log().var_dump('Result (street housenumber)', addr_row)
if addr_row is not None:
row = addr_row
row_func = nres.create_from_placex_row
distance = addr_row.distance
elif row.country_code == 'us' and parent_place_id is not None:
log().comment('Find TIGER housenumber for street')
addr_row = await self._find_tiger_number_for_street(parent_place_id)
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
row_func = cast(RowFunc,
functools.partial(nres.create_from_tiger_row,
osm_type=row.osm_type,
osm_id=row.osm_id))
row = addr_row
else:
distance = row.distance
# Check for an interpolation that is either closer than our result
# or belongs to the close street that was found.
if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
log().comment('Find interpolation for street')
addr_row = await self._find_interpolation_for_street(parent_place_id,
distance)
log().var_dump('Result (street interpolation)', addr_row)
if addr_row is not None:
row = addr_row
row_func = nres.create_from_osmline_row
return row, row_func
async def _lookup_area_address(self) -> Optional[SaRow]:
""" Lookup large addressable areas for the given WKT point.
"""
log().comment('Reverse lookup by larger address area features')
t = self.conn.t.placex
def _base_query() -> SaSelect:
# The inner SQL brings results in the right order, so that
# only a minimal number of results needs to be checked with ST_Contains later.
inner = sa.select(t, sa.literal(0.0).label('distance'))\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.geometry.intersects(WKT_PARAM))\
.where(sa.func.PlacexGeometryReverseLookuppolygon())\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
return _select_from_placex(inner, False)\
.where(inner.c.geometry.ST_Contains(WKT_PARAM))\
.order_by(sa.desc(inner.c.rank_search))\
.limit(1)
sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
address_rank = address_row.rank_search
address_id = address_row.place_id
def _place_inside_area_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('places')
touter = t.alias('outer')
return _select_from_placex(inner, False)\
.join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
.where(touter.c.place_id == address_id)\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(_place_inside_area_query(),
sa.literal_column('places.geometry'))
else:
sql = sa.lambda_stmt(_place_inside_area_query)
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
if place_address_row is not None:
return place_address_row
return address_row
async def _lookup_area_others(self) -> Optional[SaRow]:
t = self.conn.t.placex
inner = sa.select(t, t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_address == 0)\
.where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
.where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
.where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
.order_by(sa.desc(t.c.rank_search))\
.order_by('distance')\
.limit(50)\
.subquery()
sql = _select_from_placex(inner, False)\
.where(sa.or_(sa.not_(inner.c.geometry.is_area()),
inner.c.geometry.ST_Contains(WKT_PARAM)))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row)
return row
async def lookup_area(self) -> Optional[SaRow]:
""" Lookup large areas for the current search.
"""
log().section('Reverse lookup by larger area features')
if self.layer_enabled(DataLayer.ADDRESS):
address_row = await self._lookup_area_address()
else:
address_row = None
if self.has_feature_layers():
other_row = await self._lookup_area_others()
else:
other_row = None
return _get_closest(address_row, other_row)
async def lookup_country_codes(self) -> List[str]:
""" Lookup the country for the current search.
"""
log().section('Reverse lookup by country code')
t = self.conn.t.country_grid
sql = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(WKT_PARAM))
ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
log().var_dump('Country codes', ccodes)
return ccodes
async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
""" Lookup the country for the current search.
"""
if not ccodes:
ccodes = await self.lookup_country_codes()
if not ccodes:
return None
t = self.conn.t.placex
if self.max_rank > 4:
log().comment('Search for place nodes in country')
def _base_query() -> SaSelect:
inner = \
sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
.where(t.c.rank_search > 4)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
.where(t.c.indexed_status == 0)\
.where(t.c.country_code.in_(ccodes))\
.where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('area')
return _select_from_placex(inner, False)\
.where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
sql: SaLambdaSelect
if self.has_geometries():
sql = self._add_geometry_columns(_base_query(),
sa.literal_column('area.geometry'))
else:
sql = sa.lambda_stmt(_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
else:
address_row = None
if address_row is None:
# Still nothing found, so return a country with the appropriate country code.
def _country_base_query() -> SaSelect:
return _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
.limit(1)
if self.has_geometries():
sql = self._add_geometry_columns(_country_base_query(), t.c.geometry)
else:
sql = sa.lambda_stmt(_country_base_query)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
return address_row
async def lookup(self, coord: AnyPoint) -> Optional[nres.ReverseResult]:
""" Look up a single coordinate. Returns the place information,
if a place was found near the coordinates or None otherwise.
"""
log().function('reverse_lookup', coord=coord, params=self.params)
self.bind_params['wkt'] = f'POINT({coord[0]} {coord[1]})'
row: Optional[SaRow] = None
row_func: RowFunc = nres.create_from_placex_row
if self.max_rank >= 26:
row, tmp_row_func = await self.lookup_street_poi()
if row is not None:
row_func = tmp_row_func
if row is None:
if self.restrict_to_country_areas:
ccodes = await self.lookup_country_codes()
if not ccodes:
return None
else:
ccodes = []
if self.max_rank > 4:
row = await self.lookup_area()
if row is None and self.layer_enabled(DataLayer.ADDRESS):
row = await self.lookup_country(ccodes)
result = row_func(row, nres.ReverseResult)
if result is not None:
assert row is not None
result.distance = row.distance
if hasattr(row, 'bbox'):
result.bbox = Bbox.from_wkb(row.bbox)
await nres.add_result_details(self.conn, [result], self.params)
return result

View File

@@ -1,15 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Module for forward search.
"""
# pylint: disable=useless-import-alias
from .geocoder import (ForwardGeocoder as ForwardGeocoder)
from .query import (Phrase as Phrase,
PhraseType as PhraseType)
from .query_analyzer_factory import (make_query_analyzer as make_query_analyzer)

View File

@@ -1,411 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Conversion from token assignment to an abstract DB search.
"""
from typing import Optional, List, Tuple, Iterator, Dict
import heapq
from nominatim.api.types import SearchDetails, DataLayer
from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange, BreakType
from nominatim.api.search.token_assignment import TokenAssignment
import nominatim.api.search.db_search_fields as dbf
import nominatim.api.search.db_searches as dbs
import nominatim.api.search.db_search_lookups as lookups
def wrap_near_search(categories: List[Tuple[str, str]],
search: dbs.AbstractSearch) -> dbs.NearSearch:
""" Create a new search that wraps the given search in a search
for near places of the given category.
"""
return dbs.NearSearch(penalty=search.penalty,
categories=dbf.WeightedCategories(categories,
[0.0] * len(categories)),
search=search)
def build_poi_search(category: List[Tuple[str, str]],
countries: Optional[List[str]]) -> dbs.PoiSearch:
""" Create a new search for places by the given category, possibly
constrained to the given countries.
"""
if countries:
ccs = dbf.WeightedStrings(countries, [0.0] * len(countries))
else:
ccs = dbf.WeightedStrings([], [])
class _PoiData(dbf.SearchData):
penalty = 0.0
qualifiers = dbf.WeightedCategories(category, [0.0] * len(category))
countries=ccs
return dbs.PoiSearch(_PoiData())
class SearchBuilder:
""" Build the abstract search queries from token assignments.
"""
def __init__(self, query: QueryStruct, details: SearchDetails) -> None:
self.query = query
self.details = details
@property
def configured_for_country(self) -> bool:
""" Return true if the search details are configured to
allow countries in the result.
"""
return self.details.min_rank <= 4 and self.details.max_rank >= 4 \
and self.details.layer_enabled(DataLayer.ADDRESS)
@property
def configured_for_postcode(self) -> bool:
""" Return true if the search details are configured to
allow postcodes in the result.
"""
return self.details.min_rank <= 5 and self.details.max_rank >= 11\
and self.details.layer_enabled(DataLayer.ADDRESS)
@property
def configured_for_housenumbers(self) -> bool:
""" Return true if the search details are configured to
allow addresses in the result.
"""
return self.details.max_rank >= 30 \
and self.details.layer_enabled(DataLayer.ADDRESS)
def build(self, assignment: TokenAssignment) -> Iterator[dbs.AbstractSearch]:
""" Yield all possible abstract searches for the given token assignment.
"""
sdata = self.get_search_data(assignment)
if sdata is None:
return
near_items = self.get_near_items(assignment)
if near_items is not None and not near_items:
return # impossible combination of near items and category parameter
if assignment.name is None:
if near_items and not sdata.postcodes:
sdata.qualifiers = near_items
near_items = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER)
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
bool(near_items))
else:
builder = self.build_name_search(sdata, assignment.name, assignment.address,
bool(near_items))
if near_items:
penalty = min(near_items.penalties)
near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder:
search_penalty = search.penalty
search.penalty = 0.0
yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
near_items, search)
else:
for search in builder:
search.penalty += assignment.penalty
yield search
def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search query for a simple category search.
This kind of search requires an additional geographic constraint.
"""
if not sdata.housenumbers \
and ((self.details.viewbox and self.details.bounded_viewbox) or self.details.near):
yield dbs.PoiSearch(sdata)
def build_special_search(self, sdata: dbf.SearchData,
address: List[TokenRange],
is_category: bool) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search queries for searches that do not involve
a named place.
"""
if sdata.qualifiers:
# No special searches over qualifiers supported.
return
if sdata.countries and not address and not sdata.postcodes \
and self.configured_for_country:
yield dbs.CountrySearch(sdata)
if sdata.postcodes and (is_category or self.configured_for_postcode):
penalty = 0.0 if sdata.countries else 0.1
if address:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
lookups.Restrict)]
penalty += 0.2
yield dbs.PostcodeSearch(penalty, sdata)
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],
address: List[TokenRange]) -> Iterator[dbs.AbstractSearch]:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], lookups.LookupAny)]
expected_count = sum(t.count for t in hnrs)
partials = {t.token: t.count for trange in address
for t in self.query.get_partials_list(trange)}
if expected_count < 8000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
list(partials), lookups.Restrict))
elif len(partials) != 1 or list(partials.values())[0] < 10000:
sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
list(partials), lookups.LookupAll))
else:
addr_fulls = [t.token for t
in self.query.get_tokens(address[0], TokenType.WORD)]
if len(addr_fulls) > 5:
return
sdata.lookups.append(
dbf.FieldLookup('nameaddress_vector', addr_fulls, lookups.LookupAny))
sdata.housenumbers = dbf.WeightedStrings([], [])
yield dbs.PlaceSearch(0.05, sdata, expected_count)
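# Sketch of the strategy above, using the thresholds from the code:
# fewer than 8000 expected housenumber matches restrict by address
# partials without an index; otherwise the partials are looked up via
# index, unless they consist of a single very frequent term (count of
# 10000 or more), in which case at most 5 full-word address tokens are
# used instead.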
def build_name_search(self, sdata: dbf.SearchData,
name: TokenRange, address: List[TokenRange],
is_category: bool) -> Iterator[dbs.AbstractSearch]:
""" Build abstract search queries for simple name or address searches.
"""
if is_category or not sdata.housenumbers or self.configured_for_housenumbers:
ranking = self.get_name_ranking(name)
name_penalty = ranking.normalize_penalty()
if ranking.rankings:
sdata.rankings.append(ranking)
for penalty, count, lookup in self.yield_lookups(name, address):
sdata.lookups = lookup
yield dbs.PlaceSearch(penalty + name_penalty, sdata, count)
def yield_lookups(self, name: TokenRange, address: List[TokenRange])\
-> Iterator[Tuple[float, int, List[dbf.FieldLookup]]]:
""" Yield all variants how the given name and address should best
be searched for. This takes into account how frequent the terms
are and tries to find a lookup that optimizes index use.
"""
penalty = 0.0 # extra penalty
name_partials = {t.token: t for t in self.query.get_partials_list(name)}
addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_tokens = list({t.token for t in addr_partials})
partials_indexed = all(t.is_indexed for t in name_partials.values()) \
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials.values()) / (2**(len(name_partials) - 1))
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(list(name_partials.keys()), addr_tokens)
return
# Partial terms are too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
if name_fulls:
fulls_count = sum(t.count for t in name_fulls)
# At this point drop unindexed partials from the address.
# This might yield wrong results, nothing we can do about that.
if not partials_indexed:
addr_tokens = [t.token for t in addr_partials if t.is_indexed]
penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any one of the full names must match, together with all of the partials from the address.
yield penalty, fulls_count / (2**len(addr_tokens)),\
dbf.lookup_by_any_name([t.token for t in name_fulls],
addr_tokens,
fulls_count > 30000 / max(1, len(addr_tokens)))
# To catch remaining results, lookup by name and address
# We only do this if there is a reasonable number of results expected.
exp_count = exp_count / (2**len(addr_tokens)) if addr_tokens else exp_count
if exp_count < 10000 and all(t.is_indexed for t in name_partials.values()):
lookup = [dbf.FieldLookup('name_vector', list(name_partials.keys()), lookups.LookupAll)]
if addr_tokens:
lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll))
penalty += 0.35 * max(1 if name_fulls else 0.1,
5 - len(name_partials) - len(addr_tokens))
yield penalty, exp_count, lookup
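# Worked example for the estimate above (hypothetical counts): three
# name partials with a minimum count of 50000 give
# exp_count = 50000 / 2**2 = 12500, which is above the 8000 cut-off,
# so the lookup falls through to the rarer full-name variants.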
def get_name_ranking(self, trange: TokenRange,
db_field: str = 'name_vector') -> dbf.FieldRanking:
""" Create a ranking expression for a name term in the given range.
"""
name_fulls = self.query.get_tokens(trange, TokenType.WORD)
ranks = [dbf.RankedTokens(t.penalty, [t.token]) for t in name_fulls]
ranks.sort(key=lambda r: r.penalty)
# Fallback, sum of penalty for partials
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
return dbf.FieldRanking(db_field, default, ranks)
def get_addr_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
""" Create a list of ranking expressions for an address term
for the given ranges.
"""
todo: List[Tuple[int, int, dbf.RankedTokens]] = []
heapq.heappush(todo, (0, trange.start, dbf.RankedTokens(0.0, [])))
ranks: List[dbf.RankedTokens] = []
while todo: # pylint: disable=too-many-nested-blocks
neglen, pos, rank = heapq.heappop(todo)
for tlist in self.query.nodes[pos].starting:
if tlist.ttype in (TokenType.PARTIAL, TokenType.WORD):
if tlist.end < trange.end:
chgpenalty = PENALTY_WORDCHANGE[self.query.nodes[tlist.end].btype]
if tlist.ttype == TokenType.PARTIAL:
penalty = rank.penalty + chgpenalty \
+ max(t.penalty for t in tlist.tokens)
heapq.heappush(todo, (neglen - 1, tlist.end,
dbf.RankedTokens(penalty, rank.tokens)))
else:
for t in tlist.tokens:
heapq.heappush(todo, (neglen - 1, tlist.end,
rank.with_token(t, chgpenalty)))
elif tlist.end == trange.end:
if tlist.ttype == TokenType.PARTIAL:
ranks.append(dbf.RankedTokens(rank.penalty
+ max(t.penalty for t in tlist.tokens),
rank.tokens))
else:
ranks.extend(rank.with_token(t, 0.0) for t in tlist.tokens)
if len(ranks) >= 10:
# Too many variants, bail out and only add the
# worst-case fallback: the sum of the penalties of the partials.
name_partials = self.query.get_partials_list(trange)
default = sum(t.penalty for t in name_partials) + 0.2
ranks.append(dbf.RankedTokens(rank.penalty + default, []))
# Bail out of outer loop
todo.clear()
break
ranks.sort(key=lambda r: len(r.tokens))
default = ranks[0].penalty + 0.3
del ranks[0]
ranks.sort(key=lambda r: r.penalty)
return dbf.FieldRanking('nameaddress_vector', default, ranks)
def get_search_data(self, assignment: TokenAssignment) -> Optional[dbf.SearchData]:
""" Collect the tokens for the non-name search fields in the
assignment.
"""
sdata = dbf.SearchData()
sdata.penalty = assignment.penalty
if assignment.country:
tokens = self.get_country_tokens(assignment.country)
if not tokens:
return None
sdata.set_strings('countries', tokens)
elif self.details.countries:
sdata.countries = dbf.WeightedStrings(self.details.countries,
[0.0] * len(self.details.countries))
if assignment.housenumber:
sdata.set_strings('housenumbers',
self.query.get_tokens(assignment.housenumber,
TokenType.HOUSENUMBER))
if assignment.postcode:
sdata.set_strings('postcodes',
self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE))
if assignment.qualifier:
tokens = self.get_qualifier_tokens(assignment.qualifier)
if not tokens:
return None
sdata.set_qualifiers(tokens)
elif self.details.categories:
sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
[0.0] * len(self.details.categories))
if assignment.address:
if not assignment.name and assignment.housenumber:
# housenumber search: the first item needs to be handled like
# a name in ranking or penalties are not comparable with
# normal searches.
sdata.set_ranking([self.get_name_ranking(assignment.address[0],
db_field='nameaddress_vector')]
+ [self.get_addr_ranking(r) for r in assignment.address[1:]])
else:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
else:
sdata.rankings = []
return sdata
def get_country_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of country tokens for the given range,
optionally filtered by the country list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.COUNTRY)
if self.details.countries:
tokens = [t for t in tokens if t.lookup_word in self.details.countries]
return tokens
def get_qualifier_tokens(self, trange: TokenRange) -> List[Token]:
""" Return the list of qualifier tokens for the given range,
optionally filtered by the qualifier list from the details
parameters.
"""
tokens = self.query.get_tokens(trange, TokenType.QUALIFIER)
if self.details.categories:
tokens = [t for t in tokens if t.get_category() in self.details.categories]
return tokens
def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
""" Collect tokens for near items search or use the categories
requested per parameter.
Returns None if no category search is requested.
"""
if assignment.near_item:
tokens: Dict[Tuple[str, str], float] = {}
for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
cat = t.get_category()
# The category of a near search will be that of near_item.
# Thus, if the search is restricted to a category parameter,
# the two sets must intersect.
if (not self.details.categories or cat in self.details.categories)\
and t.penalty < tokens.get(cat, 1000.0):
tokens[cat] = t.penalty
return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
return None
PENALTY_WORDCHANGE = {
BreakType.START: 0.0,
BreakType.END: 0.0,
BreakType.PHRASE: 0.0,
BreakType.WORD: 0.1,
BreakType.PART: 0.2,
BreakType.TOKEN: 0.4
}

View File

@@ -1,253 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Data structures for more complex fields in abstract search descriptions.
"""
from typing import List, Tuple, Iterator, Dict, Type
import dataclasses
import sqlalchemy as sa
from nominatim.typing import SaFromClause, SaColumn, SaExpression
from nominatim.api.search.query import Token
import nominatim.api.search.db_search_lookups as lookups
from nominatim.utils.json_writer import JsonWriter
@dataclasses.dataclass
class WeightedStrings:
""" A list of strings together with a penalty.
"""
values: List[str]
penalties: List[float]
def __bool__(self) -> bool:
return bool(self.values)
def __iter__(self) -> Iterator[Tuple[str, float]]:
return iter(zip(self.values, self.penalties))
def get_penalty(self, value: str, default: float = 1000.0) -> float:
""" Get the penalty for the given value. Returns the given default
if the value does not exist.
"""
try:
return self.penalties[self.values.index(value)]
except ValueError:
pass
return default
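# Illustration (hypothetical values):
#
#   >>> ws = WeightedStrings(['de', 'fr'], [0.0, 0.1])
#   >>> ws.get_penalty('fr')
#   0.1
#   >>> ws.get_penalty('xx')
#   1000.0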
@dataclasses.dataclass
class WeightedCategories:
""" A list of class/type tuples together with a penalty.
"""
values: List[Tuple[str, str]]
penalties: List[float]
def __bool__(self) -> bool:
return bool(self.values)
def __iter__(self) -> Iterator[Tuple[Tuple[str, str], float]]:
return iter(zip(self.values, self.penalties))
def get_penalty(self, value: Tuple[str, str], default: float = 1000.0) -> float:
""" Get the penalty for the given value. Returns the given default
if the value does not exist.
"""
try:
return self.penalties[self.values.index(value)]
except ValueError:
pass
return default
def sql_restrict(self, table: SaFromClause) -> SaExpression:
""" Return an SQLAlcheny expression that restricts the
class and type columns of the given table to the values
in the list.
Must not be used with an empty list.
"""
assert self.values
if len(self.values) == 1:
return sa.and_(table.c.class_ == self.values[0][0],
table.c.type == self.values[0][1])
return sa.or_(*(sa.and_(table.c.class_ == c, table.c.type == t)
for c, t in self.values))
@dataclasses.dataclass(order=True)
class RankedTokens:
""" List of tokens together with the penalty of using it.
"""
penalty: float
tokens: List[int]
def with_token(self, t: Token, transition_penalty: float) -> 'RankedTokens':
""" Create a new RankedTokens list with the given token appended.
The token's penalty as well as the given transition penalty
are added to the overall penalty.
"""
return RankedTokens(self.penalty + t.penalty + transition_penalty,
self.tokens + [t.token])
@dataclasses.dataclass
class FieldRanking:
""" A list of rankings to be applied sequentially until one matches.
The matched ranking determines the penalty. If none matches, a
default penalty is applied.
"""
column: str
default: float
rankings: List[RankedTokens]
def normalize_penalty(self) -> float:
""" Reduce the default and ranking penalties, such that the minimum
penalty is 0. Return the penalty that was subtracted.
"""
if self.rankings:
min_penalty = min(self.default, min(r.penalty for r in self.rankings))
else:
min_penalty = self.default
if min_penalty > 0.0:
self.default -= min_penalty
for ranking in self.rankings:
ranking.penalty -= min_penalty
return min_penalty
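# Example: with default=0.5 and ranking penalties [0.3, 0.4] the
# minimum 0.3 is subtracted and returned, leaving default=0.2 and
# ranking penalties [0.0, 0.1].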
def sql_penalty(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the rankings.
"""
assert self.rankings
rout = JsonWriter().start_array()
for rank in self.rankings:
rout.start_array().value(rank.penalty).next()
rout.start_array()
for token in rank.tokens:
rout.value(token).next()
rout.end_array()
rout.end_array().next()
rout.end_array()
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
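# The writer above emits a JSON array of [penalty, [token, ...]] pairs,
# e.g. '[[0.0,[123,456]],[0.1,[789]]]' (hypothetical token ids), which
# the weigh_search() database function matches against the column.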
@dataclasses.dataclass
class FieldLookup:
""" A list of tokens to be searched for. The column names the database
column to search in and the lookup_type the operator that is applied.
'lookup_all' requires all tokens to match. 'lookup_any' requires
one of the tokens to match. 'restrict' requires to match all tokens
but avoids the use of indexes.
"""
column: str
tokens: List[int]
lookup_type: Type[lookups.LookupType]
def sql_condition(self, table: SaFromClause) -> SaColumn:
""" Create an SQL expression for the given match condition.
"""
return self.lookup_type(table, self.column, self.tokens)
class SearchData:
""" Search fields derived from query and token assignment
to be used with the SQL queries.
"""
penalty: float
lookups: List[FieldLookup] = []
rankings: List[FieldRanking]
housenumbers: WeightedStrings = WeightedStrings([], [])
postcodes: WeightedStrings = WeightedStrings([], [])
countries: WeightedStrings = WeightedStrings([], [])
qualifiers: WeightedCategories = WeightedCategories([], [])
def set_strings(self, field: str, tokens: List[Token]) -> None:
""" Set on of the WeightedStrings properties from the given
token list. Adapt the global penalty, so that the
minimum penalty is 0.
"""
if tokens:
min_penalty = min(t.penalty for t in tokens)
self.penalty += min_penalty
wstrs = WeightedStrings([t.lookup_word for t in tokens],
[t.penalty - min_penalty for t in tokens])
setattr(self, field, wstrs)
def set_qualifiers(self, tokens: List[Token]) -> None:
""" Set the qulaifier field from the given tokens.
"""
if tokens:
categories: Dict[Tuple[str, str], float] = {}
min_penalty = 1000.0
for t in tokens:
min_penalty = min(min_penalty, t.penalty)
cat = t.get_category()
if t.penalty < categories.get(cat, 1000.0):
categories[cat] = t.penalty
self.penalty += min_penalty
self.qualifiers = WeightedCategories(list(categories.keys()),
list(categories.values()))
def set_ranking(self, rankings: List[FieldRanking]) -> None:
""" Set the list of rankings and normalize the ranking.
"""
self.rankings = []
for ranking in rankings:
if ranking.rankings:
self.penalty += ranking.normalize_penalty()
self.rankings.append(ranking)
else:
self.penalty += ranking.default
def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAll)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookups.Restrict))
return lookup
def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
use_index_for_addr: bool) -> List[FieldLookup]:
""" Create a lookup list where name tokens are looked up via index
and only one of the name tokens must be present.
Potential address tokens are used to restrict the search further.
"""
lookup = [FieldLookup('name_vector', name_tokens, lookups.LookupAny)]
if addr_tokens:
lookup.append(FieldLookup('nameaddress_vector', addr_tokens,
lookups.LookupAll if use_index_for_addr else lookups.Restrict))
return lookup
def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
""" Create a lookup list where address tokens are looked up via index
and the name tokens are only used to restrict the search further.
"""
return [FieldLookup('name_vector', name_tokens, lookups.Restrict),
FieldLookup('nameaddress_vector', addr_tokens, lookups.LookupAll)]
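Taken together, the three helpers only differ in which column carries the index scan. A sketch with invented token ids:

```python
# Invented token ids, for illustration only.
name_tokens, addr_tokens = [11, 12], [55, 56]

lookup_by_names(name_tokens, addr_tokens)
# -> LookupAll on name_vector (indexed), Restrict on nameaddress_vector

lookup_by_any_name(name_tokens, addr_tokens, use_index_for_addr=True)
# -> LookupAny on name_vector, LookupAll on nameaddress_vector (indexed)

lookup_by_addr(name_tokens, addr_tokens)
# -> Restrict on name_vector, LookupAll on nameaddress_vector (indexed)
```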

View File

@@ -1,114 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of lookup functions for the search_name table.
"""
from typing import List, Any
import sqlalchemy as sa
from sqlalchemy.ext.compiler import compiles
from nominatim.typing import SaFromClause
from nominatim.db.sqlalchemy_types import IntArray
# pylint: disable=consider-using-f-string
LookupType = sa.sql.expression.FunctionElement[Any]
class LookupAll(LookupType):
""" Find all entries in search_name table that contain all of
a given list of tokens using an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAll) # type: ignore[no-untyped-call, misc]
def _default_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s @> %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAll, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_all(element: LookupAll,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, col, colname, tokens = list(element.clauses)
return "(%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_intersect_fuzzy(places) as p FROM"\
" (SELECT places FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s"\
" ORDER BY length(places)) as x) as u,"\
" json_each('[' || u.p || ']'))"\
" AND array_contains(%s, %s))"\
% (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw),
compiler.process(col, **kw),
compiler.process(tokens, **kw)
)
class LookupAny(LookupType):
""" Find all entries that contain at least one of the given tokens.
Use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(table.c.place_id, getattr(table.c, column), column,
sa.type_coerce(tokens, IntArray))
@compiles(LookupAny) # type: ignore[no-untyped-call, misc]
def _default_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
_, col, _, tokens = list(element.clauses)
return "(%s && %s)" % (compiler.process(col, **kw),
compiler.process(tokens, **kw))
@compiles(LookupAny, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_lookup_any(element: LookupAny,
compiler: 'sa.Compiled', **kw: Any) -> str:
place, _, colname, tokens = list(element.clauses)
return "%s IN (SELECT CAST(value as bigint) FROM"\
" (SELECT array_union(places) as p FROM reverse_search_name"\
" WHERE word IN (SELECT value FROM json_each('[' || %s || ']'))"\
" AND column = %s) as u,"\
" json_each('[' || u.p || ']'))" % (compiler.process(place, **kw),
compiler.process(tokens, **kw),
compiler.process(colname, **kw))
class Restrict(LookupType):
""" Find all entries that contain all of the given tokens.
Do not use an index for the search.
"""
inherit_cache = True
def __init__(self, table: SaFromClause, column: str, tokens: List[int]) -> None:
super().__init__(getattr(table.c, column),
sa.type_coerce(tokens, IntArray))
@compiles(Restrict) # type: ignore[no-untyped-call, misc]
def _default_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
arg1, arg2 = list(element.clauses)
return "(coalesce(null, %s) @> %s)" % (compiler.process(arg1, **kw),
compiler.process(arg2, **kw))
@compiles(Restrict, 'sqlite') # type: ignore[no-untyped-call, misc]
def _sqlite_restrict(element: Restrict,
compiler: 'sa.Compiled', **kw: Any) -> str:
return "array_contains(%s)" % compiler.process(element.clauses, **kw)

View File

@@ -1,816 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of the actual database accesses for forward search.
"""
from typing import List, Tuple, AsyncIterator, Dict, Any, Callable, cast
import abc
import sqlalchemy as sa
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
import nominatim.api.results as nres
from nominatim.api.search.db_search_fields import SearchData, WeightedCategories
from nominatim.db.sqlalchemy_types import Geometry, IntArray
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
def no_index(expr: SaColumn) -> SaColumn:
""" Wrap the given expression, so that the query planner will
refrain from using the expression for index lookup.
"""
return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
"""
return {'limit': details.max_results,
'min_rank': details.min_rank,
'max_rank': details.max_rank,
'viewbox': details.viewbox,
'viewbox2': details.viewbox_x2,
'near': details.near,
'near_radius': details.near_radius,
'excluded': details.excluded,
'countries': details.countries}
LIMIT_PARAM: SaBind = sa.bindparam('limit')
MIN_RANK_PARAM: SaBind = sa.bindparam('min_rank')
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
def filter_by_area(sql: SaSelect, t: SaFromClause,
details: SearchDetails, avoid_index: bool = False) -> SaSelect:
""" Apply SQL statements for filtering by viewbox and near point,
if applicable.
"""
if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1 and not avoid_index:
sql = sql.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM))
else:
sql = sql.where(t.c.geometry.ST_Distance(NEAR_PARAM) <= NEAR_RADIUS_PARAM)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM,
use_index=not avoid_index and
details.viewbox.area < 0.2))
return sql
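The 0.1 degree radius threshold chooses between an index-friendly distance test and a plain comparison. A sketch (radii invented):

```python
# near_radius = 0.05  ->  geometry.within_distance(near, 0.05)
#   small radius: the index-assisted variant is used
# near_radius = 0.5   ->  ST_Distance(geometry, near) <= 0.5
#   large radius: an index lookup over such an area would not help
# A bounded viewbox goes through the index only while its area stays
# below 0.2 square degrees.
```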
def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id, t.c.admin_level,
t.c.centroid,
t.c.geometry.ST_Expand(0).label('bbox'))
def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
out = []
if details.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & GeometryFormat.GEOJSON:
out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & GeometryFormat.KML:
out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & GeometryFormat.SVG:
out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
numerals: List[int], details: SearchDetails) -> SaScalarSelect:
all_ids = sa.func.ArrayAgg(table.c.place_id)
sql = sa.select(all_ids).where(table.c.parent_place_id == inner.c.place_id)
if len(numerals) == 1:
sql = sql.where(sa.between(numerals[0], table.c.startnumber, table.c.endnumber))\
.where((numerals[0] - table.c.startnumber) % table.c.step == 0)
else:
sql = sql.where(sa.or_(
*(sa.and_(sa.between(n, table.c.startnumber, table.c.endnumber),
(n - table.c.startnumber) % table.c.step == 0)
for n in numerals)))
if details.excluded:
sql = sql.where(_exclude_places(table))
return sql.scalar_subquery()
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.func.IsAddressPoint(table))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
if layers & DataLayer.MANMADE:
exclude = []
if not layers & DataLayer.RAILWAY:
exclude.append('railway')
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
include.append('railway')
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
no_index(table.c.rank_address) == 0))
if len(orexpr) == 1:
return orexpr[0]
return sa.or_(*orexpr)
def _interpolated_position(table: SaFromClause, nr: SaColumn) -> SaColumn:
pos = sa.cast(nr - table.c.startnumber, sa.Float) / (table.c.endnumber - table.c.startnumber)
return sa.case(
(table.c.endnumber == table.c.startnumber, table.c.linegeo.ST_Centroid()),
else_=table.c.linegeo.ST_LineInterpolatePoint(pos)).label('centroid')
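A worked example of the interpolation arithmetic (numbers invented):

```python
# For an interpolation line with startnumber=2, endnumber=10 and the
# requested housenumber nr=6:
#   pos = (6 - 2) / (10 - 2) = 0.5
# so ST_LineInterpolatePoint picks the midpoint of the line geometry.
# When startnumber == endnumber, the line's centroid is used instead,
# avoiding a division by zero.
```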
async def _get_placex_housenumbers(conn: SearchConnection,
place_ids: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.placex
sql = _select_placex(t).add_columns(t.c.importance)\
.where(t.c.place_id.in_(place_ids))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
for row in await conn.execute(sql):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
yield result
def _int_list_to_subquery(inp: List[int]) -> 'sa.Subquery':
""" Create a subselect that returns the given list of integers
as rows in the column 'nr'.
"""
vtab = sa.func.JsonArrayEach(sa.type_coerce(inp, sa.JSON))\
.table_valued(sa.column('value', type_=sa.JSON))
return sa.select(sa.cast(sa.cast(vtab.c.value, sa.Text), sa.Integer).label('nr')).subquery()
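This produces one row per housenumber so that the numbers can be joined against interpolation ranges. Roughly (assuming JsonArrayEach maps to the dialect's JSON unnesting function):

```python
# _int_list_to_subquery([2, 4, 6]) is roughly equivalent to
#   SELECT CAST(CAST(value AS TEXT) AS INTEGER) AS nr
#   FROM <json_each over the JSON array [2, 4, 6]>
# i.e. three rows with nr = 2, 4 and 6, ready to be joined via
# values.c.nr.between(t.c.startnumber, t.c.endnumber).
```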
async def _get_osmline(conn: SearchConnection, place_ids: List[int],
numerals: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.osmline
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.osm_id,
t.c.parent_place_id, t.c.address,
values.c.nr.label('housenumber'),
_interpolated_position(t, values.c.nr),
t.c.postcode, t.c.country_code)\
.where(t.c.place_id.in_(place_ids))\
.join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
if details.geometry_output:
sub = sql.subquery()
sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
for row in await conn.execute(sql):
result = nres.create_from_osmline_row(row, nres.SearchResult)
assert result
yield result
async def _get_tiger(conn: SearchConnection, place_ids: List[int],
numerals: List[int], osm_id: int,
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.tiger
values = _int_list_to_subquery(numerals)
sql = sa.select(t.c.place_id, t.c.parent_place_id,
sa.literal('W').label('osm_type'),
sa.literal(osm_id).label('osm_id'),
values.c.nr.label('housenumber'),
_interpolated_position(t, values.c.nr),
t.c.postcode)\
.where(t.c.place_id.in_(place_ids))\
.join(values, values.c.nr.between(t.c.startnumber, t.c.endnumber))
if details.geometry_output:
sub = sql.subquery()
sql = _add_geometry_columns(sa.select(sub), sub.c.centroid, details)
for row in await conn.execute(sql):
result = nres.create_from_tiger_row(row, nres.SearchResult)
assert result
yield result
class AbstractSearch(abc.ABC):
""" Encapuslation of a single lookup in the database.
"""
SEARCH_PRIO: int = 2
def __init__(self, penalty: float) -> None:
self.penalty = penalty
@abc.abstractmethod
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
class NearSearch(AbstractSearch):
""" Category search of a place type near the result of another search.
"""
def __init__(self, penalty: float, categories: WeightedCategories,
search: AbstractSearch) -> None:
super().__init__(penalty)
self.search = search
self.categories = categories
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
results = nres.SearchResults()
base = await self.search.lookup(conn, details)
if not base:
return results
base.sort(key=lambda r: (r.accuracy, r.rank_search))
max_accuracy = base[0].accuracy + 0.5
if base[0].rank_address == 0:
min_rank = 0
max_rank = 0
elif base[0].rank_address < 26:
min_rank = 1
max_rank = min(25, base[0].rank_address + 4)
else:
min_rank = 26
max_rank = 30
base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
and r.accuracy <= max_accuracy
and r.bbox and r.bbox.area < 20
and r.rank_address >= min_rank
and r.rank_address <= max_rank)
if base:
baseids = [b.place_id for b in base[:5] if b.place_id]
for category, penalty in self.categories:
await self.lookup_category(results, conn, baseids, category, penalty, details)
if len(results) >= details.max_results:
break
return results
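The rank window above narrows the follow-up category search to base results of a fitting address level. A worked example (rank invented):

```python
# If the best base result has rank_address == 16 (roughly city level):
#   min_rank = 1
#   max_rank = min(25, 16 + 4) = 20
# Only base results with address rank 1..20 survive (plus the accuracy
# and bbox-area filters), and at most the five best place_ids are then
# handed to lookup_category().
```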
async def lookup_category(self, results: nres.SearchResults,
conn: SearchConnection, ids: List[int],
category: Tuple[str, str], penalty: float,
details: SearchDetails) -> None:
""" Find places of the given category near the list of
place ids and add the results to 'results'.
"""
table = await conn.get_class_table(*category)
tgeom = conn.t.placex.alias('pgeom')
if table is None:
# No classtype table available, do a simplified lookup in placex.
table = conn.t.placex
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
.where(table.c.class_ == category[0])\
.where(table.c.type == category[1])
else:
# Use classtype table. We can afford to use a larger
# radius for the lookup.
sql = sa.select(table.c.place_id,
sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
.label('dist'))\
.join(tgeom,
table.c.centroid.ST_CoveredBy(
sa.case((sa.and_(tgeom.c.rank_address > 9,
tgeom.c.geometry.is_area()),
tgeom.c.geometry),
else_ = tgeom.c.centroid.ST_Expand(0.05))))
inner = sql.where(tgeom.c.place_id.in_(ids))\
.group_by(table.c.place_id).subquery()
t = conn.t.placex
sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
.join(inner, inner.c.place_id == t.c.place_id)\
.order_by(inner.c.dist)
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
sql = sql.where(_exclude_places(t))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
sql = sql.limit(LIMIT_PARAM)
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + penalty
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
class PoiSearch(AbstractSearch):
""" Category search in a geographic area.
"""
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
self.qualifiers = sdata.qualifiers
self.countries = sdata.countries
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
bind_params = _details_to_bind_params(details)
t = conn.t.placex
rows: List[SaRow] = []
if details.near and details.near_radius is not None and details.near_radius < 0.2:
# simply search in placex table
def _base_query() -> SaSelect:
return _select_placex(t) \
.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))\
.where(t.c.linked_place_id == None) \
.where(t.c.geometry.within_distance(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
.order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
.limit(LIMIT_PARAM)
classtype = self.qualifiers.values
if len(classtype) == 1:
cclass, ctype = classtype[0]
sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
.where(t.c.class_ == cclass)
.where(t.c.type == ctype))
else:
sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
for cls, typ in classtype)))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
for category in self.qualifiers.values:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.join(table, t.c.place_id == table.c.place_id)\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(table.c.centroid.intersects(VIEWBOX_PARAM))
if details.near and details.near_radius is not None:
sql = sql.order_by(table.c.centroid.ST_Distance(NEAR_PARAM))\
.where(table.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
sql = sql.limit(LIMIT_PARAM)
rows.extend(await conn.execute(sql, bind_params))
results = nres.SearchResults()
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results
class CountrySearch(AbstractSearch):
""" Search for a country name or country code.
"""
SEARCH_PRIO = 0
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
self.countries = sdata.countries
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.placex
ccodes = self.countries.values
sql = _select_placex(t)\
.add_columns(t.c.importance)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
if details.excluded:
sql = sql.where(_exclude_places(t))
sql = filter_by_area(sql, t, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
if not results:
results = await self.lookup_in_country_table(conn, details)
if results:
details.min_rank = min(5, details.max_rank)
details.max_rank = min(25, details.max_rank)
return results
async def lookup_in_country_table(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Look up the country in the fallback country tables.
"""
# Avoid the fallback search when this is a 'more' search (i.e. places
# are excluded). Country results usually are in the first batch of
# results and it is not possible to exclude these fallbacks.
if details.excluded:
return nres.SearchResults()
t = conn.t.country_name
tgrid = conn.t.country_grid
sql = sa.select(tgrid.c.country_code,
tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
.label('centroid'),
tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
.where(tgrid.c.country_code.in_(self.countries.values))\
.group_by(tgrid.c.country_code)
sql = filter_by_area(sql, tgrid, details, avoid_index=True)
sub = sql.subquery('grid')
sql = sa.select(t.c.country_code,
t.c.name.merge(t.c.derived_name).label('name'),
sub.c.centroid, sub.c.bbox)\
.join(sub, t.c.country_code == sub.c.country_code)
if details.geometry_output:
sql = _add_geometry_columns(sql, sub.c.centroid, details)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_country_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
results.append(result)
return results
class PostcodeSearch(AbstractSearch):
""" Search for a postcode.
"""
def __init__(self, extra_penalty: float, sdata: SearchData) -> None:
super().__init__(sdata.penalty + extra_penalty)
self.countries = sdata.countries
self.postcodes = sdata.postcodes
self.lookups = sdata.lookups
self.rankings = sdata.rankings
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.postcode
pcs = self.postcodes.values
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.rank_search, t.c.rank_address,
t.c.postcode, t.c.country_code,
t.c.geometry.label('centroid'))\
.where(t.c.postcode.in_(pcs))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = sa.literal(self.penalty)
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
else_=1.0)
if details.near is not None:
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
sql = filter_by_area(sql, t, details)
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.excluded:
sql = sql.where(_exclude_places(t))
if self.lookups:
assert len(self.lookups) == 1
tsearch = conn.t.search_name
sql = sql.where(tsearch.c.place_id == t.c.parent_place_id)\
.where((tsearch.c.name_vector + tsearch.c.nameaddress_vector)
.contains(sa.type_coerce(self.lookups[0].tokens,
IntArray)))
for ranking in self.rankings:
penalty += ranking.sql_penalty(conn.t.search_name)
penalty += sa.case(*((t.c.postcode == v, p) for v, p in self.postcodes),
else_=1.0)
sql = sql.add_columns(penalty.label('accuracy'))
sql = sql.order_by('accuracy').limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_postcode_row(row, nres.SearchResult)
assert result
result.accuracy = row.accuracy
results.append(result)
return results
class PlaceSearch(AbstractSearch):
""" Generic search for an address or named place.
"""
SEARCH_PRIO = 1
def __init__(self, extra_penalty: float, sdata: SearchData, expected_count: int) -> None:
super().__init__(sdata.penalty + extra_penalty)
self.countries = sdata.countries
self.postcodes = sdata.postcodes
self.housenumbers = sdata.housenumbers
self.qualifiers = sdata.qualifiers
self.lookups = sdata.lookups
self.rankings = sdata.rankings
self.expected_count = expected_count
async def lookup(self, conn: SearchConnection,
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
t = conn.t.placex
tsearch = conn.t.search_name
sql: SaLambdaSelect = sa.lambda_stmt(lambda:
_select_placex(t).where(t.c.place_id == tsearch.c.place_id))
if details.geometry_output:
sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = sa.literal(self.penalty)
for ranking in self.rankings:
penalty += ranking.sql_penalty(tsearch)
for lookup in self.lookups:
sql = sql.where(lookup.sql_condition(tsearch))
if self.countries:
sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
if self.postcodes:
# if a postcode is given, don't search for state or country level objects
sql = sql.where(tsearch.c.address_rank > 9)
tpc = conn.t.postcode
pcs = self.postcodes.values
if self.expected_count > 5000:
# Many results expected. Restrict by postcode.
sql = sql.where(sa.select(tpc.c.postcode)
.where(tpc.c.postcode.in_(pcs))
.where(tsearch.c.centroid.within_distance(tpc.c.geometry, 0.12))
.exists())
# Fewer results expected: only express a preference for close postcodes
pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
.where(tpc.c.postcode.in_(pcs))\
.scalar_subquery()
penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
else_=sa.func.coalesce(pc_near, cast(SaColumn, 2.0)))
if details.viewbox is not None:
if details.bounded_viewbox:
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX_PARAM,
use_index=details.viewbox.area < 0.2))
elif not self.postcodes and not self.housenumbers and self.expected_count >= 10000:
sql = sql.where(tsearch.c.centroid
.intersects(VIEWBOX2_PARAM,
use_index=details.viewbox.area < 0.5))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
else_=1.0)
if details.near is not None:
if details.near_radius is not None:
if details.near_radius < 0.1:
sql = sql.where(tsearch.c.centroid.within_distance(NEAR_PARAM,
NEAR_RADIUS_PARAM))
else:
sql = sql.where(tsearch.c.centroid
.ST_Distance(NEAR_PARAM) < NEAR_RADIUS_PARAM)
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
else:
if self.expected_count < 10000\
or (details.viewbox is not None and details.viewbox.area < 0.5):
sql = sql.order_by(
penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
else_=0.40001-(sa.cast(tsearch.c.search_rank, sa.Float())/75)))
sql = sql.add_columns(t.c.importance)
sql = sql.add_columns(penalty.label('accuracy'))
if self.expected_count < 10000:
sql = sql.order_by(sa.text('accuracy'))
if self.housenumbers:
hnr_list = '|'.join(self.housenumbers.values)
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
.where(sa.or_(tsearch.c.address_rank < 30,
sa.func.RegexpWord(hnr_list, t.c.housenumber)))
# Cross-check for housenumbers; this needs to be done on a rather
# large set. Worst case there are 40,000 main streets in OSM.
inner = sql.limit(10000).subquery()
# Housenumbers from placex
thnr = conn.t.placex.alias('hnr')
pid_list = sa.func.ArrayAgg(thnr.c.place_id)
place_sql = sa.select(pid_list)\
.where(thnr.c.parent_place_id == inner.c.place_id)\
.where(sa.func.RegexpWord(hnr_list, thnr.c.housenumber))\
.where(thnr.c.linked_place_id == None)\
.where(thnr.c.indexed_status == 0)
if details.excluded:
place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
if self.qualifiers:
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
numerals = [int(n) for n in self.housenumbers.values
if n.isdigit() and len(n) < 8]
interpol_sql: SaColumn
tiger_sql: SaColumn
if numerals and \
(not self.qualifiers or ('place', 'house') in self.qualifiers.values):
# Housenumbers from interpolations
interpol_sql = _make_interpolation_subquery(conn.t.osmline, inner,
numerals, details)
# Housenumbers from Tiger
tiger_sql = sa.case((inner.c.country_code == 'us',
_make_interpolation_subquery(conn.t.tiger, inner,
numerals, details)
), else_=None)
else:
interpol_sql = sa.null()
tiger_sql = sa.null()
unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
interpol_sql.label('interpol_hnr'),
tiger_sql.label('tiger_hnr')).subquery('unsort')
sql = sa.select(unsort)\
.order_by(sa.case((unsort.c.placex_hnr != None, 1),
(unsort.c.interpol_hnr != None, 2),
(unsort.c.tiger_hnr != None, 3),
else_=4),
unsort.c.accuracy)
else:
sql = sql.where(t.c.linked_place_id == None)\
.where(t.c.indexed_status == 0)
if self.qualifiers:
sql = sql.where(self.qualifiers.sql_restrict(t))
if details.excluded:
sql = sql.where(_exclude_places(tsearch))
if details.min_rank > 0:
sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
tsearch.c.search_rank >= MIN_RANK_PARAM))
if details.max_rank < 30:
sql = sql.where(sa.or_(tsearch.c.address_rank <= MAX_RANK_PARAM,
tsearch.c.search_rank <= MAX_RANK_PARAM))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
sql = sql.limit(LIMIT_PARAM)
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
elif row.interpol_hnr:
subs = _get_osmline(conn, row.interpol_hnr, numerals, details)
elif row.tiger_hnr:
subs = _get_tiger(conn, row.tiger_hnr, numerals, row.osm_id, details)
else:
subs = None
if subs is not None:
async for sub in subs:
assert sub.housenumber
sub.accuracy = result.accuracy
if not any(nr in self.housenumbers.values
for nr in sub.housenumber.split(';')):
sub.accuracy += 0.6
results.append(sub)
# Only add the street as a result if it meets all other
# filter conditions.
if (not details.excluded or result.place_id not in details.excluded)\
and (not self.qualifiers or result.category in self.qualifiers.values)\
and result.rank_address >= details.min_rank:
result.accuracy += 1.0 # penalty for missing housenumber
results.append(result)
else:
results.append(result)
return results

View File

@@ -1,274 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Public interface to the search code.
"""
from typing import List, Any, Optional, Iterator, Tuple, Dict
import itertools
import re
import datetime as dt
import difflib
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
from nominatim.api.results import SearchResult, SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
from nominatim.api.search.query_analyzer_factory import make_query_analyzer, AbstractQueryAnalyzer
from nominatim.api.search.query import Phrase, QueryStruct
from nominatim.api.logging import log
class ForwardGeocoder:
""" Main class responsible for place search.
"""
def __init__(self, conn: SearchConnection,
params: SearchDetails, timeout: Optional[int]) -> None:
self.conn = conn
self.params = params
self.timeout = dt.timedelta(seconds=timeout or 1000000)
self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
@property
def limit(self) -> int:
""" Return the configured maximum number of search results.
"""
return self.params.max_results
async def build_searches(self,
phrases: List[Phrase]) -> Tuple[QueryStruct, List[AbstractSearch]]:
""" Analyse the query and return the tokenized query and list of
possible searches over it.
"""
if self.query_analyzer is None:
self.query_analyzer = await make_query_analyzer(self.conn)
query = await self.query_analyzer.analyze_query(phrases)
searches: List[AbstractSearch] = []
if query.num_token_slots() > 0:
# 2. Compute all possible search interpretations
log().section('Compute abstract searches')
search_builder = SearchBuilder(query, self.params)
num_searches = 0
for assignment in yield_token_assignments(query):
searches.extend(search_builder.build(assignment))
if num_searches < len(searches):
log().table_dump('Searches for assignment',
_dump_searches(searches, query, num_searches))
num_searches = len(searches)
searches.sort(key=lambda s: (s.penalty, s.SEARCH_PRIO))
return query, searches
async def execute_searches(self, query: QueryStruct,
searches: List[AbstractSearch]) -> SearchResults:
""" Run the abstract searches against the database until a result
is found.
"""
log().section('Execute database searches')
results: Dict[Any, SearchResult] = {}
end_time = dt.datetime.now() + self.timeout
min_ranking = searches[0].penalty + 2.0
prev_penalty = 0.0
for i, search in enumerate(searches):
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
break
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
log().var_dump('Params', self.params)
lookup_results = await search.lookup(self.conn, self.params)
for result in lookup_results:
rhash = (result.source_table, result.place_id,
result.housenumber, result.country_code)
prevresult = results.get(rhash)
if prevresult:
prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
else:
results[rhash] = result
min_ranking = min(min_ranking, result.accuracy * 1.2)
log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
prev_penalty = search.penalty
if dt.datetime.now() >= end_time:
break
return SearchResults(results.values())
def pre_filter_results(self, results: SearchResults) -> SearchResults:
""" Remove results that are significantly worse than the
best match.
"""
if results:
max_ranking = min(r.ranking for r in results) + 0.5
results = SearchResults(r for r in results if r.ranking < max_ranking)
return results
def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
""" Remove badly matching results, sort by ranking and
limit to the configured number of results.
"""
if results:
results.sort(key=lambda r: r.ranking)
min_rank = results[0].rank_search
min_ranking = results[0].ranking
results = SearchResults(r for r in results
if r.ranking + 0.03 * (r.rank_search - min_rank)
< min_ranking + 0.5)
results = SearchResults(results[:self.limit])
return results
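A worked example of the cut-off condition (numbers invented):

```python
# Best result: ranking 0.3, rank_search 26. A second result with
# ranking 0.7 and rank_search 30 is kept only if
#   0.7 + 0.03 * (30 - 26) = 0.82  <  0.3 + 0.5 = 0.80
# which is false, so that result is dropped before the list is cut
# down to the configured limit.
```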
def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
""" Adjust the accuracy of the localized result according to how well
they match the original query.
"""
assert self.query_analyzer is not None
qwords = [word for phrase in query.source
for word in re.split('[, ]+', phrase.text) if word]
if not qwords:
return
for result in results:
# Negative importance indicates ordering by distance, which is
# more important than word matching.
if not result.display_name\
or (result.importance is not None and result.importance < 0):
continue
distance = 0.0
norm = self.query_analyzer.normalize_text(' '.join((result.display_name,
result.country_code or '')))
words = set((w for w in norm.split(' ') if w))
if not words:
continue
for qword in qwords:
wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
if wdist < 0.5:
distance += len(qword)
else:
distance += (1.0 - wdist) * len(qword)
# Compensate for the fact that country names are not yet given a
# match penalty by the tokenizer.
# Temporary hack that needs to be removed!
if result.rank_address == 4:
distance *= 2
result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
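A sketch of the word-distance measure above (words invented):

```python
# Query word 'berlin' against a result whose normalized display name
# contains 'berlin': quick_ratio() == 1.0, so the word contributes
#   (1.0 - 1.0) * len('berlin') = 0
# to the distance. A query word with no similar counterpart (best
# quick_ratio below 0.5) contributes its full length. After scaling by
# 0.4 / sum(len(w)), a completely unmatched query raises accuracy by at
# most 0.4 (doubled for country results by the hack above).
```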
async def lookup_pois(self, categories: List[Tuple[str, str]],
phrases: List[Phrase]) -> SearchResults:
""" Look up places by category. If phrase is given, a place search
over the phrase will be executed first and places close to the
results returned.
"""
log().function('forward_lookup_pois', categories=categories, params=self.params)
if phrases:
query, searches = await self.build_searches(phrases)
if query:
searches = [wrap_near_search(categories, s) for s in searches[:50]]
results = await self.execute_searches(query, searches)
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
else:
results = SearchResults()
else:
search = build_poi_search(categories, self.params.countries)
results = await search.lookup(self.conn, self.params)
await add_result_details(self.conn, results, self.params)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
async def lookup(self, phrases: List[Phrase]) -> SearchResults:
""" Look up a single free-text query.
"""
log().function('forward_lookup', phrases=phrases, params=self.params)
results = SearchResults()
if self.params.is_impossible():
return results
query, searches = await self.build_searches(phrases)
if searches:
# Execute SQL until an appropriate result is found.
results = await self.execute_searches(query, searches[:50])
results = self.pre_filter_results(results)
await add_result_details(self.conn, results, self.params)
log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
self.rerank_by_query(query, results)
log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
results = self.sort_and_cut_results(results)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
start: int = 0) -> Iterator[Optional[List[Any]]]:
yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
'Qualifier', 'Category', 'Rankings']
def tk(tl: List[int]) -> str:
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
return f"[{','.join(tstr)}]"
def fmt_ranking(f: Any) -> str:
if not f:
return ''
ranks = ','.join((f"{tk(r.tokens)}^{r.penalty:.3g}" for r in f.rankings))
if len(ranks) > 100:
ranks = ranks[:100] + '...'
return f"{f.column}({ranks},def={f.default:.3g})"
def fmt_lookup(l: Any) -> str:
if not l:
return ''
return f"{l.lookup_type}({l.column}{tk(l.tokens)})"
def fmt_cstr(c: Any) -> str:
if not c:
return ''
return f'{c[0]}^{c[1]}'
for search in searches[start:]:
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
'postcodes', 'qualifiers')
if hasattr(search, 'search'):
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
*(getattr(search.search, attr, []) for attr in fields),
getattr(search, 'categories', []),
fillvalue='')
else:
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
*(getattr(search, attr, []) for attr in fields),
[],
fillvalue='')
for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
yield None

View File

@@ -1,312 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the ICU tokenizer.
"""
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from collections import defaultdict
import dataclasses
import difflib
from icu import Transliterator
import sqlalchemy as sa
from nominatim.typing import SaRow
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.search import query as qmod
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
from nominatim.db.sqlalchemy_types import Json
DB_TO_TOKEN_TYPE = {
'W': qmod.TokenType.WORD,
'w': qmod.TokenType.PARTIAL,
'H': qmod.TokenType.HOUSENUMBER,
'P': qmod.TokenType.POSTCODE,
'C': qmod.TokenType.COUNTRY
}
class QueryPart(NamedTuple):
""" Normalized and transliterated form of a single term in the query.
When the term came out of a split during the transliteration,
the normalized string is the full word before transliteration.
The word number keeps track of the word before transliteration
and can be used to identify partially transliterated terms.
"""
token: str
normalized: str
word_number: int
QueryParts = List[QueryPart]
WordDict = Dict[str, List[qmod.TokenRange]]
def yield_words(terms: List[QueryPart], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
""" Return all combinations of words in the terms list after the
given position.
"""
total = len(terms)
for first in range(start, total):
word = terms[first].token
yield word, qmod.TokenRange(first, first + 1)
for last in range(first + 1, min(first + 20, total)):
word = ' '.join((word, terms[last].token))
yield word, qmod.TokenRange(first, last + 1)
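A worked example of the combinations produced (terms invented):

```python
# For parts whose tokens are ['alte', 'bruecke', 'heidelberg'] and
# start=0, yield_words() produces:
#   'alte'                     TokenRange(0, 1)
#   'alte bruecke'             TokenRange(0, 2)
#   'alte bruecke heidelberg'  TokenRange(0, 3)
#   'bruecke'                  TokenRange(1, 2)
#   'bruecke heidelberg'       TokenRange(1, 3)
#   'heidelberg'               TokenRange(2, 3)
# Combinations are capped at 20 consecutive terms.
```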
@dataclasses.dataclass
class ICUToken(qmod.Token):
""" Specialised token for ICU tokenizer.
"""
word_token: str
info: Optional[Dict[str, Any]]
def get_category(self) -> Tuple[str, str]:
assert self.info
return self.info.get('class', ''), self.info.get('type', '')
def rematch(self, norm: str) -> None:
""" Check how well the token matches the given normalized string
and add a penalty, if necessary.
"""
if not self.lookup_word:
return
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
distance += 1
elif tag == 'replace':
distance += max((ato-afrom), (bto-bfrom))
elif tag != 'equal':
distance += abs((ato-afrom) - (bto-bfrom))
self.penalty += (distance/len(self.lookup_word))
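A worked example of the rematch penalty (words invented):

```python
# lookup_word = 'hauptstrasse', norm = 'hauptstr':
# SequenceMatcher yields  equal(0:8 -> 0:8)  and  delete(8:12 -> 8:8).
# The delete touches the end of lookup_word, so distance += 1 and the
# penalty grows by 1 / len('hauptstrasse') = 1/12, roughly 0.083.
```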
@staticmethod
def from_db_row(row: SaRow) -> 'ICUToken':
""" Create a ICUToken from the row of the word table.
"""
count = 1 if row.info is None else row.info.get('count', 1)
penalty = 0.0
if row.type == 'w':
penalty = 0.3
elif row.type == 'W':
if len(row.word_token) == 1 and row.word_token == row.word:
penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
elif row.type == 'C':
if len(row.word_token) == 1:
penalty = 0.3
if row.info is None:
lookup_word = row.word
else:
lookup_word = row.info.get('lookup', row.word)
if lookup_word:
lookup_word = lookup_word.split('@', 1)[0]
else:
lookup_word = row.word_token
return ICUToken(penalty=penalty, token=row.word_id, count=count,
lookup_word=lookup_word, is_indexed=True,
word_token=row.word_token, info=row.info)
class ICUQueryAnalyzer(AbstractQueryAnalyzer):
""" Converter for query strings into a tokenized query
using the tokens created by an ICU tokenizer.
"""
def __init__(self, conn: SearchConnection) -> None:
self.conn = conn
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
async def _make_normalizer() -> Any:
rules = await self.conn.get_property('tokenizer_import_normalisation')
return Transliterator.createFromRules("normalization", rules)
self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
_make_normalizer)
async def _make_transliterator() -> Any:
rules = await self.conn.get_property('tokenizer_import_transliteration')
return Transliterator.createFromRules("transliteration", rules)
self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
_make_transliterator)
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('type', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('info', Json))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
""" Analyze the given list of phrases and return the
tokenized query.
"""
log().section('Analyze query (using ICU tokenizer)')
normalized = list(filter(lambda p: p.text,
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
for p in phrases)))
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
return query
parts, words = self.split_query(query)
log().var_dump('Transliterated query', lambda: _dump_transliterated(query, parts))
for row in await self.lookup_in_db(list(words.keys())):
for trange in words[row.word_token]:
token = ICUToken.from_db_row(row)
if row.type == 'S':
if row.info['op'] in ('in', 'near'):
if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
if trange.start == 0 and trange.end == query.num_token_slots():
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
self.add_extra_tokens(query, parts)
self.rerank_tokens(query, parts)
log().table_dump('Word tokens', _dump_word_tokens(query))
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
return cast(str, self.normalizer.transliterate(text))
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.
Returns the list of transliterated tokens together with their
normalized form and a dictionary of words for lookup together
with their position.
"""
parts: QueryParts = []
phrase_start = 0
words = defaultdict(list)
wordnr = 0
for phrase in query.source:
query.nodes[-1].ptype = phrase.ptype
for word in phrase.text.split(' '):
trans = self.transliterator.transliterate(word)
if trans:
for term in trans.split(' '):
if term:
parts.append(QueryPart(term, word, wordnr))
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType.WORD
wordnr += 1
query.nodes[-1].btype = qmod.BreakType.PHRASE
for word, wrange in yield_words(parts, phrase_start):
words[word].append(wrange)
phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END
return parts, words
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
""" Return the token information from the database for the
given word tokens.
"""
t = self.conn.t.meta.tables['word']
return await self.conn.execute(t.select().where(t.c.word_token.in_(words)))
def add_extra_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
""" Add tokens to query that are not saved in the database.
"""
for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part.token) <= 4 and part[0].isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
ICUToken(0.5, 0, 1, part.token, True, part.token, None))
def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for i, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE:
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
norm = parts[i].normalized
for j in range(i + 1, tlist.end):
if parts[j - 1].word_number != parts[j].word_number:
norm += ' ' + parts[j].normalized
for token in tlist.tokens:
cast(ICUToken, token).rematch(norm)
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
out = query.nodes[0].btype.value
for node, part in zip(query.nodes[1:], parts):
out += part.token + node.btype.value
return out
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
for node in query.nodes:
for tlist in node.starting:
for token in tlist.tokens:
t = cast(ICUToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info]
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create and set up a new query analyzer for a database based
on the ICU tokenizer.
"""
out = ICUQueryAnalyzer(conn)
await out.setup()
return out

View File

@@ -1,271 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of query analysis for the legacy tokenizer.
"""
from typing import Tuple, Dict, List, Optional, Iterator, Any, cast
from copy import copy
from collections import defaultdict
import dataclasses
import sqlalchemy as sa
from nominatim.typing import SaRow
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
from nominatim.api.search import query as qmod
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
def yield_words(terms: List[str], start: int) -> Iterator[Tuple[str, qmod.TokenRange]]:
""" Return all combinations of words in the terms list after the
given position.
"""
total = len(terms)
for first in range(start, total):
word = terms[first]
yield word, qmod.TokenRange(first, first + 1)
for last in range(first + 1, min(first + 20, total)):
word = ' '.join((word, terms[last]))
yield word, qmod.TokenRange(first, last + 1)
@dataclasses.dataclass
class LegacyToken(qmod.Token):
""" Specialised token for legacy tokenizer.
"""
word_token: str
category: Optional[Tuple[str, str]]
country: Optional[str]
operator: Optional[str]
@property
def info(self) -> Dict[str, Any]:
""" Dictionary of additional properties of the token.
Should only be used for debugging purposes.
"""
return {'category': self.category,
'country': self.country,
'operator': self.operator}
def get_category(self) -> Tuple[str, str]:
assert self.category
return self.category
class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
""" Converter for query strings into a tokenized query
using the tokens created by a legacy tokenizer.
"""
def __init__(self, conn: SearchConnection) -> None:
self.conn = conn
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
self.max_word_freq = int(await self.conn.get_property('tokenizer_maxwordfreq'))
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
sa.Column('word_id', sa.Integer),
sa.Column('word_token', sa.Text, nullable=False),
sa.Column('word', sa.Text),
sa.Column('class', sa.Text),
sa.Column('type', sa.Text),
sa.Column('country_code', sa.Text),
sa.Column('search_name_count', sa.Integer),
sa.Column('operator', sa.Text))
async def analyze_query(self, phrases: List[qmod.Phrase]) -> qmod.QueryStruct:
""" Analyze the given list of phrases and return the
tokenized query.
"""
log().section('Analyze query (using Legacy tokenizer)')
normalized = []
if phrases:
for row in await self.conn.execute(sa.select(*(sa.func.make_standard_name(p.text)
for p in phrases))):
normalized = [qmod.Phrase(p.ptype, r) for r, p in zip(row, phrases) if r]
break
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
return query
parts, words = self.split_query(query)
lookup_words = list(words.keys())
log().var_dump('Split query', parts)
log().var_dump('Extracted words', lookup_words)
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)
self.add_extra_tokens(query, parts)
self.rerank_tokens(query)
log().table_dump('Word tokens', _dump_word_tokens(query))
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form.
This only folds case, so some differences from the normalization
applied to the phrases remain.
"""
return text.lower()
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
Returns a list of transliterated tokens and a dictionary
of words for lookup together with their position.
"""
parts: List[str] = []
phrase_start = 0
words = defaultdict(list)
for phrase in query.source:
query.nodes[-1].ptype = phrase.ptype
for trans in phrase.text.split(' '):
if trans:
for term in trans.split(' '):
if term:
parts.append(trans)
query.add_node(qmod.BreakType.TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType.WORD
query.nodes[-1].btype = qmod.BreakType.PHRASE
for word, wrange in yield_words(parts, phrase_start):
words[word].append(wrange)
phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END
return parts, words
async def lookup_in_db(self, words: List[str]) -> 'sa.Result[Any]':
""" Return the token information from the database for the
given word tokens.
"""
t = self.conn.t.meta.tables['word']
sql = t.select().where(t.c.word_token.in_(words + [' ' + w for w in words]))
return await self.conn.execute(sql)
def make_token(self, row: SaRow) -> Tuple[LegacyToken, qmod.TokenType]:
""" Create a LegacyToken from the row of the word table.
Also determines the type of token.
"""
penalty = 0.0
is_indexed = True
rowclass = getattr(row, 'class')
if row.country_code is not None:
ttype = qmod.TokenType.COUNTRY
lookup_word = row.country_code
elif rowclass is not None:
if rowclass == 'place' and row.type == 'house':
ttype = qmod.TokenType.HOUSENUMBER
lookup_word = row.word_token[1:]
elif rowclass == 'place' and row.type == 'postcode':
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:]
else:
ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):
ttype = qmod.TokenType.WORD
lookup_word = row.word or row.word_token[1:]
else:
ttype = qmod.TokenType.PARTIAL
lookup_word = row.word_token
penalty = 0.21
if row.search_name_count > self.max_word_freq:
is_indexed = False
return LegacyToken(penalty=penalty, token=row.word_id,
count=row.search_name_count or 1,
lookup_word=lookup_word,
word_token=row.word_token.strip(),
category=(rowclass, row.type) if rowclass is not None else None,
country=row.country_code,
operator=row.operator,
is_indexed=is_indexed),\
ttype
def add_extra_tokens(self, query: qmod.QueryStruct, parts: List[str]) -> None:
""" Add tokens to query that are not saved in the database.
"""
for part, node, i in zip(parts, query.nodes, range(1000)):
if len(part) <= 4 and part.isdigit()\
and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
LegacyToken(penalty=0.5, token=0, count=1,
lookup_word=part, word_token=part,
category=None, country=None,
operator=None, is_indexed=True))
def rerank_tokens(self, query: qmod.QueryStruct) -> None:
""" Add penalties to tokens that depend on presence of other token.
"""
for _, node, tlist in query.iter_token_lists():
if tlist.ttype == qmod.TokenType.POSTCODE:
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.POSTCODE \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
def _dump_word_tokens(query: qmod.QueryStruct) -> Iterator[List[Any]]:
yield ['type', 'token', 'word_token', 'lookup_word', 'penalty', 'count', 'info']
for node in query.nodes:
for tlist in node.starting:
for token in tlist.tokens:
t = cast(LegacyToken, token)
yield [tlist.ttype.name, t.token, t.word_token or '',
t.lookup_word or '', t.penalty, t.count, t.info]
async def create_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create and set up a new query analyzer for a database based
on the ICU tokenizer.
"""
out = LegacyQueryAnalyzer(conn)
await out.setup()
return out
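# --- Editor's usage sketch (not part of the original file) ---------------
# Minimal driver for the analyzer above. Obtaining an open SearchConnection
# is outside the scope of this module and must be done by the caller.
async def tokenize_free_text(conn: SearchConnection, text: str) -> qmod.QueryStruct:
    analyzer = await create_query_analyzer(conn)
    # Wrap the whole input into a single untyped phrase, as the search
    # frontend does for free-form queries.
    phrase = qmod.Phrase(qmod.PhraseType.NONE, analyzer.normalize_text(text))
    return await analyzer.analyze_query([phrase])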

View File

@@ -1,296 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Datastructures for a tokenized query.
"""
from typing import List, Tuple, Optional, Iterator
from abc import ABC, abstractmethod
import dataclasses
import enum
class BreakType(enum.Enum):
""" Type of break between tokens.
"""
START = '<'
""" Begin of the query. """
END = '>'
""" End of the query. """
PHRASE = ','
""" Break between two phrases. """
WORD = ' '
""" Break between words. """
PART = '-'
""" Break inside a word, for example a hyphen or apostrophe. """
TOKEN = '`'
""" Break created as a result of tokenization.
This may happen in languages without spaces between words.
"""
class TokenType(enum.Enum):
""" Type of token.
"""
WORD = enum.auto()
""" Full name of a place. """
PARTIAL = enum.auto()
""" Word term without breaks, does not necessarily represent a full name. """
HOUSENUMBER = enum.auto()
""" Housenumber term. """
POSTCODE = enum.auto()
""" Postal code term. """
COUNTRY = enum.auto()
""" Country name or reference. """
QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
NEAR_ITEM = enum.auto()
""" Special term used as searchable object(e.g. supermarket in ...). """
class PhraseType(enum.Enum):
""" Designation of a phrase.
"""
NONE = 0
""" No specific designation (i.e. source is free-form query). """
AMENITY = enum.auto()
""" Contains name or type of a POI. """
STREET = enum.auto()
""" Contains a street name optionally with a housenumber. """
CITY = enum.auto()
""" Contains the postal city. """
COUNTY = enum.auto()
""" Contains the equivalent of a county. """
STATE = enum.auto()
""" Contains a state or province. """
POSTCODE = enum.auto()
""" Contains a postal code. """
COUNTRY = enum.auto()
""" Contains the country name or code. """
def compatible_with(self, ttype: TokenType,
is_full_phrase: bool) -> bool:
""" Check if the given token type can be used with the phrase type.
"""
if self == PhraseType.NONE:
return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
return ttype in (TokenType.WORD, TokenType.PARTIAL)\
or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
if self == PhraseType.POSTCODE:
return ttype == TokenType.POSTCODE
if self == PhraseType.COUNTRY:
return ttype == TokenType.COUNTRY
return ttype in (TokenType.WORD, TokenType.PARTIAL)
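# Editor's example (not in the original file): a street phrase accepts
# house-number tokens, a postcode phrase does not, and NEAR_ITEM terms
# are only allowed when they make up the full amenity phrase.
assert PhraseType.STREET.compatible_with(TokenType.HOUSENUMBER, False)
assert not PhraseType.POSTCODE.compatible_with(TokenType.HOUSENUMBER, False)
assert PhraseType.AMENITY.compatible_with(TokenType.NEAR_ITEM, True)
assert not PhraseType.AMENITY.compatible_with(TokenType.NEAR_ITEM, False)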
@dataclasses.dataclass
class Token(ABC):
""" Base type for tokens.
Specific query analyzers must implement the concrete token class.
"""
penalty: float
token: int
count: int
lookup_word: str
is_indexed: bool
@abstractmethod
def get_category(self) -> Tuple[str, str]:
""" Return the category restriction for qualifier terms and
category objects.
"""
@dataclasses.dataclass
class TokenRange:
""" Indexes of query nodes over which a token spans.
"""
start: int
end: int
def __lt__(self, other: 'TokenRange') -> bool:
return self.end <= other.start
def __le__(self, other: 'TokenRange') -> bool:
return NotImplemented
def __gt__(self, other: 'TokenRange') -> bool:
return self.start >= other.end
def __ge__(self, other: 'TokenRange') -> bool:
return NotImplemented
def replace_start(self, new_start: int) -> 'TokenRange':
""" Return a new token range with the new start.
"""
return TokenRange(new_start, self.end)
def replace_end(self, new_end: int) -> 'TokenRange':
""" Return a new token range with the new end.
"""
return TokenRange(self.start, new_end)
def split(self, index: int) -> Tuple['TokenRange', 'TokenRange']:
""" Split the span into two spans at the given index.
The index must be within the span.
"""
return self.replace_end(index), self.replace_start(index)
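# Editor's example (not in the original file): ranges compare by strict
# disjointness and split() yields two adjacent sub-ranges.
assert TokenRange(0, 1) < TokenRange(1, 3)
assert TokenRange(2, 4) > TokenRange(0, 2)
assert TokenRange(0, 3).split(2) == (TokenRange(0, 2), TokenRange(2, 3))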
@dataclasses.dataclass
class TokenList:
""" List of all tokens of a given type going from one breakpoint to another.
"""
end: int
ttype: TokenType
tokens: List[Token]
def add_penalty(self, penalty: float) -> None:
""" Add the given penalty to all tokens in the list.
"""
for token in self.tokens:
token.penalty += penalty
@dataclasses.dataclass
class QueryNode:
""" A node of the query representing a break between terms.
"""
btype: BreakType
ptype: PhraseType
starting: List[TokenList] = dataclasses.field(default_factory=list)
def has_tokens(self, end: int, *ttypes: TokenType) -> bool:
""" Check if there are tokens of the given types ending at the
given node.
"""
return any(tl.end == end and tl.ttype in ttypes for tl in self.starting)
def get_tokens(self, end: int, ttype: TokenType) -> Optional[List[Token]]:
""" Get the list of tokens of the given type starting at this node
and ending at the node 'end'. Returns 'None' if no such
tokens exist.
"""
for tlist in self.starting:
if tlist.end == end and tlist.ttype == ttype:
return tlist.tokens
return None
@dataclasses.dataclass
class Phrase:
""" A normalized query part. Phrases may be typed which means that
they then represent a specific part of the address.
"""
ptype: PhraseType
text: str
class QueryStruct:
""" A tokenized search query together with the normalized source
from which the tokens have been parsed.
The query contains a list of nodes that represent the breaks
between words. Tokens span between nodes, which don't necessarily
need to be direct neighbours. Thus the query is represented as a
directed acyclic graph.
When created, a query contains a single node: the start of the
query. Further nodes can be added by appending to 'nodes'.
"""
def __init__(self, source: List[Phrase]) -> None:
self.source = source
self.nodes: List[QueryNode] = \
[QueryNode(BreakType.START, source[0].ptype if source else PhraseType.NONE)]
def num_token_slots(self) -> int:
""" Return the length of the query in vertice steps.
"""
return len(self.nodes) - 1
def add_node(self, btype: BreakType, ptype: PhraseType) -> None:
""" Append a new break node with the given break type.
The phrase type denotes the type for any tokens starting
at the node.
"""
self.nodes.append(QueryNode(btype, ptype))
def add_token(self, trange: TokenRange, ttype: TokenType, token: Token) -> None:
""" Add a token to the query. 'start' and 'end' are the indexes of the
nodes from which to which the token spans. The indexes must exist
and are expected to be in the same phrase.
'ttype' denotes the type of the token and 'token' the token to
be inserted.
If the token type is not compatible with the phrase it should
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
else:
tlist.append(token)
def get_tokens(self, trange: TokenRange, ttype: TokenType) -> List[Token]:
""" Get the list of tokens of a given type, spanning the given
nodes. The nodes must exist. If no tokens exist, an
empty list is returned.
"""
return self.nodes[trange.start].get_tokens(trange.end, ttype) or []
def get_partials_list(self, trange: TokenRange) -> List[Token]:
""" Create a list of partial tokens between the given nodes.
The list is composed of the first token of type PARTIAL
going to the subsequent node. Such PARTIAL tokens are
assumed to exist.
"""
return [next(iter(self.get_tokens(TokenRange(i, i+1), TokenType.PARTIAL)))
for i in range(trange.start, trange.end)]
def iter_token_lists(self) -> Iterator[Tuple[int, QueryNode, TokenList]]:
""" Iterator over all token lists in the query.
"""
for i, node in enumerate(self.nodes):
for tlist in node.starting:
yield i, node, tlist
def find_lookup_word_by_id(self, token: int) -> str:
""" Find the first token with the given token ID and return
its lookup word. Returns 'None' if no such token exists.
The function is very slow and must only be used for
debugging.
"""
for node in self.nodes:
for tlist in node.starting:
for t in tlist.tokens:
if t.token == token:
return f"[{tlist.ttype.name[0]}]{t.lookup_word}"
return 'None'
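# --- Editor's usage sketch (not part of the original file) ---------------
# Building a minimal one-word query by hand. 'ExampleToken' is a
# hypothetical concrete Token; real query analyzers provide their own
# subclass with database-backed fields.
@dataclasses.dataclass
class ExampleToken(Token):
    def get_category(self) -> Tuple[str, str]:
        return ('place', 'house')

_query = QueryStruct([Phrase(PhraseType.NONE, 'berlin')])
_query.add_node(BreakType.END, PhraseType.NONE)
_query.add_token(TokenRange(0, 1), TokenType.PARTIAL,
                 ExampleToken(penalty=0.0, token=1, count=1,
                              lookup_word='berlin', is_indexed=True))
assert _query.num_token_slots() == 1
assert len(_query.get_partials_list(TokenRange(0, 1))) == 1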

View File

@@ -1,54 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Factory for creating a query analyzer for the configured tokenizer.
"""
from typing import List, cast, TYPE_CHECKING
from abc import ABC, abstractmethod
from pathlib import Path
import importlib
from nominatim.api.logging import log
from nominatim.api.connection import SearchConnection
if TYPE_CHECKING:
from nominatim.api.search.query import Phrase, QueryStruct
class AbstractQueryAnalyzer(ABC):
""" Class for analysing incoming queries.
Query analyzers are tied to the tokenizer used on import.
"""
@abstractmethod
async def analyze_query(self, phrases: List['Phrase']) -> 'QueryStruct':
""" Analyze the given phrases and return the tokenized query.
"""
@abstractmethod
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create a query analyzer for the tokenizer used by the database.
"""
name = await conn.get_property('tokenizer')
src_file = Path(__file__).parent / f'{name}_tokenizer.py'
if not src_file.is_file():
log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
raise RuntimeError('Tokenizer not found')
module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
return cast(AbstractQueryAnalyzer, await module.create_query_analyzer(conn))
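# --- Editor's usage sketch (not part of the original file) ---------------
# Typical call pattern for the factory. The open SearchConnection must be
# supplied by the caller; creating one is not covered by this module.
async def analyze_free_text(conn: SearchConnection, text: str) -> 'QueryStruct':
    from nominatim.api.search.query import Phrase, PhraseType
    analyzer = await make_query_analyzer(conn)
    normalized = analyzer.normalize_text(text)
    return await analyzer.analyze_query([Phrase(PhraseType.NONE, normalized)])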

View File

@@ -1,421 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Create query interpretations where each vertex in the query is assigned
a specific function (expressed as a token type).
"""
from typing import Optional, List, Iterator
import dataclasses
import nominatim.api.search.query as qmod
from nominatim.api.logging import log
# pylint: disable=too-many-return-statements,too-many-branches
@dataclasses.dataclass
class TypedRange:
""" A token range for a specific type of tokens.
"""
ttype: qmod.TokenType
trange: qmod.TokenRange
PENALTY_TOKENCHANGE = {
qmod.BreakType.START: 0.0,
qmod.BreakType.END: 0.0,
qmod.BreakType.PHRASE: 0.0,
qmod.BreakType.WORD: 0.1,
qmod.BreakType.PART: 0.2,
qmod.BreakType.TOKEN: 0.4
}
TypedRangeSeq = List[TypedRange]
@dataclasses.dataclass
class TokenAssignment: # pylint: disable=too-many-instance-attributes
""" Representation of a possible assignment of token types
to the tokens in a tokenized query.
"""
penalty: float = 0.0
name: Optional[qmod.TokenRange] = None
address: List[qmod.TokenRange] = dataclasses.field(default_factory=list)
housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None
near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None
@staticmethod
def from_ranges(ranges: TypedRangeSeq) -> 'TokenAssignment':
""" Create a new token assignment from a sequence of typed spans.
"""
out = TokenAssignment()
for token in ranges:
if token.ttype == qmod.TokenType.PARTIAL:
out.address.append(token.trange)
elif token.ttype == qmod.TokenType.HOUSENUMBER:
out.housenumber = token.trange
elif token.ttype == qmod.TokenType.POSTCODE:
out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange
elif token.ttype == qmod.TokenType.NEAR_ITEM:
out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange
return out
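# Editor's example (not in the original file): two partial words followed
# by a housenumber turn into one address range plus a housenumber range.
_example = TokenAssignment.from_ranges(
    [TypedRange(qmod.TokenType.PARTIAL, qmod.TokenRange(0, 2)),
     TypedRange(qmod.TokenType.HOUSENUMBER, qmod.TokenRange(2, 3))])
assert _example.address == [qmod.TokenRange(0, 2)]
assert _example.housenumber == qmod.TokenRange(2, 3)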
class _TokenSequence:
""" Working state used to put together the token assignments.
Represents an intermediate state while traversing the tokenized
query.
"""
def __init__(self, seq: TypedRangeSeq,
direction: int = 0, penalty: float = 0.0) -> None:
self.seq = seq
self.direction = direction
self.penalty = penalty
def __str__(self) -> str:
seq = ''.join(f'[{r.trange.start} - {r.trange.end}: {r.ttype.name}]' for r in self.seq)
return f'{seq} (dir: {self.direction}, penalty: {self.penalty})'
@property
def end_pos(self) -> int:
""" Return the index of the global end of the current sequence.
"""
return self.seq[-1].trange.end if self.seq else 0
def has_types(self, *ttypes: qmod.TokenType) -> bool:
""" Check if the current sequence contains any typed ranges of
the given types.
"""
return any(s.ttype in ttypes for s in self.seq)
def is_final(self) -> bool:
""" Return true when the sequence cannot be extended by any
form of token anymore.
"""
# Country and category must be the final term for left-to-right
return len(self.seq) > 1 and \
self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
""" Check if the give token type is appendable to the existing sequence.
Returns None if the token type is not appendable, otherwise the
new direction of the sequence after adding such a type. The
token is not added.
"""
if ttype == qmod.TokenType.WORD:
return None
if not self.seq:
# Append unconditionally to the empty list
if ttype == qmod.TokenType.COUNTRY:
return -1
if ttype in (qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return 1
return self.direction
# Name tokens are always acceptable and don't change direction
if ttype == qmod.TokenType.PARTIAL:
# qualifiers cannot appear in the middle of the query. They need
# to be near the next phrase.
if self.direction == -1 \
and any(t.ttype == qmod.TokenType.QUALIFIER for t in self.seq[:-1]):
return None
return self.direction
# Other tokens may only appear once
if self.has_types(ttype):
return None
if ttype == qmod.TokenType.HOUSENUMBER:
if self.direction == 1:
if len(self.seq) == 1 and self.seq[0].ttype == qmod.TokenType.QUALIFIER:
return None
if len(self.seq) > 2 \
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return None # direction left-to-right: housenumber must come before anything
elif self.direction == -1 \
or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return -1 # force direction right-to-left if after other terms
return self.direction
if ttype == qmod.TokenType.POSTCODE:
if self.direction == -1:
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return None
return -1
if self.direction == 1:
return None if self.has_types(qmod.TokenType.COUNTRY) else 1
if self.has_types(qmod.TokenType.HOUSENUMBER, qmod.TokenType.QUALIFIER):
return 1
return self.direction
if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1
if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction
if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1:
if (len(self.seq) == 1
and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2
and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1
return None
if self.direction == -1:
return -1
tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0:
return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
return None
if len(tempseq) > 1 or self.has_types(qmod.TokenType.POSTCODE, qmod.TokenType.COUNTRY):
return -1
return 0
return None
def advance(self, ttype: qmod.TokenType, end_pos: int,
btype: qmod.BreakType) -> Optional['_TokenSequence']:
""" Return a new token sequence state with the given token type
extended.
"""
newdir = self.appendable(ttype)
if newdir is None:
return None
if not self.seq:
newseq = [TypedRange(ttype, qmod.TokenRange(0, end_pos))]
new_penalty = 0.0
else:
last = self.seq[-1]
if btype != qmod.BreakType.PHRASE and last.ttype == ttype:
# extend the existing range
newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
new_penalty = 0.0
else:
# start a new range
newseq = list(self.seq) + [TypedRange(ttype,
qmod.TokenRange(last.trange.end, end_pos))]
new_penalty = PENALTY_TOKENCHANGE[btype]
return _TokenSequence(newseq, newdir, self.penalty + new_penalty)
def _adapt_penalty_from_priors(self, priors: int, new_dir: int) -> bool:
if priors == 2:
self.penalty += 1.0
elif priors > 2:
if self.direction == 0:
self.direction = new_dir
else:
return False
return True
def recheck_sequence(self) -> bool:
""" Check that the sequence is a fully valid token assignment
and adapt direction and penalties further if necessary.
This function catches some impossible assignments that need
forward context and can therefore not be excluded when building
the assignment.
"""
# housenumbers may not be further than 2 words from the beginning.
# If there are two words in front, give it a penalty.
hnrpos = next((i for i, tr in enumerate(self.seq)
if tr.ttype == qmod.TokenType.HOUSENUMBER),
None)
if hnrpos is not None:
if self.direction != -1:
priors = sum(1 for t in self.seq[:hnrpos] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, -1):
return False
if self.direction != 1:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
self.penalty += 1.0
return True
def _get_assignments_postcode(self, base: TokenAssignment,
query_len: int) -> Iterator[TokenAssignment]:
""" Yield possible assignments of Postcode searches with an
address component.
"""
assert base.postcode is not None
if (base.postcode.start == 0 and self.direction != -1)\
or (base.postcode.end == query_len and self.direction != 1):
log().comment('postcode search')
# <address>,<postcode> should give preference to address search
if base.postcode.start == 0:
penalty = self.penalty
self.direction = -1 # name searches are only possible backwards
else:
penalty = self.penalty + 0.1
self.direction = 1 # name searches are only possible forwards
yield dataclasses.replace(base, penalty=penalty)
def _get_assignments_address_forward(self, base: TokenAssignment,
query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments of address searches with
left-to-right reading.
"""
first = base.address[0]
log().comment('first word = name')
yield dataclasses.replace(base, penalty=self.penalty,
name=first, address=base.address[1:])
# No further split variants are generated when:
# * another name term comes after the first one and before the
#   housenumber,
# * a qualifier comes after the name, or
# * the containing phrase is strictly typed.
if (base.housenumber and first.end < base.housenumber.start)\
or (base.qualifier and base.qualifier > first)\
or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
return
penalty = self.penalty
# Penalty for:
# * <name>, <street>, <housenumber> , ...
# * queries that are comma-separated
if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
penalty += 0.25
for i in range(first.start + 1, first.end):
name, addr = first.split(i)
log().comment(f'split first word = name ({i - first.start})')
yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
def _get_assignments_address_backward(self, base: TokenAssignment,
query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments of address searches with
right-to-left reading.
"""
last = base.address[-1]
if self.direction == -1 or len(base.address) > 1:
log().comment('last word = name')
yield dataclasses.replace(base, penalty=self.penalty,
name=last, address=base.address[:-1])
# No further split variants are generated when:
# * another name term comes before the last one and after the
#   housenumber,
# * a qualifier comes before the name, or
# * the containing phrase is strictly typed.
if (base.housenumber and last.start > base.housenumber.end)\
or (base.qualifier and base.qualifier < last)\
or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
return
penalty = self.penalty
if base.housenumber and base.housenumber < last:
penalty += 0.4
if len(query.source) > 1:
penalty += 0.25
for i in range(last.start + 1, last.end):
addr, name = last.split(i)
log().comment(f'split last word = name ({i - last.start})')
yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments for the current sequence.
This function splits up general name assignments into name
and address and yields all possible variants of that.
"""
base = TokenAssignment.from_ranges(self.seq)
num_addr_tokens = sum(t.end - t.start for t in base.address)
if num_addr_tokens > 50:
return
# Postcode search (postcode-only search is covered in next case)
if base.postcode is not None and base.address:
yield from self._get_assignments_postcode(base, query.num_token_slots())
# Postcode or country-only search
if not base.address:
if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
# <postcode>,<address> should give preference to postcode search
if base.postcode and base.postcode.start == 0:
self.penalty += 0.1
# Right-to-left reading of the address
if self.direction != -1:
yield from self._get_assignments_address_forward(base, query)
# Left-to-right reading of the address
if self.direction != 1:
yield from self._get_assignments_address_backward(base, query)
# variant for special housenumber searches
if base.housenumber and not base.qualifier:
yield dataclasses.replace(base, penalty=self.penalty)
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Return possible word type assignments to word positions.
The assignments are computed from the concrete tokens listed
in the tokenized query.
The result includes the penalty for transitions from one word type to
another. It does not include penalties for transitions within a
type.
"""
todo = [_TokenSequence([], direction=0 if query.source[0].ptype == qmod.PhraseType.NONE else 1)]
while todo:
state = todo.pop()
node = query.nodes[state.end_pos]
for tlist in node.starting:
newstate = state.advance(tlist.ttype, tlist.end, node.btype)
if newstate is not None:
if newstate.end_pos == query.num_token_slots():
if newstate.recheck_sequence():
log().var_dump('Assignment', newstate)
yield from newstate.get_assignments(query)
elif not newstate.is_final():
todo.append(newstate)
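# --- Editor's usage sketch (not part of the original file) ---------------
# Enumerating all assignments for an already tokenized query and keeping
# the cheapest one. 'query' is assumed to come from one of the tokenizers.
def best_assignment(query: qmod.QueryStruct) -> Optional[TokenAssignment]:
    candidates = list(yield_token_assignments(query))
    return min(candidates, key=lambda a: a.penalty) if candidates else None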

View File

@@ -1,52 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Classes and function related to status call.
"""
from typing import Optional
import datetime as dt
import dataclasses
import sqlalchemy as sa
from nominatim.api.connection import SearchConnection
from nominatim import version
@dataclasses.dataclass
class StatusResult:
""" Result of a call to the status API.
"""
status: int
message: str
software_version = version.NOMINATIM_VERSION
data_updated: Optional[dt.datetime] = None
database_version: Optional[version.NominatimVersion] = None
async def get_status(conn: SearchConnection) -> StatusResult:
""" Execute a status API call.
"""
status = StatusResult(0, 'OK')
# Last update date
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
status.data_updated = await conn.scalar(sql)
if status.data_updated is not None:
if status.data_updated.tzinfo is None:
status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
else:
status.data_updated = status.data_updated.astimezone(dt.timezone.utc)
# Database version
try:
verstr = await conn.get_property('database_version')
status.database_version = version.parse_version(verstr)
except ValueError:
pass
return status
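# --- Editor's usage sketch (not part of the original file) ---------------
# Calling the status function directly. The open SearchConnection must be
# provided by the caller, e.g. from the surrounding API object.
async def print_status(conn: SearchConnection) -> None:
    status = await get_status(conn)
    print(status.status, status.message, status.data_updated,
          status.database_version)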

View File

@@ -1,550 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Complex datatypes used by the Nominatim API.
"""
from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \
Any, List, Sequence
from collections import abc
import dataclasses
import enum
import math
from struct import unpack
from binascii import unhexlify
from nominatim.errors import UsageError
from nominatim.api.localization import Locales
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
@dataclasses.dataclass
class PlaceID:
""" Reference a place by Nominatim's internal ID.
A PlaceID may reference place from the main table placex, from
the interpolation tables or the postcode tables. Place IDs are not
stable between installations. You may use this type theefore only
with place IDs obtained from the same database.
"""
place_id: int
"""
The internal ID of the place to reference.
"""
@dataclasses.dataclass
class OsmID:
""" Reference a place by its OSM ID and potentially the basic category.
The OSM ID may refer to places in the main table placex and OSM
interpolation lines.
"""
osm_type: str
""" OSM type of the object. Must be one of `N`(node), `W`(way) or
`R`(relation).
"""
osm_id: int
""" The OSM ID of the object.
"""
osm_class: Optional[str] = None
""" The same OSM object may appear multiple times in the database under
different categories. The optional class parameter allows distinguishing
the different categories and corresponds to the key part of the category.
If there are multiple objects in the database and `osm_class` is
left out, then one of the objects is returned at random.
"""
def __post_init__(self) -> None:
if self.osm_type not in ('N', 'W', 'R'):
raise ValueError(f"Illegal OSM type '{self.osm_type}'. Must be one of N, W, R.")
PlaceRef = Union[PlaceID, OsmID]
class Point(NamedTuple):
""" A geographic point in WGS84 projection.
"""
x: float
y: float
@property
def lat(self) -> float:
""" Return the latitude of the point.
"""
return self.y
@property
def lon(self) -> float:
""" Return the longitude of the point.
"""
return self.x
def to_geojson(self) -> str:
""" Return the point in GeoJSON format.
"""
return f'{{"type": "Point","coordinates": [{self.x}, {self.y}]}}'
@staticmethod
def from_wkb(wkb: Union[str, bytes]) -> 'Point':
""" Create a point from EWKB as returned from the database.
"""
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 25:
raise ValueError(f"Point wkb has unexpected length {len(wkb)}")
if wkb[0] == 0:
gtype, srid, x, y = unpack('>iidd', wkb[1:])
elif wkb[0] == 1:
gtype, srid, x, y = unpack('<iidd', wkb[1:])
else:
raise ValueError("WKB has unknown endian value.")
if gtype != 0x20000001:
raise ValueError("WKB must be a point geometry.")
if srid != 4326:
raise ValueError("Only WGS84 WKB supported.")
return Point(x, y)
@staticmethod
def from_param(inp: Any) -> 'Point':
""" Create a point from an input parameter. The parameter
may be given as a point, a string or a sequence of
strings or floats. Raises a UsageError if the format is
not correct.
"""
if isinstance(inp, Point):
return inp
seq: Sequence[str]
if isinstance(inp, str):
seq = inp.split(',')
elif isinstance(inp, abc.Sequence):
    seq = inp
else:
    raise UsageError('Point parameter must be a string or a sequence of coordinates.')
if len(seq) != 2:
raise UsageError('Point parameter needs 2 coordinates.')
try:
x, y = filter(math.isfinite, map(float, seq))
except ValueError as exc:
raise UsageError('Point parameter needs to be numbers.') from exc
if x < -180.0 or x > 180.0 or y < -90.0 or y > 90.0:
raise UsageError('Point coordinates invalid.')
return Point(x, y)
def to_wkt(self) -> str:
""" Return the WKT representation of the point.
"""
return f'POINT({self.x} {self.y})'
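# Editor's example (not in the original file): the accepted input forms
# and serializations of a point.
assert Point.from_param('8.5,47.3') == Point(8.5, 47.3)
assert Point.from_param(['8.5', '47.3']) == Point(8.5, 47.3)
assert Point(8.5, 47.3).lon == 8.5 and Point(8.5, 47.3).lat == 47.3
assert Point(8.5, 47.3).to_wkt() == 'POINT(8.5 47.3)'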
AnyPoint = Union[Point, Tuple[float, float]]
WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\x00\x00\x00'
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
class Bbox:
""" A bounding box in WGS84 projection.
The coordinates are available as an array in the 'coord'
property in the order (minx, miny, maxx, maxy).
"""
def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
""" Create a new bounding box with the given coordinates in WGS84
projection.
"""
self.coords = (minx, miny, maxx, maxy)
@property
def minlat(self) -> float:
""" Southern-most latitude, corresponding to the minimum y coordinate.
"""
return self.coords[1]
@property
def maxlat(self) -> float:
""" Northern-most latitude, corresponding to the maximum y coordinate.
"""
return self.coords[3]
@property
def minlon(self) -> float:
""" Western-most longitude, corresponding to the minimum x coordinate.
"""
return self.coords[0]
@property
def maxlon(self) -> float:
""" Eastern-most longitude, corresponding to the maximum x coordinate.
"""
return self.coords[2]
@property
def area(self) -> float:
""" Return the area of the box in WGS84.
"""
return (self.coords[2] - self.coords[0]) * (self.coords[3] - self.coords[1])
def contains(self, pt: Point) -> bool:
""" Check if the point is inside or on the boundary of the box.
"""
return self.coords[0] <= pt[0] and self.coords[1] <= pt[1]\
and self.coords[2] >= pt[0] and self.coords[3] >= pt[1]
def to_wkt(self) -> str:
""" Return the WKT representation of the Bbox. This
is a simple polygon over the four corners of the box.
"""
return 'POLYGON(({0} {1},{0} {3},{2} {3},{2} {1},{0} {1}))'\
.format(*self.coords) # pylint: disable=consider-using-f-string
@staticmethod
def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
""" Create a Bbox from a bounding box polygon as returned by
the database. Returns `None` if the input value is None.
"""
if wkb is None:
return None
if isinstance(wkb, str):
wkb = unhexlify(wkb)
if len(wkb) != 97:
raise ValueError("WKB must be a bounding box polygon")
if wkb.startswith(WKB_BBOX_HEADER_LE):
x1, y1, _, _, x2, y2 = unpack('<dddddd', wkb[17:65])
elif wkb.startswith(WKB_BBOX_HEADER_BE):
x1, y1, _, _, x2, y2 = unpack('>dddddd', wkb[17:65])
else:
raise ValueError("WKB has wrong header")
return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
@staticmethod
def from_point(pt: Point, buffer: float) -> 'Bbox':
""" Return a Bbox around the point with the buffer added to all sides.
"""
return Bbox(pt[0] - buffer, pt[1] - buffer,
pt[0] + buffer, pt[1] + buffer)
@staticmethod
def from_param(inp: Any) -> 'Bbox':
""" Return a Bbox from an input parameter. The box may be
given as a Bbox, a string or a list of strings or numbers.
Raises a UsageError if the format is incorrect.
"""
if isinstance(inp, Bbox):
return inp
seq: Sequence[str]
if isinstance(inp, str):
seq = inp.split(',')
elif isinstance(inp, abc.Sequence):
    seq = inp
else:
    raise UsageError('Bounding box parameter must be a string or a sequence of coordinates.')
if len(seq) != 4:
raise UsageError('Bounding box parameter needs 4 coordinates.')
try:
x1, y1, x2, y2 = filter(math.isfinite, map(float, seq))
except ValueError as exc:
raise UsageError('Bounding box parameter needs to be numbers.') from exc
x1 = min(180, max(-180, x1))
x2 = min(180, max(-180, x2))
y1 = min(90, max(-90, y1))
y2 = min(90, max(-90, y2))
if x1 == x2 or y1 == y2:
raise UsageError('Bounding box with invalid parameters.')
return Bbox(min(x1, x2), min(y1, y2), max(x1, x2), max(y1, y2))
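# Editor's example (not in the original file): from_param() normalizes the
# corner order, so the coordinates may be given either way round.
_box = Bbox.from_param('9.0,48.0,8.0,47.0')
assert (_box.minlon, _box.minlat, _box.maxlon, _box.maxlat) == (8.0, 47.0, 9.0, 48.0)
assert _box.contains(Point(8.5, 47.5))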
class GeometryFormat(enum.Flag):
""" All search functions support returning the full geometry of a place in
various formats. The internal geometry is converted by PostGIS to
the desired format and then returned as a string. It is possible to
request multiple formats at the same time.
"""
NONE = 0
""" No geometry requested. Alias for a empty flag.
"""
GEOJSON = enum.auto()
"""
[GeoJSON](https://geojson.org/) format
"""
KML = enum.auto()
"""
[KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
"""
SVG = enum.auto()
"""
[SVG](http://www.w3.org/TR/SVG/paths.html) format
"""
TEXT = enum.auto()
"""
[WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
"""
class DataLayer(enum.Flag):
""" The `DataLayer` flag type defines the layers that can be selected
for reverse and forward search.
"""
ADDRESS = enum.auto()
""" The address layer contains all places relevant for addresses:
fully qualified addresses with a house number (or a house name equivalent,
for some addresses) and places that can be part of an address like
roads, cities, states.
"""
POI = enum.auto()
""" Layer for points of interest like shops, restaurants but also
recycling bins or postboxes.
"""
RAILWAY = enum.auto()
""" Layer with railway features including tracks and other infrastructure.
Note that in Nominatim's standard configuration, only very few railway
features are imported into the database. Thus a custom configuration
is required to make full use of this layer.
"""
NATURAL = enum.auto()
""" Layer with natural features like rivers, lakes and mountains.
"""
MANMADE = enum.auto()
""" Layer with other human-made features and boundaries. This layer is
the catch-all and includes all features not covered by the other
layers. A typical example for this layer are national park boundaries.
"""
def format_country(cc: Any) -> List[str]:
""" Extract a list of country codes from the input which may be either
a string or list of strings. Filters out all values that are not
a two-letter string.
"""
clist: Sequence[str]
if isinstance(cc, str):
clist = cc.split(',')
elif isinstance(cc, abc.Sequence):
clist = cc
else:
raise UsageError("Parameter 'country' needs to be a comma-separated list "
"or a Python list of strings.")
return [cc.lower() for cc in clist if isinstance(cc, str) and len(cc) == 2]
def format_excluded(ids: Any) -> List[int]:
""" Extract a list of place ids from the input which may be either
a string or a list of strings or ints. Ignores empty values but
throws a UsageError on anything that cannot be converted to int.
"""
plist: Sequence[str]
if isinstance(ids, str):
plist = [s.strip() for s in ids.split(',')]
elif isinstance(ids, abc.Sequence):
plist = ids
else:
raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
"or a Python list of numbers.")
if not all(isinstance(i, int) or
(isinstance(i, str) and (not i or i.isdigit())) for i in plist):
raise UsageError("Parameter 'excluded' only takes place IDs.")
return [int(id) for id in plist if id] or [0]
def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:
""" Extract a list of categories. Currently a noop.
"""
return categories
TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
@dataclasses.dataclass
class LookupDetails:
""" Collection of parameters that define which kind of details are
returned with a lookup or details result.
"""
geometry_output: GeometryFormat = GeometryFormat.NONE
""" Add the full geometry of the place to the result. Multiple
formats may be selected. Note that geometries can become quite large.
"""
address_details: bool = False
""" Get detailed information on the places that make up the address
for the result.
"""
linked_places: bool = False
""" Get detailed information on the places that link to the result.
"""
parented_places: bool = False
""" Get detailed information on all places that this place is a parent
for, i.e. all places for which it provides the address details.
Only POI places can have parents.
"""
keywords: bool = False
""" Add information about the search terms used for this place.
"""
geometry_simplification: float = 0.0
""" Simplification factor for a geometry in degrees WGS. A factor of
0.0 means the original geometry is kept. The higher the value, the
more the geometry gets simplified.
"""
locales: Locales = Locales()
""" Preferred languages for localization of results.
"""
@classmethod
def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
""" Load the data fields of the class from a dictionary.
Unknown entries in the dictionary are ignored, missing ones
get the default setting.
The function supports type checking and throws a UsageError
when the value does not fit.
"""
def _check_field(v: Any, field: 'dataclasses.Field[Any]') -> Any:
if v is None:
return field.default_factory() \
if field.default_factory != dataclasses.MISSING \
else field.default
if field.metadata and 'transform' in field.metadata:
return field.metadata['transform'](v)
if not isinstance(v, field.type):
raise UsageError(f"Parameter '{field.name}' needs to be of {field.type!s}.")
return v
return cls(**{f.name: _check_field(kwargs[f.name], f)
for f in dataclasses.fields(cls) if f.name in kwargs})
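# Editor's example (not in the original file): unknown keys are ignored
# and missing fields keep their defaults.
_details = LookupDetails.from_kwargs({'address_details': True, 'unknown': 1})
assert _details.address_details and not _details.keywords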
@dataclasses.dataclass
class ReverseDetails(LookupDetails):
""" Collection of parameters for the reverse call.
"""
max_rank: int = dataclasses.field(default=30,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Highest address rank to return.
"""
layers: DataLayer = DataLayer.ADDRESS | DataLayer.POI
""" Filter which kind of data to include.
"""
@dataclasses.dataclass
class SearchDetails(LookupDetails):
""" Collection of parameters for the search call.
"""
max_results: int = 10
""" Maximum number of results to be returned. The actual number of results
may be less.
"""
min_rank: int = dataclasses.field(default=0,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Lowest address rank to return.
"""
max_rank: int = dataclasses.field(default=30,
metadata={'transform': lambda v: max(0, min(v, 30))}
)
""" Highest address rank to return.
"""
layers: Optional[DataLayer] = dataclasses.field(default=None,
metadata={'transform': lambda r : r})
""" Filter which kind of data to include. When 'None' (the default) then
filtering by layers is disabled.
"""
countries: List[str] = dataclasses.field(default_factory=list,
metadata={'transform': format_country})
""" Restrict search results to the given countries. An empty list (the
default) will disable this filter.
"""
excluded: List[int] = dataclasses.field(default_factory=list,
metadata={'transform': format_excluded})
""" List of OSM objects to exclude from the results. Currently only
works when the internal place ID is given.
An empty list (the default) will disable this filter.
"""
viewbox: Optional[Bbox] = dataclasses.field(default=None,
metadata={'transform': Bbox.from_param})
""" Focus the search on a given map area.
"""
bounded_viewbox: bool = False
""" Use 'viewbox' as a filter and restrict results to places within the
given area.
"""
near: Optional[Point] = dataclasses.field(default=None,
metadata={'transform': Point.from_param})
""" Order results by distance to the given point.
"""
near_radius: Optional[float] = dataclasses.field(default=None,
metadata={'transform': lambda r : r})
""" Use near point as a filter and drop results outside the given
radius. Radius is given in degrees WSG84.
"""
categories: List[Tuple[str, str]] = dataclasses.field(default_factory=list,
metadata={'transform': format_categories})
""" Restrict search to places with one of the given class/type categories.
An empty list (the default) will disable this filter.
"""
viewbox_x2: Optional[Bbox] = None
def __post_init__(self) -> None:
if self.viewbox is not None:
xext = (self.viewbox.maxlon - self.viewbox.minlon)/2
yext = (self.viewbox.maxlat - self.viewbox.minlat)/2
self.viewbox_x2 = Bbox(self.viewbox.minlon - xext, self.viewbox.minlat - yext,
self.viewbox.maxlon + xext, self.viewbox.maxlat + yext)
def restrict_min_max_rank(self, new_min: int, new_max: int) -> None:
""" Change the min_rank and max_rank fields to respect the
given boundaries.
"""
assert new_min <= new_max
self.min_rank = max(self.min_rank, new_min)
self.max_rank = min(self.max_rank, new_max)
def is_impossible(self) -> bool:
""" Check if the parameter configuration is contradictionary and
cannot yield any results.
"""
return (self.min_rank > self.max_rank
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
or (self.layers is not None and not self.layers)
or (self.max_rank <= 4 and
self.layers is not None and not self.layers & DataLayer.ADDRESS))
def layer_enabled(self, layer: DataLayer) -> bool:
""" Check if the given layer has been chosen. Also returns
true when layer restriction has been disabled completely.
"""
return self.layers is None or bool(self.layers & layer)
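# Editor's example (not in the original file): narrowing the rank window
# until the parameter set can no longer produce any result.
_search = SearchDetails.from_kwargs({'min_rank': 10, 'max_rank': 20})
_search.restrict_min_max_rank(4, 8)
assert _search.is_impossible()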

View File

@@ -1,21 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Implementation of API version v1 (aka the legacy version).
"""
#pylint: disable=useless-import-alias
from nominatim.api.v1.server_glue import (ASGIAdaptor as ASGIAdaptor,
EndpointFunc as EndpointFunc,
ROUTES as ROUTES)
import nominatim.api.v1.format as _format
list_formats = _format.dispatch.list_formats
supports_format = _format.dispatch.supports_format
format_result = _format.dispatch.format_result

View File

@@ -1,200 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Hard-coded information about tag categories.
These tables have been copied verbatim from the old PHP code. For future
versions a more flexible format is required.
"""
from typing import Tuple, Optional, Mapping, Union
import nominatim.api as napi
def get_label_tag(category: Tuple[str, str], extratags: Optional[Mapping[str, str]],
rank: int, country: Optional[str]) -> str:
""" Create a label tag for the given place that can be used as an XML name.
"""
if rank < 26 and extratags and 'place' in extratags:
label = extratags['place']
elif rank < 26 and extratags and 'linked_place' in extratags:
label = extratags['linked_place']
elif category == ('boundary', 'administrative'):
label = ADMIN_LABELS.get((country or '', int(rank/2)))\
or ADMIN_LABELS.get(('', int(rank/2)))\
or 'Administrative'
elif category[1] == 'postal_code':
label = 'postcode'
elif rank < 26:
label = category[1] if category[1] != 'yes' else category[0]
elif rank < 28:
label = 'road'
elif category[0] == 'place'\
and category[1] in ('house_number', 'house_name', 'country_code'):
label = category[1]
else:
label = category[0]
return label.lower().replace(' ', '_')
def bbox_from_result(result: Union[napi.ReverseResult, napi.SearchResult]) -> napi.Bbox:
""" Compute a bounding box for the result. For ways and relations
the given bounding box is used. For all other objects, a box is computed
around the centroid according to dimensions derived from the
search rank.
"""
if (result.osm_object and result.osm_object[0] == 'N') or result.bbox is None:
extent = NODE_EXTENT.get(result.category, 0.00005)
return napi.Bbox.from_point(result.centroid, extent)
return result.bbox
# pylint: disable=line-too-long
OSM_ATTRIBUTION = 'Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright'
OSM_TYPE_NAME = {
'N': 'node',
'W': 'way',
'R': 'relation'
}
ADMIN_LABELS = {
('', 1): 'Continent',
('', 2): 'Country',
('', 3): 'Region',
('', 4): 'State',
('', 5): 'State District',
('', 6): 'County',
('', 7): 'Municipality',
('', 8): 'City',
('', 9): 'City District',
('', 10): 'Suburb',
('', 11): 'Neighbourhood',
('', 12): 'City Block',
('no', 3): 'State',
('no', 4): 'County',
('se', 3): 'State',
('se', 4): 'County'
}
ICONS = {
('boundary', 'administrative'): 'poi_boundary_administrative',
('place', 'city'): 'poi_place_city',
('place', 'town'): 'poi_place_town',
('place', 'village'): 'poi_place_village',
('place', 'hamlet'): 'poi_place_village',
('place', 'suburb'): 'poi_place_village',
('place', 'locality'): 'poi_place_village',
('place', 'airport'): 'transport_airport2',
('aeroway', 'aerodrome'): 'transport_airport2',
('railway', 'station'): 'transport_train_station2',
('amenity', 'place_of_worship'): 'place_of_worship_unknown3',
('amenity', 'pub'): 'food_pub',
('amenity', 'bar'): 'food_bar',
('amenity', 'university'): 'education_university',
('tourism', 'museum'): 'tourist_museum',
('amenity', 'arts_centre'): 'tourist_art_gallery2',
('tourism', 'zoo'): 'tourist_zoo',
('tourism', 'theme_park'): 'poi_point_of_interest',
('tourism', 'attraction'): 'poi_point_of_interest',
('leisure', 'golf_course'): 'sport_golf',
('historic', 'castle'): 'tourist_castle',
('amenity', 'hospital'): 'health_hospital',
('amenity', 'school'): 'education_school',
('amenity', 'theatre'): 'tourist_theatre',
('amenity', 'library'): 'amenity_library',
('amenity', 'fire_station'): 'amenity_firestation3',
('amenity', 'police'): 'amenity_police2',
('amenity', 'bank'): 'money_bank2',
('amenity', 'post_office'): 'amenity_post_office',
('tourism', 'hotel'): 'accommodation_hotel2',
('amenity', 'cinema'): 'tourist_cinema',
('tourism', 'artwork'): 'tourist_art_gallery2',
('historic', 'archaeological_site'): 'tourist_archaeological2',
('amenity', 'doctors'): 'health_doctors',
('leisure', 'sports_centre'): 'sport_leisure_centre',
('leisure', 'swimming_pool'): 'sport_swimming_outdoor',
('shop', 'supermarket'): 'shopping_supermarket',
('shop', 'convenience'): 'shopping_convenience',
('amenity', 'restaurant'): 'food_restaurant',
('amenity', 'fast_food'): 'food_fastfood',
('amenity', 'cafe'): 'food_cafe',
('tourism', 'guest_house'): 'accommodation_bed_and_breakfast',
('amenity', 'pharmacy'): 'health_pharmacy_dispensing',
('amenity', 'fuel'): 'transport_fuel',
('natural', 'peak'): 'poi_peak',
('natural', 'wood'): 'landuse_coniferous_and_deciduous',
('shop', 'bicycle'): 'shopping_bicycle',
('shop', 'clothes'): 'shopping_clothes',
('shop', 'hairdresser'): 'shopping_hairdresser',
('shop', 'doityourself'): 'shopping_diy',
('shop', 'estate_agent'): 'shopping_estateagent2',
('shop', 'car'): 'shopping_car',
('shop', 'garden_centre'): 'shopping_garden_centre',
('shop', 'car_repair'): 'shopping_car_repair',
('shop', 'bakery'): 'shopping_bakery',
('shop', 'butcher'): 'shopping_butcher',
('shop', 'apparel'): 'shopping_clothes',
('shop', 'laundry'): 'shopping_laundrette',
('shop', 'beverages'): 'shopping_alcohol',
('shop', 'alcohol'): 'shopping_alcohol',
('shop', 'optician'): 'health_opticians',
('shop', 'chemist'): 'health_pharmacy',
('shop', 'gallery'): 'tourist_art_gallery2',
('shop', 'jewelry'): 'shopping_jewelry',
('tourism', 'information'): 'amenity_information',
('historic', 'ruins'): 'tourist_ruin',
('amenity', 'college'): 'education_school',
('historic', 'monument'): 'tourist_monument',
('historic', 'memorial'): 'tourist_monument',
('historic', 'mine'): 'poi_mine',
('tourism', 'caravan_site'): 'accommodation_caravan_park',
('amenity', 'bus_station'): 'transport_bus_station',
('amenity', 'atm'): 'money_atm2',
('tourism', 'viewpoint'): 'tourist_view_point',
('tourism', 'guesthouse'): 'accommodation_bed_and_breakfast',
('railway', 'tram'): 'transport_tram_stop',
('amenity', 'courthouse'): 'amenity_court',
('amenity', 'recycling'): 'amenity_recycling',
('amenity', 'dentist'): 'health_dentist',
('natural', 'beach'): 'tourist_beach',
('railway', 'tram_stop'): 'transport_tram_stop',
('amenity', 'prison'): 'amenity_prison',
('highway', 'bus_stop'): 'transport_bus_stop2'
}
NODE_EXTENT = {
('place', 'continent'): 25,
('place', 'country'): 7,
('place', 'state'): 2.6,
('place', 'province'): 2.6,
('place', 'region'): 1.0,
('place', 'county'): 0.7,
('place', 'city'): 0.16,
('place', 'municipality'): 0.16,
('place', 'island'): 0.32,
('place', 'postcode'): 0.16,
('place', 'town'): 0.04,
('place', 'village'): 0.02,
('place', 'hamlet'): 0.02,
('place', 'district'): 0.02,
('place', 'borough'): 0.02,
('place', 'suburb'): 0.02,
('place', 'locality'): 0.01,
('place', 'neighbourhood'): 0.01,
('place', 'quarter'): 0.01,
('place', 'city_block'): 0.01,
('landuse', 'farm'): 0.01,
('place', 'farm'): 0.01,
('place', 'airport'): 0.015,
('aeroway', 'aerodrome'): 0.015,
('railway', 'station'): 0.005
}
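# --- Editor's usage sketch (not part of the original file) ---------------
# Label lookup falls back from the country-specific entries to the generic
# ones: admin rank 8 maps to 'State' generically but to 'County' in Norway.
assert get_label_tag(('boundary', 'administrative'), None, 4, 'de') == 'country'
assert get_label_tag(('boundary', 'administrative'), None, 8, 'no') == 'county'
assert get_label_tag(('highway', 'residential'), None, 27, None) == 'road'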

Some files were not shown because too many files have changed in this diff.