prepare release 4.3.2

adapt typing to newest version of SQLAlchemy
improve code to collect the PostGIS version
2026-02-14 10:27:57 +00:00 · 2023-11-17 10:36:42 +01:00 · 2023-11-17 10:08:54 +01:00 · 2023-11-17 10:06:39 +01:00 · 2023-11-17 10:06:18 +01:00 · 2023-11-17 10:05:37 +01:00
789 changed files with 95105 additions and 20926 deletions
--- a/.github/FUNDING.yml
+++ b/.github/FUNDING.yml
@@ -0,0 +1,2 @@
+github: lonvia
+custom: "https://nominatim.org/funding/"
--- a/.github/ISSUE_TEMPLATE/report-issues-with-search-results.md
+++ b/.github/ISSUE_TEMPLATE/report-issues-with-search-results.md
@@ -7,6 +7,8 @@ assignees: ''

 ---

+<!-- Note: this template is for reporting problems with searching. If you have found an issue with the data, you need to report/fix the issue directly in OpenStreetMap. See https://www.openstreetmap.org/fixthemap for details. -->
+
 ## What did you search for?

 <!-- Please try to provide a link to your search. You  can go to https://nominatim.openstreetmap.org and repeat your search there. If you originally found the issue somewhere else, please tell us what software/website you were using. -->
@@ -15,11 +17,11 @@ assignees: ''

 ## What result did you expect?

-**Is the result in the right place and just named wrongly?** 
+**When the result is in the right place and just named wrongly:**

 <!-- Please tell us the display name you expected. -->

-**Is the result missing completely?**
+**When the result is missing completely:**

 <!-- Make sure that the data you are looking for is in OpenStreetMap. Provide a link to the OpenStreetMap object or if you cannot get it, a link to the map on https://openstreetmap.org where you expect the result to be.

--- a/.github/ISSUE_TEMPLATE/report-problems-with-the-software.md
+++ b/.github/ISSUE_TEMPLATE/report-problems-with-the-software.md
@@ -7,10 +7,13 @@ assignees: ''

 ---

-<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
+<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first.
+
+     Do not send screen shots! Copy any console output directly into the issue.
+ -->

 **Describe the bug**
-<!-- A clear and concise description of what the bug is. -->
+<!-- A clear and concise description of what the bug is.-->

 **To Reproduce**
 <!-- Please describe what you did to get to the issue. -->
@@ -25,12 +28,15 @@ assignees: ''
 - RAM: 
 - number of CPUs:
 - type and size of disks:
- bare metal/AWS/other cloud service: 

 **Postgresql Configuration:**

 <!-- List any configuration items you changed in your postgresql configuration. -->

+**Nominatim Configuration:**
+
+<!-- List the contents of your customized `.env` file. -->
+
 **Additional context**

 <!-- Add any other context about the problem here. -->
--- a/.github/actions/build-nominatim/action.yml
+++ b/.github/actions/build-nominatim/action.yml
@@ -1,25 +1,48 @@
 name: 'Build Nominatim'

+inputs:
+    flavour:
+        description: 'Version of Ubuntu to install on'
+        required: false
+        default: 'ubuntu-20'
+    cmake-args:
+        description: 'Additional options to hand to cmake'
+        required: false
+        default: ''
+    lua:
+        description: 'Version of Lua to use'
+        required: false
+        default: '5.3'
+
 runs:
    using: "composite"

    steps:
-        - name: Install prerequisites
+        - name: Clean out the disk
          run: |
-            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu python3-argparse-manpage
+            sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
+            df -h
          shell: bash
-
-        - name: Download dependencies
+        - name: Install${{ matrix.flavour }} prerequisites
          run: |
-              if [ ! -f country_grid.sql.gz ]; then
-                  wget --no-verbose https://www.nominatim.org/data/country_grid.sql.gz
-              fi
-              cp country_grid.sql.gz Nominatim/data/country_osm_grid.sql.gz
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev
+            if [ "$FLAVOUR" == "oldstuff" ]; then
+                pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg
+            else
+                sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
+                pip3 install sqlalchemy psycopg
+            fi
          shell: bash
+          env:
+            FLAVOUR: ${{ inputs.flavour }}
+            CMAKE_ARGS: ${{ inputs.cmake-args }}
+            LUA_VERSION: ${{ inputs.lua }}

        - name: Configure
-          run: mkdir build && cd build && cmake ../Nominatim
+          run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim
          shell: bash
+          env:
+            CMAKE_ARGS: ${{ inputs.cmake-args }}

        - name: Build
          run: |
--- a/.github/actions/setup-postgresql/action.yml
+++ b/.github/actions/setup-postgresql/action.yml
@@ -14,13 +14,17 @@ runs:
    steps:
        - name: Remove existing PostgreSQL
          run: |
-              sudo apt-get update -qq
              sudo apt-get purge -yq postgresql*
+              sudo apt install curl ca-certificates gnupg
+              curl https://www.postgresql.org/media/keys/ACCC4CF8.asc | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/apt.postgresql.org.gpg >/dev/null
+              sudo sh -c 'echo "deb https://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+              sudo apt-get update -qq
+
          shell: bash

        - name: Install PostgreSQL
          run: |
-              sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER} postgresql-server-dev-${PGVER}
+              sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER}
          shell: bash
          env:
              PGVER: ${{ inputs.postgresql-version }}
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -3,57 +3,127 @@ name: CI Tests
 on: [ push, pull_request ]

 jobs:
-    tests:
-        runs-on: ubuntu-20.04
-
-        strategy:
-            matrix:
-                postgresql: [9.5, 13]
-                include:
-                    - postgresql: 9.5
-                      postgis: 2.5
-                    - postgresql: 13
-                      postgis: 3
+    create-archive:
+        runs-on: ubuntu-latest

        steps:
-            - uses: actions/checkout@v2
+            - uses: actions/checkout@v4
              with:
-                  submodules: true
-                  path: Nominatim
+                submodules: true
+
+            - uses: actions/cache@v3
+              with:
+                  path: |
+                     data/country_osm_grid.sql.gz
+                  key: nominatim-country-data-1
+
+            - name: Package tarball
+              run: |
+                  if [ ! -f data/country_osm_grid.sql.gz ]; then
+                      wget --no-verbose -O data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
+                  fi
+                  cd ..
+                  tar czf nominatim-src.tar.bz2 Nominatim
+                  mv nominatim-src.tar.bz2 Nominatim
+
+            - name: 'Upload Artifact'
+              uses: actions/upload-artifact@v3
+              with:
+                  name: full-source
+                  path: nominatim-src.tar.bz2
+                  retention-days: 1
+
+    tests:
+        needs: create-archive
+        strategy:
+            matrix:
+                flavour: [oldstuff, "ubuntu-20", "ubuntu-22"]
+                include:
+                    - flavour: oldstuff
+                      ubuntu: 20
+                      postgresql: '9.6'
+                      postgis: '2.5'
+                      php: '7.3'
+                      lua: '5.1'
+                    - flavour: ubuntu-20
+                      ubuntu: 20
+                      postgresql: 13
+                      postgis: 3
+                      php: '7.4'
+                      lua: '5.3'
+                    - flavour: ubuntu-22
+                      ubuntu: 22
+                      postgresql: 15
+                      postgis: 3
+                      php: '8.1'
+                      lua: '5.3'
+
+        runs-on: ubuntu-${{ matrix.ubuntu }}.04
+
+        steps:
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+
+            - name: Unpack Nominatim
+              run: tar xf nominatim-src.tar.bz2

            - name: Setup PHP
              uses: shivammathur/setup-php@v2
              with:
-                  php-version: '7.4'
-                  tools: phpunit, phpcs
+                  php-version: ${{ matrix.php }}
+                  tools: phpunit:9, phpcs, composer
+                  ini-values: opcache.jit=disable
+              env:
+                  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

-            - name: Get Date
-              id: get-date
-              run: |
-                  echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
-              shell: bash
-
-            - uses: actions/cache@v2
+            - uses: actions/setup-python@v4
              with:
-                  path: |
-                     country_grid.sql.gz
-                  key: nominatim-country-data-${{ steps.get-date.outputs.date }}
+                python-version: 3.7
+              if: matrix.flavour == 'oldstuff'

            - uses: ./Nominatim/.github/actions/setup-postgresql
              with:
                  postgresql-version: ${{ matrix.postgresql }}
                  postgis-version: ${{ matrix.postgis }}
-            - uses: ./Nominatim/.github/actions/build-nominatim

-            - name: Install test prerequsites
-              run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
+            - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  flavour: ${{ matrix.flavour }}
+                  lua: ${{ matrix.lua }}
+
+            - name: Install test prerequisites (behave from apt)
+              run: sudo apt-get install -y -qq python3-behave
+              if: matrix.flavour == 'ubuntu-20'
+
+            - name: Install test prerequisites (behave from pip)
+              run: pip3 install behave==1.2.6
+              if: (matrix.flavour == 'oldstuff') || (matrix.flavour == 'ubuntu-22')
+
+            - name: Install test prerequisites (from apt for Ubuntu 2x)
+              run: sudo apt-get install -y -qq python3-pytest python3-pytest-asyncio uvicorn
+              if: matrix.flavour != 'oldstuff'
+
+            - name: Install newer pytest-asyncio
+              run: pip3 install -U pytest-asyncio
+              if: matrix.flavour == 'ubuntu-20'
+
+            - name: Install test prerequisites (from pip for Ubuntu 18)
+              run: pip3 install pytest pytest-asyncio uvicorn
+              if: matrix.flavour == 'oldstuff'
+
+            - name: Install Python webservers
+              run: pip3 install falcon starlette
+
+            - name: Install latest pylint
+              run: pip3 install -U pylint asgi_lifespan

            - name: PHP linting
              run: phpcs --report-width=120 .
              working-directory: Nominatim

            - name: Python linting
-              run: pylint --extension-pkg-whitelist=osmium nominatim
+              run: python3 -m pylint nominatim
              working-directory: Nominatim

            - name: PHP unit tests
@@ -61,80 +131,218 @@ jobs:
              working-directory: Nominatim/test/php

            - name: Python unit tests
-              run: py.test-3 test/python
+              run: python3 -m pytest test/python
              working-directory: Nominatim

            - name: BDD tests
-              run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
+              run: |
+                  python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
              working-directory: Nominatim/test/bdd

-    import:
+            - name: Install mypy and typechecking info
+              run: pip3 install -U mypy osmium uvicorn types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests types-ujson types-Pygments typing-extensions
+              if: matrix.flavour != 'oldstuff'
+
+            - name: Python static typechecking
+              run: python3 -m mypy --strict nominatim
+              working-directory: Nominatim
+              if: matrix.flavour != 'oldstuff'
+
+    legacy-test:
+        needs: create-archive
        runs-on: ubuntu-20.04

        steps:
-            - uses: actions/checkout@v2
+            - uses: actions/download-artifact@v3
              with:
-                  submodules: true
-                  path: Nominatim
+                  name: full-source

-            - name: Get Date
-              id: get-date
-              run: |
-                  echo "::set-output name=date::$(/bin/date -u "+%Y%W")"
-              shell: bash
+            - name: Unpack Nominatim
+              run: tar xf nominatim-src.tar.bz2

-            - uses: actions/cache@v2
+            - name: Setup PHP
+              uses: shivammathur/setup-php@v2
              with:
-                  path: |
-                     country_grid.sql.gz
-                  key: nominatim-country-data-${{ steps.get-date.outputs.date }}
-
-            - uses: actions/cache@v2
-              with:
-                  path: |
-                     monaco-latest.osm.pbf
-                  key: nominatim-test-data-${{ steps.get-date.outputs.date }}
+                  php-version: '7.4'

            - uses: ./Nominatim/.github/actions/setup-postgresql
              with:
                  postgresql-version: 13
                  postgis-version: 3
-            - uses: ./Nominatim/.github/actions/build-nominatim

-            - name: Clean installation
-              run: rm -rf Nominatim build
+            - name: Install Postgresql server dev
+              run: sudo apt-get install postgresql-server-dev-13
+
+            - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  cmake-args: -DBUILD_MODULE=on
+
+            - name: Install test prerequisites
+              run: sudo apt-get install -y -qq python3-behave
+
+            - name: BDD tests (legacy tokenizer)
+              run: |
+                  python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
+              working-directory: Nominatim/test/bdd
+
+
+    python-api-test:
+        needs: create-archive
+        runs-on: ubuntu-22.04
+
+        steps:
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+
+            - name: Unpack Nominatim
+              run: tar xf nominatim-src.tar.bz2
+
+            - uses: ./Nominatim/.github/actions/setup-postgresql
+              with:
+                  postgresql-version: 15
+                  postgis-version: 3
+
+            - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  flavour: 'ubuntu-22'
+
+            - name: Install test prerequisites
+              run: sudo apt-get install -y -qq python3-behave
+
+            - name: Install Python webservers
+              run: pip3 install starlette asgi_lifespan httpx
+
+            - name: BDD tests (starlette)
+              run: |
+                  python3 -m behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DAPI_ENGINE=starlette --format=progress3
+              working-directory: Nominatim/test/bdd
+
+
+    install:
+        runs-on: ubuntu-latest
+        needs: create-archive
+
+        strategy:
+            matrix:
+                name: [Ubuntu-20, Ubuntu-22]
+                include:
+                    - name: Ubuntu-20
+                      image: "ubuntu:20.04"
+                      ubuntu: 20
+                      install_mode: install-apache
+                    - name: Ubuntu-22
+                      image: "ubuntu:22.04"
+                      ubuntu: 22
+                      install_mode: install-apache
+
+        container:
+            image: ${{ matrix.image }}
+            env:
+                LANG: en_US.UTF-8
+
+        defaults:
+            run:
+                shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0}
+
+        steps:
+            - name: Prepare container (Ubuntu)
+              run: |
+                  export APT_LISTCHANGES_FRONTEND=none
+                  export DEBIAN_FRONTEND=noninteractive
+                  apt-get update -qq
+                  apt-get install -y git sudo wget
+                  ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
              shell: bash

+            - name: Setup import user
+              run: |
+                  useradd -m nominatim
+                  echo 'nominatim   ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominiatim
+                  echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh
+              shell: bash
+              env:
+                OS: ${{ matrix.name }}
+                INSTALL_MODE: ${{ matrix.install_mode }}
+
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+                  path: /home/nominatim
+
+            - name: Install Nominatim
+              run: |
+                export USERNAME=nominatim
+                export USERHOME=/home/nominatim
+                export NOSYSTEMD=yes
+                export HAVE_SELINUX=no
+                tar xf nominatim-src.tar.bz2
+                . vagrant.sh
+              working-directory: /home/nominatim
+
            - name: Prepare import environment
              run: |
-                  if [ ! -f monaco-latest.osm.pbf ]; then
-                      wget --no-verbose https://download.geofabrik.de/europe/monaco-latest.osm.pbf
-                  fi
-                  mkdir data-env
-                  cd data-env
-              shell: bash
+                  mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
+                  mv Nominatim/settings/flex-base.lua flex-base.lua
+                  mv Nominatim/settings/import-extratags.lua import-extratags.lua
+                  mv Nominatim/settings/taginfo.lua taginfo.lua
+                  rm -rf Nominatim
+                  mkdir data-env-reverse
+              working-directory: /home/nominatim
+
+            - name: Print version
+              run: nominatim --version
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Print taginfo
+              run: lua taginfo.lua
+              working-directory: /home/nominatim
+
+            - name: Collect host OS information
+              run: nominatim admin --collect-os-info
+              working-directory: /home/nominatim/nominatim-project

            - name: Import
-              run: nominatim import --osm-file ../monaco-latest.osm.pbf
-              shell: bash
-              working-directory: data-env
+              run: nominatim import --osm-file ../test.pbf
+              working-directory: /home/nominatim/nominatim-project

            - name: Import special phrases
              run: nominatim special-phrases --import-from-wiki
-              working-directory: data-env
+              working-directory: /home/nominatim/nominatim-project

-            - name: Check import
+            - name: Check full import
              run: nominatim admin --check-database
-              working-directory: data-env
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Warm up database
+              run: nominatim admin --warm
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Prepare update (Ubuntu)
+              run: apt-get install -y python3-pip
+              shell: bash

            - name: Run update
              run: |
-                   nominatim replication --init
-                   nominatim replication --once
-              working-directory: data-env
+                  pip3 install --user osmium
+                  nominatim replication --init
+                  NOMINATIM_REPLICATION_MAX_DIFF=1 nominatim replication --once
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Clean up database
+              run: nominatim refresh --postcodes --word-tokens
+              working-directory: /home/nominatim/nominatim-project

            - name: Run reverse-only import
-              run : nominatim import --osm-file ../monaco-latest.osm.pbf --reverse-only
-              working-directory: data-env
-              env:
-                  NOMINATIM_DATABASE_DSN: pgsql:dbname=reverse
+              run : |
+                  echo 'NOMINATIM_DATABASE_DSN="pgsql:dbname=reverse"' >> .env
+                  nominatim import --osm-file ../test.pbf --reverse-only --no-updates
+              working-directory: /home/nominatim/data-env-reverse
+
+            - name: Check reverse-only import
+              run: nominatim admin --check-database
+              working-directory: /home/nominatim/data-env-reverse
+
+            - name: Clean up database (reverse-only import)
+              run: nominatim refresh --postcodes --word-tokens
+              working-directory: /home/nominatim/nominatim-project
--- a/.gitignore
+++ b/.gitignore
@@ -1,12 +1,9 @@
 *.log
 *.pyc

-build
-settings/local.php
+docs/develop/*.png

-data/wiki_import.sql
-data/wiki_specialphrases.sql
-data/osmosischange.osc
+build

 .vagrant
 data/country_osm_grid.sql.gz
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -0,0 +1,23 @@
+[mypy]
+plugins = sqlalchemy.ext.mypy.plugin
+
+[mypy-sanic_cors.*]
+ignore_missing_imports = True
+
+[mypy-icu.*]
+ignore_missing_imports = True
+
+[mypy-asyncpg.*]
+ignore_missing_imports = True
+
+[mypy-datrie.*]
+ignore_missing_imports = True
+
+[mypy-dotenv.*]
+ignore_missing_imports = True
+
+[mypy-falcon.*]
+ignore_missing_imports = True
+
+[mypy-geoalchemy2.*]
+ignore_missing_imports = True
--- a/.pylintrc
+++ b/.pylintrc
@@ -1,7 +1,7 @@
 [MASTER]

-extension-pkg-whitelist=osmium
-ignored-modules=icu
+extension-pkg-whitelist=osmium,falcon
+ignored-modules=icu,datrie

 [MESSAGES CONTROL]

@@ -10,3 +10,9 @@ ignored-modules=icu
 # closing added here because it sometimes triggers a false positive with
 # 'with' statements.
 ignored-classes=NominatimArgs,closing
+# 'too-many-ancestors' is triggered already by deriving from UserDict
+# 'not-context-manager' disabled because it causes false positives once
+#   typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager,use-dict-literal,chained-comparison,attribute-defined-outside-init
+
+good-names=i,j,x,y,m,t,fd,db,cc,x1,x2,y1,y2,pt,k,v,nr
--- a/16
+++ b/16
@@ -1,15 +1,15 @@
 Nominatim was written by:

-  Brian Quinion
-  Sarah Hoffmann
-  Marc Tobias Metten
+* Brian Quinion
+* Sarah Hoffmann
+* Marc Tobias Metten

-  markigail
-  gemo1011
-  IrlJidel
-  Frederik Ramm
+* markigail
+* AntoJvlt
+* gemo1011
+* darkshredder

 and many more.

-For a full list of contributors see
+For a full list of contributors see the Git logs or visit
 https://github.com/openstreetmap/Nominatim/graphs/contributors
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,14 +18,25 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

 project(nominatim)

-set(NOMINATIM_VERSION_MAJOR 3)
-set(NOMINATIM_VERSION_MINOR 7)
-set(NOMINATIM_VERSION_PATCH 0)
+set(NOMINATIM_VERSION_MAJOR 4)
+set(NOMINATIM_VERSION_MINOR 3)
+set(NOMINATIM_VERSION_PATCH 2)

 set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")

 add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")

+# Setting GIT_HASH
+find_package(Git)
+if (GIT_FOUND)
+    execute_process(
+        COMMAND "${GIT_EXECUTABLE}" log -1 --format=%h
+        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
+        OUTPUT_VARIABLE GIT_HASH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+        )
+endif()

 #-----------------------------------------------------------------------------
 #  Configuration
@@ -33,11 +44,12 @@ add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")

 set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
 set(BUILD_API on CACHE BOOL "Build everything for the API server")
-set(BUILD_MODULE on CACHE BOOL "Build PostgreSQL module")
+set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
 set(BUILD_TESTS on CACHE BOOL "Build test suite")
 set(BUILD_DOCS on CACHE BOOL "Build documentation")
 set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
 set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
+set(INSTALL_MUNIN_PLUGINS on CACHE BOOL "Install Munin plugins for supervising Nominatim")

 #-----------------------------------------------------------------------------
 #  osm2pgsql (imports/updates only)
@@ -51,7 +63,6 @@ if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
    endif()
    set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
    set(BUILD_TESTS off)
-    set(WITH_LUA off CACHE BOOL "")
    add_subdirectory(osm2pgsql)
    set(BUILD_TESTS ${BUILD_TESTS_SAVED})
 endif()
@@ -62,7 +73,7 @@ endif()
 #-----------------------------------------------------------------------------

 if (BUILD_IMPORTER)
-    find_package(PythonInterp 3.5 REQUIRED)
+    find_package(PythonInterp 3.7 REQUIRED)
 endif()

 #-----------------------------------------------------------------------------
@@ -81,16 +92,6 @@ if (BUILD_API OR BUILD_IMPORTER)
    else()
        message (STATUS "Using PHP binary " ${PHP_BIN})
    endif()
-    if (NOT PHPCGI_BIN)
-        find_program (PHPCGI_BIN php-cgi)
-    endif()
-    # sanity check if PHP binary exists
-    if (NOT EXISTS ${PHPCGI_BIN})
-        message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
-        set (PHPCGI_BIN "")
-    else()
-        message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
-    endif()
 endif()

 #-----------------------------------------------------------------------------
@@ -109,21 +110,6 @@ if (BUILD_IMPORTER)
                           "    wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
   endif()

-   set(CUSTOMSCRIPTS
-       check_import_finished.php
-       country_languages.php
-       export.php
-       query.php
-       setup.php
-       update.php
-       warm.php
-      )
-
-   foreach (script_source ${CUSTOMSCRIPTS})
-       configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
-                      ${PROJECT_BINARY_DIR}/utils/${script_source})
-   endforeach()
-
   configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
                  ${PROJECT_BINARY_DIR}/nominatim)
 endif()
@@ -168,7 +154,7 @@ if (BUILD_TESTS)
    if (PHPCS)
        message(STATUS "Using phpcs binary ${PHPCS}")
        add_test(NAME phpcs
-                 COMMAND ${PHPCS} --report-width=120 --colors lib website utils
+                 COMMAND ${PHPCS} --report-width=120 --colors lib-php
                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
    else()
        message(WARNING "phpcs not found. PHP linting tests disabled." )
@@ -214,7 +200,7 @@ endif()
 #-----------------------------------------------------------------------------

 if (BUILD_MANPAGE)
-   add_subdirectory(manual)
+   add_subdirectory(man)
 endif()

 #-----------------------------------------------------------------------------
@@ -226,6 +212,7 @@ include(GNUInstallDirs)
 set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
 set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
 set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
+set(NOMINATIM_MUNINDIR ${CMAKE_INSTALL_FULL_DATADIR}/munin/plugins)

 if (BUILD_IMPORTER)
    configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
@@ -236,11 +223,17 @@ if (BUILD_IMPORTER)
    install(DIRECTORY nominatim
            DESTINATION ${NOMINATIM_LIBDIR}/lib-python
            FILES_MATCHING PATTERN "*.py"
+            PATTERN "paths.py" EXCLUDE
            PATTERN __pycache__ EXCLUDE)
+
+    configure_file(${PROJECT_SOURCE_DIR}/cmake/paths-py.tmpl paths-py.installed)
+    install(FILES ${PROJECT_BINARY_DIR}/paths-py.installed
+            DESTINATION ${NOMINATIM_LIBDIR}/lib-python/nominatim
+            RENAME paths.py)
+
    install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})

-    install(FILES data/country_name.sql
-                  ${COUNTRY_GRID_FILE}
+    install(FILES ${COUNTRY_GRID_FILE}
                  data/words.sql
            DESTINATION ${NOMINATIM_DATADIR})
 endif()
@@ -268,9 +261,24 @@ endif()
 install(FILES settings/env.defaults
              settings/address-levels.json
              settings/phrase-settings.json
-              settings/import-admin.style
-              settings/import-street.style
-              settings/import-address.style
-              settings/import-full.style
-              settings/import-extratags.style
+              settings/import-admin.lua
+              settings/import-street.lua
+              settings/import-address.lua
+              settings/import-full.lua
+              settings/import-extratags.lua
+              settings/flex-base.lua
+              settings/icu_tokenizer.yaml
+              settings/country_settings.yaml
        DESTINATION ${NOMINATIM_CONFIGDIR})
+
+install(DIRECTORY settings/icu-rules
+        DESTINATION ${NOMINATIM_CONFIGDIR})
+install(DIRECTORY settings/country-names
+        DESTINATION ${NOMINATIM_CONFIGDIR})
+
+if (INSTALL_MUNIN_PLUGINS)
+    install(FILES munin/nominatim_importlag
+                  munin/nominatim_query_speed
+                  munin/nominatim_requests
+            DESTINATION ${NOMINATIM_MUNINDIR})
+endif()
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -36,7 +36,7 @@ Nominatim historically hasn't followed a particular coding style but we
 are in process of consolidating the style. The following rules apply:

 * Python code uses the official Python style
- * indention
+ * indentation
   * SQL use 2 spaces
   * all other file types use 4 spaces
   * [BSD style](https://en.wikipedia.org/wiki/Indent_style#Allman_style) for braces
@@ -64,3 +64,39 @@ Before submitting a pull request make sure that the tests pass:
  cd build
  make test
 ```
+
+## Releases
+
+Nominatim follows semantic versioning. Major releases are done for large changes
+that require (or at least strongly recommend) a reimport of the databases.
+Minor releases can usually be applied to existing databases. Patch releases
+contain bug fixes only and are released from a separate branch where the
+relevant changes are cherry-picked from the master branch.
+
+Checklist for releases:
+
+* [ ] increase version in `nominatim/version.py` and CMakeLists.txt
+* [ ] update `ChangeLog` (copy information from patch releases from release branch)
+* [ ] complete `docs/admin/Migration.md`
+* [ ] update EOL dates in `SECURITY.md`
+* [ ] commit and make sure CI tests pass
+* [ ] test migration
+  * download, build and import previous version
+  * migrate using master version
+  * run updates using master version
+* [ ] prepare tarball:
+  * `git clone --recursive https://github.com/osm-search/Nominatim` (switch to right branch!)
+  * `rm -r .git* osm2pgsql/.git*`
+  * copy country data into `data/`
+  * add version to base directory and package
+* [ ] upload tarball to https://nominatim.org
+* [ ] prepare documentation
+  * check out new docs branch
+  * change git checkout instructions to tarball download instructions or adapt version on existing ones
+  * build documentation and copy to https://github.com/osm-search/nominatim-org-site
+  * add new version to history
+* [ ] check release tarball
+  * download tarball as per new documentation instructions
+  * compile and import Nominatim
+  * run `nominatim --version` to confirm correct version
+* [ ] tag new release and add a release on github.com
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,198 @@
+4.3.2
+ * fix potential SQL injection issue for 'nominatim admin --collect-os-info'
+ * PHP frontend: fix on-the-fly lookup of postcode areas near boundaries
+ * Python frontend: improve handling of viewbox
+ * Python frontend: correct deployment instructions
+
+4.3.1
+ * reintroduce result rematching
+ * improve search of multi-part names
+ * fix accidentally switched meaning of --reverse-only and --search-only in
+   warm command
+
+4.3.0
+ * fix failing importance recalculation command
+ * fix merging of linked names into unnamed boundaries
+ * fix a number of corner cases with interpolation splitting resulting in
+   invalid geometries
+ * fix failure in website generation when password contains curly brackets
+ * fix broken use of ST_Project in PostGIS 3.4
+ * new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
+   to known countries (thanks @alfmarcua)
+ * allow negative OSM IDs (thanks @alfmarcua)
+ * disallow import of Tiger data in a frozen DB
+ * avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
+ * update bundled osm2pgsql to 1.9.2
+ * reorganise osm2pgsql flex style and make it the default
+ * exclude names ending in :wikipedia from indexing
+ * no longer accept comma as a list separator in name tags
+ * process forward dependencies on update to catch updates in geometries
+   of ways and relations
+ * fix handling of isolated silent letters during transliteration
+ * no longer assign postcodes to large linear features like rivers
+ * introduce nominatim.paths module for finding data and libraries
+ * documentation layout changed to material theme
+ * new documentation section for library
+ * various smaller fixes to existing documentation
+   (thanks @woodpeck, @bloom256, @biswajit-k)
+ * updates to vagrant install scripts, drop support for Ubuntu 18
+   (thanks @n-timofeev)
+ * removed obsolete configuration variables from env.defaults
+ * add script for generating a taginfo description (thanks @biswajit-k)
+ * modernize Python code around BDD test and add testing of Python frontend
+ * lots of new BDD tests for API output
+
+4.2.3
+
+ * fix deletion handling for 'nominatim add-data'
+ * adapt place_force_delete() to new deletion handling
+ * flex style: avoid dropping of postcode areas
+ * fix update errors on address interpolation handling
+
+4.2.2
+
+ * extend flex-style library to fully support all default styles
+ * fix handling of Hebrew aleph
+ * do not assign postcodes to rivers
+ * fix string matching in PHP code
+ * update osm2pgsql (various updates to flex)
+ * fix slow query when deleting places on update
+ * fix CLI details query
+ * fix recalculation of importance values
+ * fix polygon simplification in reverse results
+ * add class/type information to reverse geocodejson result
+ * minor improvements to default tokenizer configuration
+ * various smaller fixes to documentation
+
+4.2.1
+
+ * fix XSS vulnerability in debug view
+
+4.2.0
+
+ * add experimental support for osm2pgsql flex style
+ * introduce secondary importance value to be retrieved from a raster data file
+   (currently still unused, to replace address importance, thanks to @tareqpi)
+ * add new report tool `nominatim admin --collect-os-info`
+   (thanks @micahcochran, @tareqpi)
+ * reorganise index to improve lookup performance and size
+ * run index creation after import in parallel
+ * run ANALYZE more selectively to speed up continuation of indexing
+ * fix crash on update when addr:interpolation receives an illegal value
+ * fix minimum number of retrieved results to be at least 10
+ * fix search for combinations of special term + name (e.g. Hotel Bellevue)
+ * do not return interpolations without a parent street on reverse search
+ * improve invalidation of linked places on updates
+ * fix address parsing for interpolation lines
+ * make sure socket timeouts are respected during replication
+   (working around a bug in some versions of pyosmium)
+ * update bundled osm2pgsql to 1.7.1
+ * add support for PostgreSQL 15
+ * typing fixes to work with latest type annotations from typeshed
+ * smaller improvements to documentation (thanks to @mausch)
+
+4.1.1
+
+ * fix XSS vulnerability in debug view
+
+4.1.0
+
+ * switch to ICU tokenizer as default
+ * add housenumber normalization and support optional spaces during search
+ * add postcode format checking and support optional spaces during search
+ * add function for cleaning housenumbers in word table
+ * add updates/deletion of country names imported from OSM
+ * linked places no longer overwrite names from a place permanently
+ * move default country name configuration into yaml file (thanks @tareqpi)
+ * more compact layout for interpolation and TIGER tables
+ * introduce mutations to ICU tokenizer (used for German umlauts)
+ * support reinitializing a full project directory with refresh --website
+ * fix various issues with linked places on updates
+ * add support for external sanitizers and token analyzers
+ * add CLI commands for forced indexing
+ * add CLI command for version report
+ * add offline import mode
+ * change geocodejson to return a feature class in the 'type' field
+ * add ISO3166-2 to address output (thanks @I70l0teN4ik)
+ * improve parsing and matching of addr: tags
+ * support relations as street members of associatedStreet
+ * better ranking for address results from TIGER data
+ * adapt rank classification to changed tag usage in OSM
+ * update bundled osm2pgsql to 1.6.0
+ * add typing information to Python code
+ * improve unit test coverage
+ * reorganise and speed up code for BDD tests, drop support for scenes
+ * move PHP unit tests to PHPUnit 9.5
+ * extensive typo fixes in documentation (thanks @woodpeck,@StephanGeorg,
+   @amandasaurus, @nslxndr, @stefkiourk, @Luflosi, @kianmeng)
+ * drop official support for installation on CentOS
+ * add installation instructions for Ubuntu 22.04
+ * add support for PHP8
+ * add setup instructions for updates and systemd
+ * drop support for PostgreSQL 9.5
+
+4.0.2
+
+ * fix XSS vulnerability in debug view
+
+4.0.1
+
+ * fix initialisation error in replication script
+ * ICU tokenizer: avoid any special characters in word tokens
+ * better error message when API php script does not exist
+ * fix quoting of house numbers in SQL queries
+ * small fixes and improvements in search query parsing
+ * add documentation for moving the database to a different machine
+
+4.0.0
+
+ * refactor name token computation and introduce ICU tokenizer
+   * name processing now happens in the indexer outside the DB
+   * reorganizes abbreviation handling and moves it to the indexing phases
+   * adds preprocessing of names
+ * add country-specific ranking for Spain, Slovakia
+ * partially switch to using SP-GIST indexes
+ * better updating of dependent addresses for name changes in streets
+ * remove unused/broken tables for external housenumbers
+ * move external postcodes to CSV format and no longer save them in tables
+   (adds support for postcodes for arbitrary countries)
+ * remove postcode helper entries from placex (thanks @AntoJvlt)
+ * change required format for TIGER data to CSV
+ * move configuration of default languages from wiki into config file
+ * expect customized configuration files in project directory by default
+ * disable search API for reverse-only import (thanks @darkshredder)
+ * port most of maintenance/import code to Python and remove PHP utils
+ * add catch-up mode for replication
+ * add updating of special phrases (thanks @AntoJvlt)
+ * add support for special phrases in CSV files (thanks @AntoJvlt)
+ * switch to case-independent matching between place and boundary names
+ * remove disabling of reverse query parsing
+ * minor tweaks to search algorithm to avoid more false positives
+ * major overhaul of the administrator and developer documentation
+ * add security disclosure policy
+ * add testing of installation scripts via CI
+ * drop support for Python < 3.6 and PostgreSQL < 9.5
+
+3.7.3
+
+ * fix XSS vulnerability in debug view
+
+3.7.2
+
+ * fix database check for reverse-only imports
+ * do not error out in status API result when import date is missing
+ * add array_key_last function for PHP < 7.3 (thanks to @woodpeck)
+ * fix more url when server name is unknown (thanks to @mogita)
+ * commit changes to replication log table
+
+3.7.1
+
+ * fix smaller issues with special phrases import (thanks @AntoJvlt)
+ * add index to speed up continued indexing during import
+ * fix index on location_property_tiger(parent_place_id) (thanks @changpingc)
+ * make sure Python code is backward-compatible with Python 3.5
+ * various documentation fixes
+
 3.7.0

 * switch to dotenv for configuration file
@@ -20,7 +215,6 @@
 * add non-key indexes to speed up housenumber + street searches
 * switch housenumber field in placex to save transliterated names

-
 3.6.0

 * add full support for searching by and displaying of addr:* tags
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
 [![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
+[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim)

 Nominatim
 =========
@@ -19,14 +20,6 @@ https://nominatim.org/release-docs/develop/ .
 Installation
 ============

-**Nominatim is a complex piece of software and runs in a complex environment.
-Installing and running Nominatim is something for experienced system
-administrators only who can do some trouble-shooting themselves. We are sorry,
-but we can not provide installation support. We are all doing this in our free
-time and there is just so much of that time to go around. Do not open issues in
-our bug tracker if you need help. Use the discussions forum
-or ask for help on [help.openstreetmap.org](https://help.openstreetmap.org/).**
-
 The latest stable release can be downloaded from https://nominatim.org.
 There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/).

--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,40 @@
+# Security Policy
+
+## Supported Versions
+
+All Nominatim releases receive security updates for two years.
+
+The following table lists the end of support for all currently supported
+versions.
+
+| Version | End of support for security updates |
+| ------- | ----------------------------------- |
+| 4.3.x   | 2025-09-07                          |
+| 4.2.x   | 2024-11-24                          |
+| 4.1.x   | 2024-08-05                          |
+| 4.0.x   | 2023-11-02                          |
+
+## Reporting a Vulnerability
+
+If you believe you have found an issue in Nominatim that has implications for
+security, please send a description of the issue to **security@nominatim.org**.
+You will receive an acknowledgement of your mail within 3 working days, in which
+we will also notify you of the next steps.
+
+## How we Disclose Security Issues
+
+**The following section only applies to security issues found in released
+versions. Issues that concern the master development branch only will be
+fixed immediately on the branch with the corresponding PR containing the
+description of the nature and severity of the issue.**
+
+Patches for identified security issues are applied to all affected versions and
+new minor versions are released. At the same time we release a statement at
+the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the
+incident. Announcements will also be published at the
+[geocoding mailinglist](https://lists.openstreetmap.org/listinfo/geocoding).
+
+## List of Previous Incidents
+
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
+* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
--- a/VAGRANT.md
+++ b/VAGRANT.md
@@ -1,6 +1,6 @@
 # Install Nominatim in a virtual machine for development and testing

-This document describes how you can install Nominatim inside a Ubuntu 16
+This document describes how you can install Nominatim inside an Ubuntu 22
 virtual machine on your desktop/laptop (host machine). The goal is to give
 you a development environment to easily edit code and run the test suite
 without affecting the rest of your system. 
@@ -42,9 +42,9 @@ is.

      ```
      # inside the virtual machine:
-      cd build
-      wget --no-verbose --output-document=/tmp/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
-      ./utils/setup.php --osm-file /tmp/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | tee monaco.$$.log
+      cd nominatim-project
+      wget --no-verbose --output-document=monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
+      nominatim import --osm-file monaco.osm.pbf 2>&1 | tee monaco.$$.log
      ```

    To repeat an import you'd need to delete the database first
@@ -56,7 +56,7 @@ is.
 ## Development

 Vagrant maps the virtual machine's port 8089 to your host machine. Thus you can
-see Nominatim in action on [locahost:8089](http://localhost:8089/nominatim/).
+see Nominatim in action on [localhost:8089](http://localhost:8089/nominatim/).

 You edit code on your host machine in any editor you like. There is no need to
 restart any software: just refresh your browser window.
@@ -69,8 +69,7 @@ installation.
 PHP errors are written to `/var/log/apache2/error.log`.

 With `echo` and `var_dump()` you write into the output (HTML/XML/JSON) when
-you either add `&debug=1` to the URL (preferred) or set
-`@define('CONST_Debug', true);` in `settings/local.php`.
+you add `&debug=1` to the URL.

 In the Python BDD test you can use `logger.info()` for temporary debug
 statements.
@@ -130,6 +129,10 @@ and then
 Yes, Vagrant and Virtualbox can be installed on MS Windows just fine. You need a 64bit
 version of Windows.

+##### Will it run on Apple Silicon?
+
+You might need to replace Virtualbox with [Parallels](https://www.parallels.com/products/desktop/).
+There is no free/open source version of Parallels.

 ##### Why Monaco, can I use another country?

@@ -141,11 +144,12 @@ No. Long running Nominatim installations will differ once new import features (o
 bug fixes) get added since those usually only get applied to new/changed data.

 Also this document skips the optional Wikipedia data import which affects ranking
-of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.
+of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation)
+for details.

 ##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?

-There is a Vagrant script for CentOS available, but the Nominatim directory
+There used to be a Vagrant script for CentOS available, but the Nominatim directory
 isn't symlinked/mounted to the host which makes development trickier. We used
 it mainly for debugging installation with SELinux.

@@ -154,14 +158,17 @@ are slightly different, e.g. the name of the package manager, Apache2 package
 name, location of files. We chose Ubuntu because that is closest to the
 nominatim.openstreetmap.org production environment.

-You can configure/download other Vagrant boxes from [https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).
+You can configure/download other Vagrant boxes from
+[https://app.vagrantup.com/boxes/search](https://app.vagrantup.com/boxes/search).

 ##### How can I connect to an existing database?

-Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.
+Let's say you have a Postgres database named `nominatim_it` on server `your-server.com`
+and port `5432`. The Postgres username is `postgres`. You can edit the `.env` in your
+project directory and point Nominatim to it.
+
+    NOMINATIM_DATABASE_DSN="pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it"

-    pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
-    
 No data import or restarting necessary.

 If the Postgres installation is behind a firewall, you can try
@@ -169,11 +176,12 @@ If the Postgres installation is behind a firewall, you can try
    ssh -L 9999:localhost:5432 your-username@your-server.com

 inside the virtual machine. It will map the port to `localhost:9999` and then
-you edit `settings/local.php` with
+you edit the `.env` file with

-    @define('CONST_Database_DSN', 'pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it');
+    NOMINATIM_DATABASE_DSN="pgsql:host=localhost;port=9999;user=postgres;dbname=nominatim_it"

-To access postgres directly remember to specify the hostname, e.g. `psql --host localhost --port 9999 nominatim_it`
+To access postgres directly remember to specify the hostname,
+e.g. `psql --host localhost --port 9999 nominatim_it`


 ##### My computer is slow and the import takes too long. Can I start the virtual machine "in the cloud"?
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -17,6 +17,14 @@ Vagrant.configure("2") do |config|
    checkout = "no"
  end

+  config.vm.provider "hyperv" do |hv, override|
+    hv.memory = 2048
+    hv.linked_clone = true
+    if ENV['CHECKOUT'] != 'y' then
+      override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: "smb", smb_host: ENV['SMB_HOST'] || ENV['COMPUTERNAME']
+    end
+  end
+
  config.vm.provider "virtualbox" do |vb, override|
    vb.gui = false
    vb.memory = 2048
@@ -34,7 +42,34 @@ Vagrant.configure("2") do |config|
    end
  end

-  config.vm.define "ubuntu", primary: true do |sub|
+  config.vm.define "ubuntu22", primary: true do |sub|
+      sub.vm.box = "generic/ubuntu2204"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-22.sh"
+        s.privileged = false
+        s.args = [checkout]
+      end
+  end
+
+  config.vm.define "ubuntu22-apache" do |sub|
+      sub.vm.box = "generic/ubuntu2204"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-22.sh"
+        s.privileged = false
+        s.args = [checkout, "install-apache"]
+      end
+  end
+
+  config.vm.define "ubuntu22-nginx" do |sub|
+      sub.vm.box = "generic/ubuntu2204"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-22.sh"
+        s.privileged = false
+        s.args = [checkout, "install-nginx"]
+      end
+  end
+
+  config.vm.define "ubuntu20" do |sub|
      sub.vm.box = "generic/ubuntu2004"
      sub.vm.provision :shell do |s|
        s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -43,7 +78,7 @@ Vagrant.configure("2") do |config|
      end
  end

-  config.vm.define "ubuntu-apache" do |sub|
+  config.vm.define "ubuntu20-apache" do |sub|
      sub.vm.box = "generic/ubuntu2004"
      sub.vm.provision :shell do |s|
        s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -52,7 +87,7 @@ Vagrant.configure("2") do |config|
      end
  end

-  config.vm.define "ubuntu-nginx" do |sub|
+  config.vm.define "ubuntu20-nginx" do |sub|
      sub.vm.box = "generic/ubuntu2004"
      sub.vm.provision :shell do |s|
        s.path = "vagrant/Install-on-Ubuntu-20.sh"
@@ -60,51 +95,4 @@ Vagrant.configure("2") do |config|
        s.args = [checkout, "install-nginx"]
      end
  end
-
-  config.vm.define "ubuntu18" do |sub|
-      sub.vm.box = "generic/ubuntu1804"
-      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Ubuntu-18.sh"
-        s.privileged = false
-        s.args = [checkout]
-      end
-  end
-
-  config.vm.define "ubuntu18-apache" do |sub|
-      sub.vm.box = "generic/ubuntu1804"
-      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Ubuntu-18.sh"
-        s.privileged = false
-        s.args = [checkout, "install-apache"]
-      end
-  end
-
-  config.vm.define "ubuntu18-nginx" do |sub|
-      sub.vm.box = "generic/ubuntu1804"
-      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Ubuntu-18.sh"
-        s.privileged = false
-        s.args = [checkout, "install-nginx"]
-      end
-  end
-
-  config.vm.define "centos7" do |sub|
-      sub.vm.box = "centos/7"
-      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Centos-7.sh"
-        s.privileged = false
-        s.args = [checkout]
-      end
-  end
-
-  config.vm.define "centos" do |sub|
-      sub.vm.box = "generic/centos8"
-      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Centos-8.sh"
-        s.privileged = false
-        s.args = [checkout]
-      end
-  end
-
-
 end
--- a/cmake/paths-py.tmpl
+++ b/cmake/paths-py.tmpl
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Path settings for extra data used by Nominatim (installed version).
+"""
+from pathlib import Path
+
+PHPLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-php').resolve()
+SQLLIB_DIR = (Path('@NOMINATIM_LIBDIR@') / 'lib-sql').resolve()
+DATA_DIR = Path('@NOMINATIM_DATADIR@').resolve()
+CONFIG_DIR = Path('@NOMINATIM_CONFIGDIR@').resolve()
--- a/cmake/script.tmpl
+++ b/cmake/script.tmpl
@@ -1,14 +0,0 @@
-#!@PHP_BIN@ -Cq
-<?php
-require('@CMAKE_SOURCE_DIR@/lib-php/dotenv_loader.php');
-
-@define('CONST_Default_ModulePath', '@CMAKE_BINARY_DIR@/module');
-@define('CONST_Default_Osm2pgsql', '@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql');
-@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@/data');
-@define('CONST_SqlDir', '@CMAKE_SOURCE_DIR@/lib-sql');
-@define('CONST_ConfigDir', '@CMAKE_SOURCE_DIR@/settings');
-
-loadDotEnv();
-$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
-
-require_once('@CMAKE_SOURCE_DIR@/lib-php/admin/@script_source@');
--- a/cmake/tool-installed.tmpl
+++ b/cmake/tool-installed.tmpl
@@ -4,14 +4,10 @@ import os

 sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')

-os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
-
 from nominatim import cli
+from nominatim import version
+
+version.GIT_COMMIT_HASH = '@GIT_HASH@'

 exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
-                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
-                   phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
-                   sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
-                   data_dir='@NOMINATIM_DATADIR@',
-                   config_dir='@NOMINATIM_CONFIGDIR@',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
--- a/cmake/tool.tmpl
+++ b/cmake/tool.tmpl
@@ -4,14 +4,10 @@ import os

 sys.path.insert(1, '@CMAKE_SOURCE_DIR@')

-os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
-
 from nominatim import cli
+from nominatim import version
+
+version.GIT_COMMIT_HASH = '@GIT_HASH@'

 exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
-                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
-                   phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
-                   sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
-                   data_dir='@CMAKE_SOURCE_DIR@/data',
-                   config_dir='@CMAKE_SOURCE_DIR@/settings',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
--- a/data/country_name.sql
+++ b/data/country_name.sql
--- a/data/words.sql
+++ b/data/words.sql
@@ -29787,7 +29787,7 @@ st	5557484

 -- prefill word table

-select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
+select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
 select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;

 -- copy the word frequencies
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -10,6 +10,8 @@ set (DOC_SOURCES
     admin
     develop
     api
+     customize
+     library
     index.md
     extra.css
     styles.css
@@ -22,11 +24,12 @@ foreach (src ${DOC_SOURCES})
 endforeach()

 ADD_CUSTOM_TARGET(doc
-   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
-   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-8.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-8.md
-   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
   COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
 )

-
+ADD_CUSTOM_TARGET(serve-doc
+    COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
+)
--- a/docs/admin/Advanced-Installations.md
+++ b/docs/admin/Advanced-Installations.md
@@ -5,107 +5,102 @@ your Nominatim database. It is assumed that you have already successfully
 installed the Nominatim software itself, if not return to the 
 [installation page](Installation.md).

-## Importing multiple regions
+## Importing multiple regions (without updates)

-To import multiple regions in your database, you need to configure and run `utils/import_multiple_regions.sh` file. This script will set up the update directory which has the following structure:
+To import multiple regions in your database you can simply give multiple
+OSM files to the import command:
+
+```
+nominatim import --osm-file file1.pbf --osm-file file2.pbf
+```
+
+If you already have imported a file and want to add another one, you can
+use the add-data function to import the additional data as follows:
+
+```
+nominatim add-data --file <FILE>
+nominatim refresh --postcodes
+nominatim index -j <NUMBER OF THREADS>
+```
+
+Please note that adding additional data is always significantly slower than
+the original import.
+
+## Importing multiple regions (with updates)
+
+If you want to import multiple regions _and_ be able to keep them up-to-date
+with updates, then you can use the scripts provided in the `utils` directory.
+
+These scripts will set up an `update` directory in your project directory,
+which has the following structure:

 ```bash
 update
-    ├── europe
-    │   ├── andorra
-    │   │   └── sequence.state
-    │   └── monaco
-    │       └── sequence.state
-    └── tmp
-        ├── combined.osm.pbf
-        └── europe
-                ├── andorra-latest.osm.pbf
-                └── monaco-latest.osm.pbf
-
+ ├── europe
+ │    ├── andorra
+ │    │    └── sequence.state
+ │    └── monaco
+ │         └── sequence.state
+ └── tmp
+      └── europe
+           ├── andorra-latest.osm.pbf
+           └── monaco-latest.osm.pbf

 ```

-The `sequence.state` files will contain the sequence ID, which will be used by pyosmium to get updates. The tmp folder is used for import dump.
+The `sequence.state` files contain the sequence ID for each region. They will
+be used by pyosmium to get updates. The `tmp` folder is used for import dump and
+can be deleted once the import is complete.

-### Configuring multiple regions
-
-The file `import_multiple_regions.sh` needs to be edited as per your requirement:
-
-1. List of countries. eg:
-
-        COUNTRIES="europe/monaco europe/andorra"
-
-2. Path to Build directory. eg:
-
-        NOMINATIMBUILD="/srv/nominatim/build"
-
-3. Path to Update directory. eg:
-        
-        UPDATEDIR="/srv/nominatim/update"
-
-4. Replication URL. eg:
-    
-        BASEURL="https://download.geofabrik.de"
-        DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"

 ### Setting up multiple regions

-!!! tip
-    If your database already exists and you want to add more countries,
-    replace the setting up part
-    `${SETUPFILE} --osm-file ${UPDATEDIR}/tmp/combined.osm.pbf --all 2>&1`
-    with `${UPDATEFILE} --import-file ${UPDATEDIR}/tmp/combined.osm.pbf --index --index-instances N 2>&1`
-    where N is the numbers of CPUs in your system.
+Create a project directory as described for the
+[simple import](Import.md#creating-the-project-directory). If necessary,
+you can also add an `.env` configuration with customized options. In particular,
+you need to make sure that `NOMINATIM_REPLICATION_UPDATE_INTERVAL` and
+`NOMINATIM_REPLICATION_RECHECK_INTERVAL` are set according to the update
+interval of the extract server you use.

-Run the following command from your Nominatim directory after configuring the file.
+Copy the scripts `utils/import_multiple_regions.sh` and `utils/update_database.sh`
+into the project directory.

-    bash ./utils/import_multiple_regions.sh
+Now customize both files as per your requirements:

-!!! danger "Important"
-        This file uses osmium-tool. It must be installed before executing the import script.
-        Installation instructions can be found [here](https://osmcode.org/osmium-tool/manual.html#installation).
-
-### Updating multiple regions
-
-To import multiple regions in your database, you need to configure and run ```utils/update_database.sh```.
-This uses the update directory set up while setting up the DB.   
-
-### Configuring multiple regions
-
-The file `update_database.sh` needs to be edited as per your requirement:
-
-1. List of countries. eg:
+1. List of countries. e.g.

        COUNTRIES="europe/monaco europe/andorra"

-2. Path to Build directory. eg:
+2. URL to the service providing the extracts and updates. e.g.

-        NOMINATIMBUILD="/srv/nominatim/build"
-
-3. Path to Update directory. eg:
-        
-        UPDATEDIR="/srv/nominatim/update"
-
-4. Replication URL. eg:
-    
        BASEURL="https://download.geofabrik.de"
-        DOWNCOUNTRYPOSTFIX="-updates"
+        DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"

-5. Followup can be set according to your installation. eg: For Photon,
+3. Followup in the update script can be set according to your installation.
+   E.g. for Photon,

        FOLLOWUP="curl http://localhost:2322/nominatim-update"

    will handle the indexing.

+
+To start the initial import, change into the project directory and run
+
+```
+    bash import_multiple_regions.sh
+```
+
 ### Updating the database

-Run the following command from your Nominatim directory after configuring the file.
+Change into the project directory and run the following command:

-    bash ./utils/update_database.sh
+    bash update_database.sh

-This will get diffs from the replication server, import diffs and index the database. The default replication server in the script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
+This will get diffs from the replication server, import diffs and index
+the database. The default replication server in the
+script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.

-## Importing Nominatim to an external PostgreSQL database
+## Using an external PostgreSQL database

 You can install Nominatim using a database that runs on a different server when
 you have physical access to the file system on the other server. Nominatim
@@ -113,6 +108,11 @@ uses a custom normalization library that needs to be made accessible to the
 PostgreSQL server. This section explains how to set up the normalization
 library.

+!!! note
+    The external module is only needed when using the legacy tokenizer.
+    If you have chosen the ICU tokenizer, then you can ignore this section
+    and follow the standard import documentation.
+
 ### Option 1: Compiling the library on the database server

 The most sure way to get a working library is to compile it on the database
@@ -170,4 +170,46 @@ NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominati
 ```

 Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
-to follow the [standard instructions for importing](/admin/Import).
+to follow the [standard instructions for importing](Import.md).
+
+
+## Moving the database to another machine
+
+For some configurations it may be useful to run the import on one machine, then
+move the database to another machine and run the Nominatim service from there.
+For example, you might want to use a large machine to be able to run the import
+quickly but only want a smaller machine for production because there is not so
+much load. Or you might want to do the import once and then replicate the
+database to many machines.
+
+The important thing to keep in mind when transferring the Nominatim installation
+is that you need to transfer the database _and the project directory_. Both
+parts are essential for your installation.
+
+The Nominatim database can be transferred using the `pg_dump`/`pg_restore` tool.
+Make sure to use the same version of PostgreSQL and PostGIS on source and
+target machine.
+
+!!! note
+    Before creating a dump of your Nominatim database, consider running
+    `nominatim freeze` first. Your database loses the ability to receive further
+    data updates but the resulting database is only about a third of the size
+    of a full database.
+
+Next install Nominatim on the target machine by following the standard installation
+instructions. Again, make sure to use the same version as the source machine.
+
+Create a project directory on your destination machine and set up the `.env`
+file to match the configuration on the source machine. Finally run
+
+    nominatim refresh --website
+
+to make sure that the local installation of Nominatim will be used.
+
+If you are using the legacy tokenizer you might also have to switch to the
+PostgreSQL module that was compiled on your target machine. If you get errors
+that PostgreSQL cannot find or access `nominatim.so` then rerun
+
+    nominatim refresh --functions
+
+on the target machine to update the location of the module.
--- a/docs/admin/Deployment-PHP.md
+++ b/docs/admin/Deployment-PHP.md
@@ -1,22 +1,23 @@
-# Deploying Nominatim
+# Deploying Nominatim using the PHP frontend

 The Nominatim API is implemented as a PHP application. The `website/` directory
-in the build directory contains the configured website. You can serve this
+in the project directory contains the configured website. You can serve this
 in a production environment with any web server that is capable to run
 PHP scripts.

 This section gives a quick overview on how to configure Apache and Nginx to
 serve Nominatim. It is not meant as a full system administration guide on how
 to run a web service. Please refer to the documentation of
-[Apache](http://httpd.apache.org/docs/current/) and
+[Apache](https://httpd.apache.org/docs/current/) and
 [Nginx](https://nginx.org/en/docs/)
 for background information on configuring the services.

 !!! Note
-    Throughout this page, we assume that your Nominatim build directory is
-    located in `/srv/nominatim/build` and the source code in
-    `/srv/nominatim/Nominatim`. If you have put it somewhere else, you
-    need to adjust the commands and configuration accordingly.
+    Throughout this page, we assume your Nominatim project directory is
+    located in `/srv/nominatim-project` and you have installed Nominatim
+    using the default installation prefix `/usr/local`. If you have put it
+    somewhere else, you need to adjust the commands and configuration
+    accordingly.

    We further assume that your web server runs as user `www-data`. Older
    versions of CentOS may still use the user name `apache`. You also need
@@ -29,7 +30,7 @@ web server user. You can check that the permissions are correct by accessing
 on of the php files as the web server user:

 ``` sh
-sudo -u www-data head -n 1 /srv/nominatim/build/website/search.php
+sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
 ```

 If this shows a permission error, then you need to adapt the permissions of
@@ -40,11 +41,11 @@ web server access. At a minimum the following SELinux labelling should be done
 for Nominatim:

 ``` sh
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/Nominatim/(website|lib|settings)(/.*)?"
-sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim/build/(website|settings)(/.*)?"
-sudo semanage fcontext -a -t lib_t "/srv/nominatim/build/module/nominatim.so"
-sudo restorecon -R -v /srv/nominatim/Nominatim
-sudo restorecon -R -v /srv/nominatim/build
+sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/lib/nominatim/lib-php(/.*)?"
+sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
+sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
+sudo restorecon -R -v /usr/local/lib/nominatim
+sudo restorecon -R -v /srv/nominatim-project
 ```

 ## Nominatim with Apache
@@ -65,13 +66,13 @@ Make sure your Apache configuration contains the required permissions for the
 directory and create an alias:

 ``` apache
-<Directory "/srv/nominatim/build/website">
+<Directory "/srv/nominatim-project/website">
  Options FollowSymLinks MultiViews
  AddType text/html   .php
  DirectoryIndex search.php
  Require all granted
 </Directory>
-Alias /nominatim /srv/nominatim/build/website
+Alias /nominatim /srv/nominatim-project/website
 ```

 After making changes in the apache config you need to restart apache.
@@ -81,7 +82,7 @@ The website should now be available on `http://localhost/nominatim`.

 ### Installing the required packages

-Nginx has no built-in PHP interpreter. You need to use php-fpm as a deamon for
+Nginx has no built-in PHP interpreter. You need to use php-fpm as a daemon for
 serving PHP cgi.

 On Ubuntu/Debian install nginx and php-fpm with:
@@ -98,7 +99,7 @@ Unix socket instead, change the pool configuration

 ``` ini
 ; Replace the tcp listener and add the unix socket
-listen = /var/run/php-fpm.sock
+listen = /var/run/php-fpm-nominatim.sock

 ; Ensure that the daemon runs as the correct user
 listen.owner = www-data
@@ -110,7 +111,7 @@ Tell nginx that php files are special and to fastcgi_pass to the php-fpm
 unix socket by adding the location definition to the default configuration.

 ``` nginx
-root /srv/nominatim/build/website;
+root /srv/nominatim-project/website;
 index search.php;
 location / {
    try_files $uri $uri/ @php;
@@ -120,7 +121,7 @@ location @php {
    fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
    fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
    fastcgi_param QUERY_STRING    $args;
-    fastcgi_pass unix:/var/run/php-fpm.sock;
+    fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
    fastcgi_index index.php;
    include fastcgi_params;
 }
@@ -130,7 +131,7 @@ location ~ [^/]\.php(/|$) {
    if (!-f $document_root$fastcgi_script_name) {
        return 404;
    }
-    fastcgi_pass unix:/var/run/php-fpm.sock;
+    fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
    fastcgi_index search.php;
    include fastcgi.conf;
 }
@@ -139,3 +140,9 @@ location ~ [^/]\.php(/|$) {
 Restart the nginx and php-fpm services and the website should now be available
 at `http://localhost/`.

+## Nominatim with other webservers
+
+Users have created instructions for other webservers:
+
+* [Caddy](https://github.com/osm-search/Nominatim/discussions/2580)
+
--- a/docs/admin/Deployment-Python.md
+++ b/docs/admin/Deployment-Python.md
@@ -0,0 +1,140 @@
+# Deploying the Nominatim Python frontend
+
+Nominatim can be run as a Python-based
+[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
+choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
+and [Starlette](https://www.starlette.io/) as the ASGI framework.
+
+This section gives a quick overview on how to configure Nginx to serve
+Nominatim. Please refer to the documentation of
+[Nginx](https://nginx.org/en/docs/) for background information on how
+to configure it.
+
+!!! Note
+    Throughout this page, we assume your Nominatim project directory is
+    located in `/srv/nominatim-project` and you have installed Nominatim
+    using the default installation prefix `/usr/local`. If you have put it
+    somewhere else, you need to adjust the commands and configuration
+    accordingly.
+
+    We further assume that your web server runs as user `www-data`. Older
+    versions of CentOS may still use the user name `apache`. You also need
+    to adapt the instructions in this case.
+
+### Installing the required packages
+
+The recommended way to deploy a Python ASGI application is to run
+the ASGI runner [uvicorn](https://uvicorn.org/)
+together with the [gunicorn](https://gunicorn.org/) HTTP server. We use
+Falcon here as the web framework.
+
+Create a virtual environment for the Python packages and install the necessary
+dependencies:
+
+``` sh
+sudo apt install virtualenv
+virtualenv /srv/nominatim-venv
+/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
+   psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
+```
+
+### Setting up Nominatim as a systemd job
+
+Next you need to set up the service that runs the Nominatim frontend. This is
+easiest done with a systemd job.
+
+First you need to tell systemd to create a socket file to be used by
+gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
+
+``` systemd
+[Unit]
+Description=Gunicorn socket for Nominatim
+
+[Socket]
+ListenStream=/run/nominatim.sock
+SocketUser=www-data
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Now you can add the systemd service for Nominatim itself.
+Create the following file `/etc/systemd/system/nominatim.service`:
+
+``` systemd
+[Unit]
+Description=Nominatim running as a gunicorn application
+After=network.target
+Requires=nominatim.socket
+
+[Service]
+Type=simple
+Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
+User=www-data
+Group=www-data
+WorkingDirectory=/srv/nominatim-project
+ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
+ExecReload=/bin/kill -s HUP $MAINPID
+StandardOutput=append:/var/log/gunicorn-nominatim.log
+StandardError=inherit
+PrivateTmp=true
+TimeoutStopSec=5
+KillMode=mixed
+
+[Install]
+WantedBy=multi-user.target
+```
+
+This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
+its own Python process using
+[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
+connections to the database to serve requests in parallel.
+
+Make the new services known to systemd and start them:
+
+``` sh
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim.socket
+sudo systemctl start nominatim.socket
+sudo systemctl enable nominatim.service
+sudo systemctl start nominatim.service
+```
+
+This sets the service up, so that Nominatim is automatically started
+on reboot.
+
+### Configuring nginx
+
+To make the service available to the world, you need to proxy it through
+nginx. Add the following definition to the default configuration:
+
+``` nginx
+upstream nominatim_service {
+  server unix:/run/nominatim.sock fail_timeout=0;
+}
+
+server {
+    listen 80;
+    listen [::]:80;
+
+    root /var/www/html;
+    index /search;
+
+    location / {
+            proxy_set_header Host $http_host;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_redirect off;
+            proxy_pass http://nominatim_service;
+    }
+}
+```
+
+Reload nginx with
+
+```
+sudo systemctl reload nginx
+```
+
+and you should be able to see the status of your server under
+`http://localhost/status`.
--- a/docs/admin/Faq.md
+++ b/docs/admin/Faq.md
@@ -79,7 +79,7 @@ When running the import you may get a version mismatch:

 pg_config seems to use bad includes sometimes when multiple versions
 of PostgreSQL are available in the system. Make sure you remove the
-server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
+server development libraries (`postgresql-server-dev-13` on Ubuntu)
 and recompile (`cmake .. && make`).


@@ -106,11 +106,6 @@ If you are using a flatnode file, then it may also be that the underlying
 filesystem does not fully support 'mmap'. A notable candidate is virtualbox's
 vboxfs.

-### I see the error: "clang: Command not found" on CentOS
-
-On CentOS 7 users reported `/opt/rh/llvm-toolset-7/root/usr/bin/clang: Command not found`.
-Double-check clang is installed. Instead of `make` try running `make CLANG=true`.
-
 ### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal

 Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168)
@@ -126,22 +121,6 @@ The server cannot access your database. Add `&debug=1` to your URL
 to get the full error message.


-### On CentOS the website shows "Could not connect to server"
-
-`could not connect to server: No such file or directory`
-
-On CentOS v7 the PostgreSQL server is started with `systemd`. Check if
-`/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`. If
-so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security
-feature, so use the
-[preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
-
-However, you can solve this the quick and dirty way by commenting out that line and then run
-
-    sudo systemctl daemon-reload
-    sudo systemctl restart httpd
-
-
 ### Website reports "DB Error: insufficient permissions"

 The user the webserver, e.g. Apache, runs under needs to have access to the
@@ -181,9 +160,6 @@ by everybody, e.g.

 Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.

-When running SELinux, make sure that the
-[context is set up correctly](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
-
 When you recently updated your operating system, updated PostgreSQL to
 a new version or moved files (e.g. the build directory) you should
 recreate `nominatim.so`. Try
--- a/docs/admin/Import.md
+++ b/docs/admin/Import.md
@@ -40,15 +40,16 @@ all commands from the project directory.

 ### Configuration setup in `.env`

-The Nominatim server can be customized via an `.env` configuration file in the
+The Nominatim server can be customized via an `.env` configuration file in the 
 project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
 format which looks the same as variable settings in a standard shell environment.
 You can also set the same configuration via environment variables. All
 settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
 variables.

-There are lots of configuration settings you can tweak. Have a look
-at `settings/env.default` for a full list. Most should have a sensible default.
+There are lots of configuration settings you can tweak. A full reference
+can be found in the chapter [Configuration Settings](../customize/Settings.md).
+Most should have a sensible default.

 #### Flatnode files

@@ -73,25 +74,29 @@ but it will improve the quality of the results if this is installed.
 This data is available as a binary download. Put it into your project directory:

    cd $PROJECT_DIR
-    wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
+    wget https://nominatim.org/data/wikimedia-importance.sql.gz

 The file is about 400MB and adds around 4GB to the Nominatim database.

 !!! tip
-    If you forgot to download the wikipedia rankings, you can also add
-    importances after the import. Download the files, then run
-    `nominatim refresh --wiki-data --importance`. Updating importances for
-    a planet can take a couple of hours.
+    If you forgot to download the wikipedia rankings, then you can
+    also add importances after the import. Download the SQL files, then
+    run `nominatim refresh --wiki-data --importance`. Updating
+    importances for a planet will take a couple of hours.

-### Great Britain, USA postcodes
+### External postcodes

-Nominatim can use postcodes from an external source to improve searches that
-involve a GB or US postcode. This data can be optionally downloaded into the
-project directory:
+Nominatim can use postcodes from an external source to improve searching with
+postcodes. We provide precomputed postcode sets for the US (using TIGER data)
+and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads/open/CodePointOpen)).
+This data can be optionally downloaded into the project directory:

    cd $PROJECT_DIR
-    wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
-    wget https://www.nominatim.org/data/us_postcode_data.sql.gz
+    wget https://nominatim.org/data/gb_postcodes.csv.gz
+    wget https://nominatim.org/data/us_postcodes.csv.gz
+
+You can also add your own custom postcode sources, see
+[Customization of postcodes](../customize/Postcodes.md).

 ## Choosing the data to import

@@ -107,7 +112,7 @@ If you only need geocoding for a smaller region, then precomputed OSM extracts
 are a good way to reduce the database size and import time.
 [Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
 They even have daily updates which can be used with the update process described
-[in the next section](../Update). There are also
+[in the next section](Update.md). There are also
 [other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).

 Please be aware that some extracts are not cut exactly along the country
@@ -133,11 +138,19 @@ Note that you still need to provide for sufficient disk space for the initial
 import. So this option is particularly interesting if you plan to transfer the
 database or reuse the space later.

+!!! warning
+    The data structures for updates are also required when adding additional data
+    after the import, for example [TIGER housenumber data](../customize/Tiger.md).
+    If you plan to use those, you must not use the `--no-updates` parameter.
+    Do a normal import, add the external data and once you are done with
+    everything run `nominatim freeze`.
+
+
 ### Reverse-only Imports

 If you only want to use the Nominatim database for reverse lookups or
 if you plan to use the installation only for exports to a
-[photon](https://photon.komoot.de/) database, then you can set up a database
+[photon](https://photon.komoot.io/) database, then you can set up a database
 without search indexes. Add `--reverse-only` to your setup command above.

 This saves about 5% of disk space.
@@ -148,15 +161,15 @@ Nominatim normally sets up a full search database containing administrative
 boundaries, places, streets, addresses and POI data. There are also other
 import styles available which only read selected data:

-* **settings/import-admin.style**
+* **admin**
  Only import administrative boundaries and places.
-* **settings/import-street.style**
+* **street**
  Like the admin style but also adds streets.
-* **settings/import-address.style**
+* **address**
  Import all data necessary to compute addresses down to house number level.
-* **settings/import-full.style**
+* **full**
  Default style that also includes points of interest.
-* **settings/import-extratags.style**
+* **extratags**
  Like the full style but also adds most of the OSM tags into the extratags
  column.

@@ -179,8 +192,8 @@ full      |   54h        |  640 GB    |  330 GB
 extratags |   54h        |  650 GB    |  340 GB

 You can also customize the styles further.
-A [description of the style format](../develop/Import.md#configuring-the-import) 
-can be found in the development section.
+A [description of the style format](../customize/Import-Styles.md)
+can be found in the customization guide.

 ## Initial import of the data

@@ -189,12 +202,15 @@ can be found in the development section.
    [Geofabrik](https://download.geofabrik.de).

 Download the data to import. Then issue the following command
-from the **build directory** to start the import:
+from the **project directory** to start the import:

 ```sh
 nominatim import --osm-file <data file> 2>&1 | tee setup.log
 ```

+The **project directory** is the one that you have set up at the beginning.
+See [creating the project directory](#creating-the-project-directory).
+
 ### Notes on full planet imports

 Even on a perfectly configured machine
@@ -212,7 +228,7 @@ to load the OSM data into the PostgreSQL database. This step is very demanding
 in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at 
 this point. PostgreSQL blocks at least the part of RAM that has been configured
 with the `shared_buffers` parameter during
-[PostgreSQL tuning](Installation#postgresql-tuning)
+[PostgreSQL tuning](Installation.md#postgresql-tuning)
 and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
 its internal data structures, potentially more when it has to process very large
 relations. In addition it needs to maintain a cache for node locations. The size
@@ -231,76 +247,86 @@ reduce the cache size or even consider using a flatnode file.

 ### Testing the installation

-Run this script to verify all required tables and indices got created successfully.
+Run this script to verify that all required tables and indices got created
+successfully.

 ```sh
 nominatim admin --check-database
 ```

-Now you can try out your installation by running:
+Now you can try out your installation by executing a simple query on the
+command line:
+
+``` sh
+nominatim search --query Berlin
+```
+
+or, when you have a reverse-only installation:
+
+``` sh
+nominatim reverse --lat 51 --lon 45
+```
+
+If you want to run Nominatim as a service, you need to make a choice between
+running the traditional PHP frontend or the new experimental Python frontend.
+Make sure you have installed the right packages as per
+[Installation](Installation.md#software).
+
+#### Testing the PHP frontend
+
+You can run a small test server with the PHP frontend like this:

 ```sh
 nominatim serve
 ```

-This runs a small test server normally used for development. You can use it
-to verify that your installation is working. Go to
-`http://localhost:8088/status.php` and you should see the message `OK`.
-You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.

-To run Nominatim via webservers like Apache or nginx, please read the
-[Deployment chapter](Deployment.md).
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the PHP frontend](Deployment-PHP.md).

-## Tuning the database
+#### Testing the Python frontend

-Accurate word frequency information for search terms helps PostgreSQL's query
-planner to make the right decisions. Recomputing them can improve the performance
-of forward geocoding in particular under high load. To recompute word counts run:
+To run the test server against the Python frontend, you must choose a
+web framework to use, either starlette or falcon. Make sure the appropriate
+packages are installed. Then run

-```sh
-nominatim refresh --word-counts
+``` sh
+nominatim serve --engine falcon
 ```

-This will take a couple of hours for a full planet installation. You can
-also defer that step to a later point in time when you realise that
-performance becomes an issue. Just make sure that updates are stopped before
-running this function.
+or

-If you want to be able to search for places by their type through
-[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
+``` sh
+nominatim serve --engine starlette
+```
+
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the Python frontend](Deployment-Python.md).
+
+
+## Enabling search by category phrases
+
+To be able to search for places by their type using
+[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
 you also need to import these key phrases like this:

-    nominatim special-phrases --import-from-wiki
+```sh
+nominatim special-phrases --import-from-wiki
+```

 Note that this command downloads the phrases from the wiki link above. You
 need internet access for the step.

-
-## Installing Tiger housenumber data for the US
-
-Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
-address set to complement the OSM house number data in the US. You can add
-TIGER data to your own Nominatim instance by following these steps. The
-entire US adds about 10GB to your database.
-
-  1. Get preprocessed TIGER 2020 data:
-
-        cd $PROJECT_DIR
-        wget https://nominatim.org/data/tiger2020-nominatim-preprocessed.tar.gz
-
-  2. Import the data into your Nominatim database:
-
-        nominatim add-data --tiger-data tiger2020-nominatim-preprocessed.tar.gz
-
-  3. Enable use of the Tiger data in your `.env` by adding:
-
-        echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
-
-  4. Apply the new settings:
-
-        nominatim refresh --functions
-
-
-See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
-information on how the data got preprocessed.
-
+You can also import special phrases from a csv file. For more
+information please see the [Customization part](../customize/Special-Phrases.md).
--- a/docs/admin/Installation.md
+++ b/docs/admin/Installation.md
@@ -4,10 +4,8 @@ This page contains generic installation instructions for Nominatim and its
 prerequisites. There are also step-by-step instructions available for
 the following operating systems:

+  * [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
  * [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
-  * [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
-  * [CentOS 8](../appendix/Install-on-Centos-8.md)
-  * [CentOS 7.2](../appendix/Install-on-Centos-7.md)

 These OS-specific instructions can also be found in executable form
 in the `vagrant/` directory.
@@ -17,12 +15,18 @@ and can't offer support.

  * [Docker](https://github.com/mediagis/nominatim-docker)
  * [Docker on Kubernetes](https://github.com/peter-evans/nominatim-k8s)
+  * [Kubernetes with Helm](https://github.com/robjuz/helm-charts/blob/master/charts/nominatim/README.md)
  * [Ansible](https://github.com/synthesio/infra-ansible-nominatim)

 ## Prerequisites

 ### Software

+!!! Warning
+    For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
+    otherwise import and queries will be slow to the point of being unusable.
+    Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.
+
 For compiling:

  * [cmake](https://cmake.org/)
@@ -31,44 +35,58 @@ For compiling:
  * [bzip2](http://www.bzip.org/)
  * [zlib](https://www.zlib.net/)
  * [ICU](http://site.icu-project.org/)
+  * [nlohmann/json](https://json.nlohmann.me/)
  * [Boost libraries](https://www.boost.org/), including system and filesystem
  * PostgreSQL client libraries
  * a recent C++ compiler (gcc 5+ or Clang 3.8+)

 For running Nominatim:

-  * [PostgreSQL](https://www.postgresql.org) (9.3+ will work, 11+ strongly recommended)
-  * [PostGIS](https://postgis.net) (2.2+)
-  * [Python 3](https://www.python.org/) (3.5+)
+  * [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
+  * [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
+  * [Python 3](https://www.python.org/) (3.7+)
  * [Psycopg2](https://www.psycopg.org) (2.7+)
  * [Python Dotenv](https://github.com/theskumar/python-dotenv)
  * [psutil](https://github.com/giampaolo/psutil)
  * [Jinja2](https://palletsprojects.com/p/jinja/)
+  * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
+  * [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
  * [PyICU](https://pypi.org/project/PyICU/)
-  * [PHP](https://php.net) (7.0 or later)
+  * [PyYaml](https://pyyaml.org/) (5.1+)
+  * [datrie](https://github.com/pytries/datrie)
+
+When running the PHP frontend:
+
+  * [PHP](https://php.net) (7.3+)
  * PHP-pgsql
  * PHP-intl (bundled with PHP)
-  * PHP-cgi (for running queries from the command line)

 For running continuous updates:

  * [pyosmium](https://osmcode.org/pyosmium/)

+For running the experimental Python frontend:
+
+  * one of the following web frameworks:
+    * [falcon](https://falconframework.org/) (3.0+)
+    * [starlette](https://www.starlette.io/)
+  * [uvicorn](https://www.uvicorn.org/)
+
 For dependencies for running tests and building documentation, see
 the [Development section](../develop/Development-Environment.md).

 ### Hardware

 A minimum of 2GB of RAM is required or installation will fail. For a full
-planet import 64GB of RAM or more are strongly recommended. Do not report
+planet import 128GB of RAM or more are strongly recommended. Do not report
 out of memory problems if you have less than 64GB RAM.

-For a full planet install you will need at least 900GB of hard disk space.
+For a full planet install you will need at least 1TB of hard disk space.
 Take into account that the OSM database is growing fast.
 Fast disks are essential. Using NVME disks is recommended.

 Even on a well configured machine the import of a full planet takes
-around 2 days. On traditional spinning disks, 7-8 days are more realistic.
+around 2 days. When using traditional SSDs, 4-5 days are more realistic.

 ## Tuning the PostgreSQL database

@@ -82,8 +100,7 @@ your `postgresql.conf` file.
    work_mem = (50MB)
    effective_cache_size = (24GB)
    synchronous_commit = off
-    checkpoint_segments = 100 # only for postgresql <= 9.4
-    max_wal_size = 1GB # postgresql > 9.4
+    max_wal_size = 1GB
    checkpoint_timeout = 10min
    checkpoint_completion_target = 0.9

@@ -101,15 +118,6 @@ you might consider setting:
 and even reduce `autovacuum_work_mem` further. This will reduce the amount
 of memory that autovacuum takes away from the import process.

-For the initial import, you should also set:
-
-    fsync = off
-    full_page_writes = off
-
-Don't forget to reenable them after the initial import or you risk database
-corruption.
-
-
 ## Downloading and building Nominatim

 ### Downloading the latest release
@@ -123,13 +131,13 @@ If you want to install latest development version from github, make sure to
 also check out the osm2pgsql subproject:

 ```
-git clone --recursive git://github.com/openstreetmap/Nominatim.git
+git clone --recursive https://github.com/openstreetmap/Nominatim.git
 ```

 The development version does not include the country grid. Download it separately:

 ```
-wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
+wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
 ```

 ### Building Nominatim
@@ -151,6 +159,17 @@ make
 sudo make install
 ```

+!!! warning
+    The default installation no longer compiles the PostgreSQL module that
+    is needed for the legacy tokenizer from older Nominatim versions. If you
+    are upgrading an older database or want to run the
+    [legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
+    some other reason, you need to enable the PostgreSQL module via
+    cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
+    you need to have the server development headers for PostgreSQL installed.
+    On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
+
+
 Nominatim installs itself into `/usr/local` per default. To choose a different
 installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
 cmake command. Make sure that the `bin` directory is available in your path
--- a/docs/admin/Maintenance.md
+++ b/docs/admin/Maintenance.md
@@ -0,0 +1,75 @@
+This chapter describes the various operations the Nominatim database administrator
+may use to clean and maintain the database. None of these operations is mandatory
+but they may help improve the performance and accuracy of results.
+
+
+## Updating postcodes
+
+Command: `nominatim refresh --postcodes`
+
+Postcode centroids (aka 'calculated postcodes') are generated by looking at all
+postcodes of a country, grouping them and calculating the geometric centroid.
+There is currently no logic to deal with extreme outliers (typos or other
+mistakes in OSM data). There is also no check if a postcode adheres to a
+country's format, e.g. if Swiss postcodes are 4 digits.
+
+When running regular updates, postcode results can be improved by running
+this command on a regular basis. Note that only the postcode table and the
+postcode search terms are updated. The postcode that is assigned to each place
+is only updated when the place is updated.
+
+The command takes around 70min to run on the planet and needs ca. 40GB of
+temporary disk space.
+
+
+## Updating word counts
+
+Command: `nominatim refresh --word-counts`
+
+Nominatim keeps frequency statistics about all search terms it indexes. These
+statistics are currently used to optimise queries to the database. Thus better
+statistics mean better performance. Word counts are created once after import
+and are usually sufficient even when running regular updates. You might want
+to rerun the statistics computation when adding larger amounts of new data,
+for example, when adding an additional country via `nominatim add-data`.
+
+
+## Forcing recomputation of places and areas
+
+Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
+
+When running replication updates, Nominatim tries to recompute the search
+and address information for all places that are affected by a change. But it
+needs to restrict the total number of changes to make sure it can keep up
+with the minutely updates. Therefore it will refrain from propagating changes
+that affect a lot of objects.
+
+The administrator may force an update of places in the database.
+`nominatim refresh --data-object` invalidates a single OSM object.
+`nominatim refresh --data-area` invalidates an OSM object and all dependent
+objects. These are usually the places that are inside its area or around the
+center of the object. Both commands expect the OSM object as an argument
+of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
+`R` (relation).
+
+After invalidating the object, indexing must be run again. If continuous
+updates are running in the background, the objects will be recomputed together
+with the next round of updates. Otherwise you need to run `nominatim index`
+to finish the recomputation.
+
+
+## Removing large deleted objects
+
+Nominatim refuses to delete very large areas because often these deletions are
+accidental and are reverted within hours. Instead the deletions are logged in
+the `import_polygon_delete` table and left to the administrator to clean up.
+
+There is currently no command to do that. You can use the following SQL
+query to force a deletion on all objects that have been deleted more than
+a certain timespan ago (here: 1 month):
+
+```sql
+SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
+WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
+      and age(p.indexed_date) > '1 month'::interval
+```
--- a/docs/admin/Migration.md
+++ b/docs/admin/Migration.md
@@ -15,8 +15,76 @@ breaking changes. **Please read them before running the migration.**
    If you are migrating from a version <3.6, then you still have to follow
    the manual migration steps up to 3.6.

+## 4.2.0 -> 4.3.0
+
+### New indexes for reverse lookup
+
+The reverse lookup algorithm has changed slightly to improve performance.
+This change needs a different index in the database. The required index
+will be automatically built during migration. Until the new index is available
+performance of the /reverse endpoint is significantly reduced. You should
+therefore either remove traffic from the machine before attempting a
+version update or create the index manually **before** starting the update
+using the following SQL:
+
+```sql
+CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
+  ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
+  WHERE rank_address between 4 and 25 AND type != 'postcode'
+    AND name is not null AND linked_place_id is null AND osm_type = 'N';
+```
+
+## 4.0.0 -> 4.1.0
+
+### ICU tokenizer is the new default
+
+Nominatim now installs the [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer)
+by default. This only has an effect on newly installed databases. When
+updating older databases, it keeps its installed tokenizer. If you still
+run with the legacy tokenizer, make sure to compile Nominatim with the
+PostgreSQL module, see [Installation](Installation.md#building-nominatim).
+
+### geocodejson output changed
+
+The `type` field of the geocodejson output has changed. It now contains
+the address class of the object instead of the value of the OSM tag. If
+your client has used the `type` field, switch them to read `osm_value`
+instead.
+
+## 3.7.0 -> 4.0.0
+
+### NOMINATIM_PHRASE_CONFIG removed
+
+Custom blacklist configurations for special phrases now need to be handed
+with the `--config` parameter to `nominatim special-phrases`. Alternatively
+you can put your custom configuration in the project directory in a file
+named `phrase-settings.json`.
+
+Version 3.8 also removes the automatic converter for the php format of
+the configuration in older versions. If you are updating from Nominatim < 3.7
+and still work with a custom `phrase-settings.php`, you need to manually
+convert it into a json format.
+
+### PHP utils removed
+
+The old PHP utils have now been removed completely. You need to switch to
+the appropriate functions of the nominatim command line tool. See
+[Introducing `nominatim` command line tool](#introducing-nominatim-command-line-tool)
+below.
+
 ## 3.6.0 -> 3.7.0

+### New format and name of configuration file
+
+The configuration for an import is now saved in a `.env` file in the project
+directory. This file follows the dotenv format. For more information, see
+the [installation chapter](Import.md#configuration-setup-in-env).
+
+To migrate to the new system, create a new project directory, add the `.env`
+file and port your custom configuration from `settings/local.php`. Most
+settings are named similarly and only have received a `NOMINATIM_` prefix.
+Use the default settings in `settings/env.defaults` as a reference.
+
 ### New location for data files

 External data files for Wikipedia importance, postcodes etc. are no longer
@@ -69,7 +137,7 @@ done

 The debugging UI is no longer directly provided with Nominatim. Instead we
 now provide a simple Javascript application. Please refer to
-[Setting up the Nominatim UI](../Setup-Nominatim-UI) for details on how to
+[Setting up the Nominatim UI](Setup-Nominatim-UI.md) for details on how to
 set up the UI.

 The icons served together with the API responses have been moved to the
@@ -113,6 +181,14 @@ configuration file, run the following command after updating:
 ./utils/setup.php --setup-website
 ```

+### Update SQL code
+
+To update the SQL code to the latest version run:
+
+```
+./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
+```
+
 ## 3.4.0 -> 3.5.0

 ### New Wikipedia/Wikidata importance tables
--- a/docs/admin/Setup-Nominatim-UI.md
+++ b/docs/admin/Setup-Nominatim-UI.md
@@ -10,20 +10,20 @@ installation. For more details, please also have a look at the

 ## Installing nominatim-ui

-nominatim-ui does not need any special installation, just download, configure
-and run it.
+We provide regular releases of nominatim-ui that contain the packaged website.
+They do not need any special installation. Just download, configure
+and run it. Grab the latest release from
+[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
+and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.

-Clone the source from github:
-
-    git clone https://github.com/osm-search/nominatim-ui
-
-Copy the example configuration into the right place:
+Next you need to adapt the UI to your installation. Custom settings need to be
+put into `dist/theme/config.theme.js`. At a minimum you need to
+set `Nominatim_API_Endpoint` to point to your Nominatim installation:

    cd nominatim-ui
-    cp dist/config.example.js dist/config.js
+    echo "Nominatim_Config.Nominatim_API_Endpoint='https://myserver.org/nominatim/';" > dist/theme/config.theme.js

-Now adapt the configuration to your needs. You need at least
-to change the `Nominatim_API_Endpoint` to point to your Nominatim installation.
+For the full set of available settings, have a look at `dist/config.defaults.js`.

 Then you can just test it locally by spinning up a webserver in the `dist`
 directory. For example, with Python:
@@ -161,24 +161,16 @@ directory like this:
  # If no endpoint is given, then use search.
  RewriteRule ^(/|$)   "search.php"

-  # If format-html is explicity requested, forward to the UI.
+  # If format-html is explicitly requested, forward to the UI.
  RewriteCond %{QUERY_STRING} "format=html"
-  RewriteRule ^([^/]+).php ui/$1.html [R,END]
-  # Same but .php suffix is missing.
-  RewriteCond %{QUERY_STRING} "format=html"
-  RewriteRule ^([^/]+) ui/$1.html [R,END]
+  RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]

  # If no format parameter is there then forward anything
  # but /reverse and /lookup to the UI.
  RewriteCond %{QUERY_STRING} "!format="
  RewriteCond %{REQUEST_URI}  "!/lookup"
  RewriteCond %{REQUEST_URI}  "!/reverse"
-  RewriteRule ^([^/]+).php ui/$1.html [R,END]
-  # Same but .php suffix is missing.
-  RewriteCond %{QUERY_STRING} "!format="
-  RewriteCond %{REQUEST_URI}  "!/lookup"
-  RewriteCond %{REQUEST_URI}  "!/reverse"
-  RewriteRule ^([^/]+) ui/$1.html [R,END]
+  RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
 </Directory>
 ```

--- a/docs/admin/Update.md
+++ b/docs/admin/Update.md
@@ -10,18 +10,21 @@ For a list of other methods to add or update data see the output of
    If you have configured a flatnode file for the import, then you
    need to keep this flatnode file around for updates.

-#### Installing the newest version of Pyosmium
+### Installing the newest version of Pyosmium

-It is recommended to install Pyosmium via pip. Make sure to use python3.
+The replication process uses
+[Pyosmium](https://docs.osmcode.org/pyosmium/latest/updating_osm_data.html)
+to download update data from the server.
+It is recommended to install Pyosmium via pip.
 Run (as the same user who will later run the updates):

 ```sh
 pip3 install --user osmium
 ```

-#### Setting up the update process
+### Setting up the update process

-Next the update needs to be initialised. By default Nominatim is configured
+Next the update process needs to be initialised. By default Nominatim is configured
 to update using the global minutely diffs.

 If you want a different update source you will need to add some settings
@@ -30,9 +33,9 @@ diffs for Ireland from Geofabrik add the following:

    # base URL of the replication service
    NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
-    # How often upstream publishes diffs
+    # How often upstream publishes diffs (in seconds)
    NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
-    # How long to sleep if no update found yet
+    # How long to sleep if no update found yet (in seconds)
    NOMINATIM_REPLICATION_RECHECK_INTERVAL=900

 To set up the update process now run the following command:
@@ -45,12 +48,185 @@ what you expect.
 The `replication --init` command needs to be rerun whenever the replication
 service is changed.

-#### Updating Nominatim
+### Updating Nominatim

-The following command will keep your database constantly up to date:
+Nominatim supports different modes for retrieving the update data from the
+server. Which one you want to use depends on your exact setup and how often you
+want to retrieve updates.
+
+These instructions are for using a single source of updates. If you have
+imported multiple country extracts and want to keep them
+up-to-date, [Advanced installations section](Advanced-Installations.md)
+contains instructions to set up and update multiple country extracts.
+
+#### One-time mode
+
+When the `--once` parameter is given, then Nominatim will download exactly one
+batch of updates and then exit. This one-time mode still respects the
+`NOMINATIM_REPLICATION_UPDATE_INTERVAL` that you have set. If according to
+the update interval no new data has been published yet, it will go to sleep
+until the next expected update and only then attempt to download the next batch.
+
+The one-time mode is particularly useful if you want to run updates continuously
+but need to schedule other work in between updates. For example, the main
+service at osm.org uses it, to regularly recompute postcodes -- a process that
+must not be run while updates are in progress. Its update script
+looks like this:
+
+```sh
+#!/bin/bash
+
+# Switch to your project directory.
+cd /srv/nominatim
+
+while true; do
+  nominatim replication --once
+  if [ -f "/srv/nominatim/schedule-maintenance" ]; then
+    rm /srv/nominatim/schedule-maintenance
+    nominatim refresh --postcodes
+  fi
+done
+```
+
+A cron job then creates the file `/srv/nominatim/schedule-maintenance` once per night.
+
+##### One-time mode with systemd
+
+You can run the one-time mode with a systemd timer & service.
+
+Create a timer description like `/etc/systemd/system/nominatim-updates.timer`:
+
+```
+[Unit]
+Description=Timer to start updates of Nominatim
+
+[Timer]
+OnActiveSec=2
+OnUnitActiveSec=1min
+Unit=nominatim-updates.service
+
+[Install]
+WantedBy=multi-user.target
+```
+
+And then a similar service definition: `/etc/systemd/system/nominatim-updates.service`:
+
+```
+[Unit]
+Description=Single updates of Nominatim
+
+[Service]
+WorkingDirectory=/srv/nominatim
+ExecStart=nominatim replication --once
+StandardOutput=append:/var/log/nominatim-updates.log
+StandardError=append:/var/log/nominatim-updates.error.log
+User=nominatim
+Group=nominatim
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Replace the `WorkingDirectory` with your project directory. Also adapt user and
+group names as required. `OnUnitActiveSec` defines how often the individual
+update command is run.
+
+Now activate the service and start the updates:
+
+```
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim-updates.timer
+sudo systemctl start nominatim-updates.timer
+```
+
+You can stop future data updates, while allowing any current, in-progress
+update steps to finish, by running `sudo systemctl stop
+nominatim-updates.timer` and waiting until `nominatim-updates.service` isn't
+running (`sudo systemctl is-active nominatim-updates.service`). Current output
+from the update can be seen with `systemctl status
+nominatim-updates.service`.
+
+
+#### Catch-up mode
+
+With the `--catch-up` parameter, Nominatim will immediately try to download
+all changes from the server until the database is up-to-date. The catch-up mode
+still respects the parameter `NOMINATIM_REPLICATION_MAX_DIFF`. It downloads and
+applies the changes in appropriate batches until all is done.
+
+The catch-up mode is foremost useful to bring the database up to speed after the
+initial import. Given that the service usually is not in production at this
+point, you can temporarily be a bit more generous with the batch size and
+number of threads you use for the updates by running catch-up like this:
+
+```
+cd /srv/nominatim
+NOMINATIM_REPLICATION_MAX_DIFF=5000 nominatim replication --catch-up --threads 15
+```
+
+The catch-up mode is also useful when you want to apply updates at a lower
+frequency than what the source publishes. You can set up a cron job to run
+replication catch-up at whatever interval you desire.
+
+!!! hint
+    When running scheduled updates with catch-up, it is a good idea to choose
+    a replication source with an update frequency that is an order of magnitude
+    higher. For example, if you want to update once a day, use an hourly updated
+    source. This makes sure that you don't miss an entire day of updates when
+    the source is unexpectedly late to publish its update.
+
+    If you want to use the source with the same update frequency (e.g. a daily
+    updated source with daily updates), use the
+    continuous update mode. It keeps re-requesting the newest update until it
+    is published.
+
+
+#### Continuous updates
+
+!!! danger
+    This mode is no longer recommended to use and will be removed in future
+    releases. systemd is much better
+    suited for running regular updates. Please refer to the setup
+    instructions for running one-time mode with systemd above.
+
+This is the easiest mode. Simply run the replication command without any
+parameters:

    nominatim replication

-If you have imported multiple country extracts and want to keep them
-up-to-date, [Advanced installations section](Advanced-Installations.md) contains instructions 
-to set up and update multiple country extracts.
+The update application keeps running forever and retrieves and applies
+new updates from the server as they are published.
+
+You can run this command as a simple systemd service. Create a service
+description like that in `/etc/systemd/system/nominatim-updates.service`:
+
+```
+[Unit]
+Description=Continuous updates of Nominatim
+
+[Service]
+WorkingDirectory=/srv/nominatim
+ExecStart=nominatim replication
+StandardOutput=append:/var/log/nominatim-updates.log
+StandardError=append:/var/log/nominatim-updates.error.log
+User=nominatim
+Group=nominatim
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Replace the `WorkingDirectory` with your project directory. Also adapt user
+and group names as required.
+
+Now activate the service and start the updates:
+
+```
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim-updates
+sudo systemctl start nominatim-updates
+```
+
+
--- a/docs/api/Details.md
+++ b/docs/api/Details.md
@@ -2,13 +2,17 @@

 Show all details about a single place saved in the database.

+This API endpoint is meant for visual inspection of the data in the database,
+mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
+The parameters of the endpoint and the output may change occasionally between
+versions of Nominatim. Do not rely on the output in scripts or applications.
+
 !!! warning
-    The details page exists for debugging only. You may not use it in scripts
-    or to automatically query details about a result.
+    The details endpoint at https://nominatim.openstreetmap.org
+    may not be used in scripts or bots at all.
    See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).


-## Parameters

 The details API supports the following two request formats:

@@ -35,59 +39,90 @@ for a place is different between Nominatim installation (servers) and
 changes when data gets reimported. Therefore it cannot be used as
 a permanent id and shouldn't be used in bug reports.

+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/details.php`. This is now deprecated
+    and will be removed in future versions.

-Additional optional parameters are explained below.
+
+## Parameters
+
+This section lists additional optional parameters.

 ### Output format

-* `json_callback=<string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |

-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
+When set, then JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.

-* `pretty=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| pretty    | 0 or 1 | 0 |

-Add indentation to make it more human-readable. (Default: 0)
+`[PHP-only]` Add indentation to the output to make it more human-readable.


 ### Output details

-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |

-Include a breakdown of the address into elements. (Default: 0)
+When set to 1, include a breakdown of the address into elements.

-* `keywords=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| keywords  | 0 or 1 | 0 |

-Include a list of name keywords and address keywords (word ids). (Default: 0)
+When set to 1, include a list of name keywords and address keywords
+in the result.

-* `linkedplaces=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| linkedplaces  | 0 or 1 | 1 |

-Include a details of places that are linked with this one. Places get linked
+Include details of places that are linked with this one. Places get linked
 together when they are different forms of the same physical object. Nominatim
 links two kinds of objects together: place nodes get linked with the
 corresponding administrative boundaries. Waterway relations get linked together with their
 members.
-(Default: 1)

-* `hierarchy=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| hierarchy  | 0 or 1 | 0 |

-Include details of places lower in the address hierarchy. (Default: 0)
+Include details of places lower in the address hierarchy.

-* `group_hierarchy=[0|1]`
+`[Python-only]` will only return properly parented places. These are address
+or POI-like places that reuse the address of their parent street or place.

-For JSON output will group the places by type. (Default: 0)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| group_hierarchy  | 0 or 1 | 0 |

-* `polygon_geojson=[0|1]`
+When set to 1, the output of the address hierarchy will be
+grouped by type.

-Include geometry of result. (Default: 0)
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+
+
+Include geometry of result.

 ### Language of results

-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |

-Preferred language order for showing result, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).


 ## Examples
--- a/docs/api/Faq.md
+++ b/docs/api/Faq.md
@@ -35,7 +35,7 @@ it contains the county/state/country across the border.
 #### 3. I get different counties/states/countries when I change the zoom parameter in the reverse query. How is that possible?

 This is basically the same problem as in the previous answer.
-The zoom level influences at which [search rank](https://wiki.openstreetmap.org/wiki/Nominatim/Development_overview#Country_to_street_level) Nominatim starts looking
+The zoom level influences at which [search rank](../customize/Ranking.md#search-rank) Nominatim starts looking
 for the closest object. So the closest house number maybe on one side of the
 border while the closest street is on the other. As the address details contain
 the address of the closest object found, you might sometimes get one result,
@@ -59,3 +59,27 @@ suited for these kinds of queries.

 That said if you installed your own Nominatim instance you can use the
 `nominatim export` PHP script as basis to return such lists.
+
+#### 7. My result has a wrong postcode. Where does it come from?
+
+Most places in OSM don't have a postcode, so Nominatim tries to interpolate
+one. It first looks at all the places that make up the address of the place.
+If one of them has a postcode defined, this is the one to be used. When
+none of the address parts has a postcode either, Nominatim interpolates one
+from the surrounding objects. If the postcode for your result is wrong, then
+most of the time there is an OSM object with the wrong postcode nearby.
+
+To find the bad postcode, go to
+[https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
+and search for your place. When you have found it, click on the 'details' link
+under the result to go to the details page. There is a field 'Computed Postcode'
+which should display the bad postcode. Click on the 'how?' link. A small
+explanation text appears. It contains a link to a query for Overpass Turbo.
+Click on that and you get a map with all places in the area that have the bad
+postcode. If none is displayed, zoom the map out a bit and then click on 'Run'.
+
+Now go to [OpenStreetMap](https://openstreetmap.org) and fix the error you
+have just found. It will take at least a day for Nominatim to catch up with
+your data fix. Sometimes longer, depending on how much editing activity is in
+the area.
+
--- a/docs/api/Lookup.md
+++ b/docs/api/Lookup.md
@@ -3,7 +3,7 @@
 The lookup API allows to query the address and other details of one or
 multiple OSM objects like node, way or relation.

-## Parameters
+## Endpoint

 The lookup API has the following format:

@@ -15,86 +15,140 @@ The lookup API has the following format:
 prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
 can be queried at the same time.

-Additional optional parameters are explained below.
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
+    and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional optional parameters.

 ### Output format

-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |

-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+See [Place Output Formats](Output.md) for details on each format.

-* `json_callback=<string>`

-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
+
+When given, then JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
+
 Only has an effect for JSON output formats.

+
 ### Output details

-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |

-Include a breakdown of the address into elements. (Default: 0)
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
+
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.


-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |

-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.


-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |

-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.


 ### Language of results

-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
+
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
+
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers on the contrary always
+    send the currently chosen browser language.

-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.

 ### Polygon output

-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |

-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.

-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |

-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+

 ### Other

-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |

 If you are making large numbers of request please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.

-* `debug=[0|1]`
+
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |

 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.


 ## Examples

 ##### XML

-[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)

 ```xml
-  <lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
-    <place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
+  <lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data &#xA9; OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
+    <place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
      <city>Manchester</city>
      <county>Greater Manchester</county>
      <state_district>North West England</state_district>
@@ -102,21 +156,20 @@ This overrides the specified machine readable format. (Default: 0)
      <country>United Kingdom</country>
      <country_code>gb</country_code>
    </place>
-    <place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
-      <attraction>Brandenburg Gate</attraction>
-      <house_number>1</house_number>
-      <pedestrian>Pariser Platz</pedestrian>
-      <suburb>Mitte</suburb>
-      <city_district>Mitte</city_district>
-      <city>Berlin</city>
-      <state>Berlin</state>
-      <postcode>10117</postcode>
+    <place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Stra&#xDF;e, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
+      <tourism>Brandenburger Tor</tourism>
+      <road>Brandenburger Stra&#xDF;e</road>
+      <suburb>Historische Innenstadt</suburb>
+      <city>Potsdam</city>
+      <state>Brandenburg</state>
+      <postcode>14467</postcode>
      <country>Germany</country>
      <country_code>de</country_code>
    </place>
-    <place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
+    <place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
      <city>Berlin</city>
      <state>Berlin</state>
+      <postcode>10178</postcode>
      <country>Germany</country>
      <country_code>de</country_code>
    </place>
@@ -125,38 +178,50 @@ This overrides the specified machine readable format. (Default: 0)

 ##### JSON with extratags

-[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)

 ```json
 [
-  {
-    "place_id": "84271358",
-    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
-    "osm_type": "way",
-    "osm_id": "50637691",
-    "lat": "52.39955055",
-    "lon": "13.04806574678",
-    "display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
-    "class": "historic",
-    "type": "city_gate",
-    "importance": "0.221233780277011",
-    "address": {
-      "address29": "Brandenburger Tor",
-      "pedestrian": "Brandenburger Straße",
-      "suburb": "Nördliche Innenstadt",
-      "city": "Potsdam",
-      "state": "Brandenburg",
-      "postcode": "14467",
-      "country": "Germany",
-      "country_code": "de"
-    },
-    "extratags": {
-      "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
-      "wikidata": "Q695045",
-      "wikipedia": "de:Brandenburger Tor (Potsdam)",
-      "wheelchair": "yes",
-      "description": "Kleines Brandenburger Tor in Potsdam"
-    }
-  }
+   {
+      "place_id": 115462561,
+      "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+      "osm_type": "way",
+      "osm_id": 50637691,
+      "boundingbox": [
+        "52.3994612",
+        "52.3996426",
+        "13.0479574",
+        "13.0481754"
+      ],
+      "lat": "52.399550700000006",
+      "lon": "13.048066846939687",
+      "display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
+      "class": "tourism",
+      "type": "attraction",
+      "importance": 0.2940287400552381,
+      "address": {
+        "tourism": "Brandenburger Tor",
+        "road": "Brandenburger Straße",
+        "suburb": "Historische Innenstadt",
+        "city": "Potsdam",
+        "state": "Brandenburg",
+        "postcode": "14467",
+        "country": "Germany",
+        "country_code": "de"
+      },
+      "extratags": {
+        "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
+        "heritage": "4",
+        "wikidata": "Q695045",
+        "architect": "Carl von Gontard;Georg Christian Unger",
+        "wikipedia": "de:Brandenburger Tor (Potsdam)",
+        "wheelchair": "yes",
+        "description": "Kleines Brandenburger Tor in Potsdam",
+        "heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
+        "heritage:operator": "bldam",
+        "architect:wikidata": "Q68768;Q95223",
+        "year_of_construction": "1771"
+      }
+   }
 ]
 ```
--- a/docs/api/Output.md
+++ b/docs/api/Output.md
@@ -28,6 +28,7 @@ a single place (for reverse) of the following format:
      "city": "London",
      "state_district": "Greater London",
      "state": "England",
+      "ISO3166-2-lvl4": "GB-ENG",
      "postcode": "SW1A 2DU",
      "country": "United Kingdom",
      "country_code": "gb"
@@ -97,7 +98,10 @@ The GeocodeJSON format follows the
 The following feature attributes are implemented:

 * `osm_type`, `osm_id` - reference to the OSM object (unofficial extension, [see notes](#osm-reference))
- * `type` - value of the main tag of the object (e.g. residential, restaurant, ...)
+ * `type` - the 'address level' of the object (`house`, `street`, `district`, `city`,
+            `county`, `state`, `country`, `locality`)
+ * `osm_key` - key of the main tag of the OSM object (e.g. boundary, highway, amenity)
+ * `osm_value` - value of the main tag of the OSM object (e.g. residential, restaurant)
 * `label` - full comma-separated address
 * `name` - localised name of the place
 * `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
@@ -126,6 +130,7 @@ formats depending on the API call.
  </result>
  <addressparts>
     <state>Bavaria</state>
+     <ISO3166-2-lvl4>DE-BY</ISO3166-2-lvl4>
     <country>Germany</country>
     <country_code>de</country_code>
  </addressparts>
@@ -179,6 +184,7 @@ Additional information requested with `addressdetails=1`, `extratags=1` and
    <city>London</city>
    <state_district>Greater London</state_district>
    <state>England</state>
+    <ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
    <postcode>SW1A 2DU</postcode>
    <country>United Kingdom</country>
    <country_code>gb</country_code>
@@ -205,8 +211,8 @@ be more than one. The attributes of that element contain:
 * `ref` - content of `ref` tag if it exists
 * `lat`, `lon` - latitude and longitude of the centroid of the object
 * `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
- * `place_rank` - class [search rank](../develop/Ranking#search-rank)
- * `address_rank` - place [address rank](../develop/Ranking#address-rank)
+ * `place_rank` - class [search rank](../customize/Ranking.md#search-rank)
+ * `address_rank` - place [address rank](../customize/Ranking.md#address-rank)
 * `display_name` - full comma-separated address
 * `class`, `type` - key and value of the main OSM tag
 * `importance` - computed importance rank
@@ -230,7 +236,7 @@ on another server. It may even change its ID on the same server when it is
 removed and reimported while updating the database with fresh OSM data.
 It is thus not useful to treat it as permanent for later use.

-The combination `osm_type`+`osm_id` is slighly better but remember in
+The combination `osm_type`+`osm_id` is slightly better but remember in
 OpenStreetMap mappers can delete, split, recreate places (and those
 get a new `osm_id`), there is no link between those old and new ids.
 Places can also change their meaning without changing their `osm_id`,
@@ -279,17 +285,18 @@ with a designation label. Per default the following labels may appear:

 * continent
 * country, country_code
- * region, state, state_district, county
+ * region, state, state_district, county, ISO3166-2-lvl<admin_level>
 * municipality, city, town, village
 * city_district, district, borough, suburb, subdivision
 * hamlet, croft, isolated_dwelling
 * neighbourhood, allotments, quarter
- * city_block, residental, farm, farmyard, industrial, commercial, retail
+ * city_block, residential, farm, farmyard, industrial, commercial, retail
 * road
 * house_number, house_name
 * emergency, historic, military, natural, landuse, place, railway,
   man_made, aerialway, boundary, amenity, aeroway, club, craft, leisure,
   office, mountain_pass, shop, tourism, bridge, tunnel, waterway
+ * postcode

 They roughly correspond to the classification of the OpenStreetMap data
 according to either the `place` tag or the main key of the object.
--- a/docs/api/Overview.md
+++ b/docs/api/Overview.md
@@ -1,8 +1,16 @@
 ### Nominatim API

-Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
+!!! Attention
+    The current version of Nominatim implements two different search frontends:
+    the old PHP frontend and the new Python frontend. They have a very similar
+    API but differ in some implementation details. These are marked in the
+    documentation as `[Python-only]` or `[PHP-only]`.

-Its API has the following endpoints for querying the data:
+    `https://nominatim.openstreetmap.org` implements the **Python frontend**.
+    So users should refer to the **`[Python-only]`** comments.
+
+This section describes the API V1 of the Nominatim web service. The
+service offers the following endpoints:

 * __[/search](Search.md)__ - search OSM objects by name or type
 * __[/reverse](Reverse.md)__ - search OSM object by their location
@@ -12,3 +20,6 @@ Its API has the following endpoints for querying the data:
                    back in Nominatim in case the deletion was accidental
 * __/polygons__ - list of broken polygons detected by Nominatim
 * __[/details](Details.md)__ - show internal details for an object (for debugging only)
+
+
+
--- a/docs/api/Reverse.md
+++ b/docs/api/Reverse.md
@@ -1,6 +1,7 @@
 # Reverse Geocoding

-Reverse geocoding generates an address from a latitude and longitude.
+Reverse geocoding generates an address from a coordinate given as
+latitude and longitude.

 ## How it works

@@ -18,8 +19,7 @@ The other issue to be aware of is that the closest OSM object may not always
 have a similar enough address to the coordinate you were requesting. For
 example, in dense city areas it may belong to a completely different street.

-
-## Parameters
+## Endpoint

 The main format of the reverse API is

@@ -27,61 +27,105 @@ The main format of the reverse API is
 https://nominatim.openstreetmap.org/reverse?lat=<value>&lon=<value>&<params>
 ```

-where `lat` and `lon` are latitude and longitutde of a coordinate in WGS84
+where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
 projection. The API returns exactly one result or an error when the coordinate
 is in an area with no OSM data coverage.

-Additional paramters are accepted as listed below.

-!!! warning "Deprecation warning"
+!!! danger "Deprecation warning"
    The reverse API used to allow address lookup for a single OSM object by
-    its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
-    instead.
+    its OSM id for `[PHP-only]`. The use is considered deprecated.
+    Use the [Address Lookup API](Lookup.md) instead.
+
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
+    and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional parameters to further influence the output.

 ### Output format

-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |

-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+See [Place Output Formats](Output.md) for details on each format.

-* `json_callback=<string>`

-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
+
+When given, then JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
+
 Only has an effect for JSON output formats.

+
 ### Output details

-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 1 |

-Include a breakdown of the address into elements. (Default: 1)
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
+
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.


-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |

-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.


-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |

-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.


 ### Language of results

-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |

-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).

-### Result limitation
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers on the contrary always
+    send the currently chosen browser language.

-* `zoom=[0-18]`

-Level of detail required for the address. Default: 18. This is a number that
+### Result restriction
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| zoom      | 0-18  | 18      |
+
+Level of detail required for the address. This is a number that
 corresponds roughly to the zoom level used in XYZ tile sources in frameworks
 like Leaflet.js, Openlayers etc.
 In terms of address details the zoom levels are as follows:
@@ -92,41 +136,79 @@ In terms of address details the zoom levels are as follows:
  5   | state
  8   | county
  10  | city
-  14  | suburb
+  12  | town / borough
+  13  | village / suburb
+  14  | neighbourhood
+  15  | any settlement
  16  | major streets
  17  | major and minor streets
  18  | building


+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer     | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
+
+The layer filter allows to select places by themes.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic points
+of interest like restaurants, shops, hotels but also less obvious features
+like recycling bins, guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
+
+
 ### Polygon output

-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |

-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.

-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |

-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+

 ### Other

-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |

 If you are making large numbers of request please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.


-* `debug=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |

 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is in HTML format.
+This overrides the specified machine readable format.


 ## Examples
--- a/docs/api/Search.md
+++ b/docs/api/Search.md
@@ -8,12 +8,12 @@ The search query may also contain
 which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
 This can be used to narrow down the kind of objects to be returned.

-!!! warning
+!!! note
    Special phrases are not suitable to query all objects of a certain type in an
    area. Nominatim will always just return a collection of the best matches. To
    download OSM data by object type, use the [Overpass API](https://overpass-api.de/).

-## Parameters
+## Endpoint

 The search API has the following format:

@@ -21,82 +21,211 @@ The search API has the following format:
   https://nominatim.openstreetmap.org/search?<params>
 ```

-The search term may be specified with two different sets of parameters:
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/search.php`. This is now deprecated
+    and will be removed in future versions.

-* `q=<query>`
+The query term can be given in two different forms: free-form or structured.

-    Free-form query string to search for.
-    Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
-    [pilkington avenue, birmingham](//nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
-    [birmingham, pilkington avenue](//nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
-    Commas are optional, but improve performance by reducing the complexity of the search.
+### Free-form query

+| Parameter | Value |
+|-----------| ----- |
+| q         | Free-form query string to search for |

-* `street=<housenumber> <streetname>`
-* `city=<city>`
-* `county=<county>`
-* `state=<state>`
-* `country=<country>`
-* `postalcode=<postalcode>`
+In this form, the query can be unstructured.
+Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
+[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
+[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
+Commas are optional, but improve performance by reducing the complexity of the search.

-    Alternative query string format split into several parameters for structured requests.
-    Structured requests are faster but are less robust against alternative
-    OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
+The free-form may also contain special phrases to describe the type of
+place to be returned or a coordinate to search close to a position.

-Both query forms accept the additional parameters listed below.
+### Structured query
+
+| Parameter  | Value |
+|----------- | ----- |
+| amenity    | name and/or type of POI |
+| street     | housenumber and streetname |
+| city       | city |
+| county     | county |
+| state      | state |
+| country    | country |
+| postalcode | postal code |
+
+The structured form of the search query allows you to look up an address
+that is already split into its components. Each parameter represents a field
+of the address. All parameters are optional. You should only use the ones
+that are relevant for the address you want to geocode.
+
+!!! Attention
+    Cannot be combined with the `q=<query>` parameter. Newer versions of
+    the API will return an error if you do so. Older versions simply return
+    unexpected results.
+
+## Parameters
+
+The following parameters can be used to further restrict the search and
+change the output. They are usable for both forms of the search query.

 ### Output format

-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |

-See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
+See [Place Output Formats](Output.md) for details on each format.

-* `json_callback=<string>`
+!!! note
+    The Nominatim service at
+    [https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
+    has a different default behaviour for historical reasons. When the
+    `format` parameter is omitted, the request will be forwarded to the Web UI.
+
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
+
+When given, then JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.

-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
 Only has an effect for JSON output formats.

+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| limit     | number | 10 |
+
+Limit the maximum number of returned results. Cannot be more than 40.
+Nominatim may decide to return fewer results than given, if additional
+results do not sufficiently match the query.
+
+
 ### Output details

-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |

-Include a breakdown of the address into elements. (Default: 0)
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
+
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.


-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |

-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.


-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |

-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.


 ### Language of results

-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |

-Preferred language order for showing search results, overrides the value
-specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).

-### Result limitation
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers on the contrary always
+    send the currently chosen browser language.

-* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
+### Result restriction

-Limit search results to one or more countries. `<countrycode>` must be the
-[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
-e.g. `gb` for the United Kingdom, `de` for Germany.
+There are two ways to influence the results. *Filters* exclude certain
+kinds of results completely. *Boost parameters* only change the order of the
+results and thus give a preference to some results over others.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| countrycodes | comma-separated list of country codes | _unset_ |
+
+Filter that limits the search results to one or more countries.
+The country code must be the
+[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
+of the country, e.g. `gb` for the United Kingdom, `de` for Germany.

 Each place in Nominatim is assigned to one country code based
 on OSM country boundaries. In rare cases a place may not be in any country
-at all, for example, in international waters.
+at all, for example, when it is in international waters. These places are
+also excluded when the filter is set.

-* `exclude_place_ids=<place_id,[place_id],[place_id]`
+!!! Note
+    This parameter should not be confused with the 'country' parameter of
+    the structured query. The 'country' parameter contains a search term
+    and will be handled with some fuzziness. The `countrycodes` parameter
+    is a hard filter and as such should be preferred. Having both parameters
+    in the same query will work. If the parameters contradict each other,
+    the search will come up empty.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer     | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
+
+The layer filter allows to select places by themes.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic POIs like
+restaurants, shops, hotels but also less obvious features like recycling bins,
+guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
+
+The featureType allows to have a more fine-grained selection for places
+from the address layer. Results can be restricted to places that make up
+the 'state', 'country' or 'city' part of an address. A featureType of
+settlement selects any human inhabited feature from 'state' down to
+'neighbourhood'.
+
+When featureType is set, then results are automatically restricted
+to the address layer (see above).
+
+!!! tip
+    Instead of using the featureType filters `country`, `state` or `city`,
+    you can also use a structured query without the finer-grained parameters
+    amenity or street.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| exclude_place_ids | comma-separated list of place ids | _unset_ |

 If you do not want certain OSM objects to appear in the search
 result, give a comma separated list of the `place_id`s you want to skip.
@@ -104,180 +233,212 @@ This can be used to retrieve additional search results. For example, if a
 previous query only returned a few results, then including those here would
 cause the search to return other, less accurate, matches (if possible).

+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| viewbox   | `<x1>,<y1>,<x2>,<y2>` | _unset_ |

-* `limit=<integer>`
+Boost parameter which focuses the search on the given area.
+Any two corner points of the box are accepted as long as they make a proper
+box. `x` is longitude, `y` is latitude.

-Limit the number of returned results. (Default: 10, Maximum: 50)
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| bounded   | 0 or 1 | 0       |

+When set to 1, then it turns the 'viewbox' parameter (see above) into
+a filter parameter, excluding any results outside the viewbox.

-* `viewbox=<x1>,<y1>,<x2>,<y2>`
-
-The preferred area to find search results. Any two corner points of the box
-are accepted as long as they span a real box. `x` is longitude,
-`y` is latitude.
-
-
-* `bounded=[0|1]`
-
-When a viewbox is given, restrict the result to items contained within that
-viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
-only search is allowed. Give the special keyword for the amenity in square
+When `bounded=1` is given and the viewbox is small enough, then an amenity-only
+search is allowed. Give the special keyword for the amenity in square
 brackets, e.g. `[pub]` and a selection of objects of this type is returned.
-There is no guarantee that the result is complete. (Default: 0)
+There is no guarantee that the result returns all objects in the area.


 ### Polygon output

-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |

-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.

-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |

-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.

 ### Other

-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |

 If you are making large numbers of request please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.

-* `dedupe=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| dedupe    | 0 or 1 | 1       |

 Sometimes you have several objects in OSM identifying the same place or
 object in reality. The simplest case is a street being split into many
 different OSM ways due to different characteristics. Nominatim will
-attempt to detect such duplicates and only return one match unless
-this parameter is set to 0. (Default: 1)
+attempt to detect such duplicates and only return one match. Setting
+this parameter to 0 disables this deduplication mechanism and
+ensures that all results are returned.

-* `debug=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |

 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
-
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.


 ## Examples


-##### XML with kml polygon
+##### XML with KML polygon

-* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
+* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)

 ```xml
-  <searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
-    <place
-      place_id="1620612" osm_type="node" osm_id="452010817"
-      boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
-      lat="52.5487429714954" lon="-1.81602098644987"
-      display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
-      class="place" type="house">
-      <geokml>
-        <Polygon>
-          <outerBoundaryIs>
-            <LinearRing>
-              <coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
-            </LinearRing>
-          </outerBoundaryIs>
-        </Polygon>
-      </geokml>
-      <house_number>135</house_number>
-      <road>Pilkington Avenue</road>
-      <village>Wylde Green</village>
-      <town>Sutton Coldfield</town>
-      <city>City of Birmingham</city>
-      <county>West Midlands (county)</county>
-      <postcode>B72</postcode>
-      <country>United Kingdom</country>
-      <country_code>gb</country_code>
-    </place>
-  </searchresults>
+<?xml version="1.0" encoding="UTF-8" ?>
+<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
+               attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
+               querystring="135 pilkington avenue, birmingham"
+               more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&amp;polygon_kml=1&amp;addressdetails=1&amp;limit=20&amp;exclude_place_ids=125279639&amp;format=xml"
+               exclude_place_ids="125279639">
+  <place place_id="125279639"
+         osm_type="way"
+         osm_id="90394480"
+         lat="52.5487921"
+         lon="-1.8164308"
+         boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
+         place_rank="30"
+         address_rank="30"
+         display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
+         class="building"
+         type="residential"
+         importance="9.999999994736442e-08">
+    <geokml>
+      <Polygon>
+        <outerBoundaryIs>
+          <LinearRing>
+            <coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
+          </LinearRing>
+        </outerBoundaryIs>
+      </Polygon>
+    </geokml>
+    <house_number>135</house_number>
+    <road>Pilkington Avenue</road>
+    <hamlet>Maney</hamlet>
+    <town>Sutton Coldfield</town>
+    <village>Wylde Green</village>
+    <city>Birmingham</city>
+    <ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
+    <state_district>West Midlands Combined Authority</state_district>
+    <state>England</state>
+    <ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
+    <postcode>B72 1LH</postcode>
+    <country>United Kingdom</country>
+    <country_code>gb</country_code>
+  </place>
+</searchresults>
 ```

 ##### JSON with SVG polygon

-[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
+[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)

 ```json
-    {
-        "address": {
-            "city": "Berlin",
-            "city_district": "Mitte",
-            "construction": "Unter den Linden",
-            "continent": "European Union",
-            "country": "Deutschland",
-            "country_code": "de",
-            "house_number": "1",
-            "neighbourhood": "Scheunenviertel",
-            "postcode": "10117",
-            "public_building": "Kommandantenhaus",
-            "state": "Berlin",
-            "suburb": "Mitte"
-        },
-        "boundingbox": [
-            "52.5170783996582",
-            "52.5173187255859",
-            "13.3975105285645",
-            "13.3981599807739"
-        ],
-        "class": "amenity",
-        "display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
-        "importance": 0.73606775332943,
-        "lat": "52.51719785",
-        "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
-        "lon": "13.3978352028938",
-        "osm_id": "15976890",
-        "osm_type": "way",
-        "place_id": "30848715",
-        "svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
-        "type": "public_building"
-    }
+[
+  {
+    "address": {
+      "ISO3166-2-lvl4": "DE-BE",
+      "borough": "Mitte",
+      "city": "Berlin",
+      "country": "Deutschland",
+      "country_code": "de",
+      "historic": "Kommandantenhaus",
+      "house_number": "1",
+      "neighbourhood": "Friedrichswerder",
+      "postcode": "10117",
+      "road": "Unter den Linden",
+      "suburb": "Mitte"
+    },
+    "boundingbox": [
+      "52.5170798",
+      "52.5173311",
+      "13.3975116",
+      "13.3981577"
+    ],
+    "class": "historic",
+    "display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
+    "importance": 0.8135042058306902,
+    "lat": "52.51720765",
+    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+    "lon": "13.397834399325466",
+    "osm_id": 15976890,
+    "osm_type": "way",
+    "place_id": 108681845,
+    "svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
+    "type": "house"
+  }
+]
 ```

 ##### JSON with address details

-[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
+[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)

 ```json
-    {
-        "address": {
-            "bakery": "B\u00e4cker Kamps",
-            "city_district": "Mitte",
-            "continent": "European Union",
-            "country": "Deutschland",
-            "country_code": "de",
-            "footway": "Bahnsteig U6",
-            "neighbourhood": "Sprengelkiez",
-            "postcode": "13353",
-            "state": "Berlin",
-            "suburb": "Wedding"
-        },
-        "boundingbox": [
-            "52.5460929870605",
-            "52.5460968017578",
-            "13.3591794967651",
-            "13.3591804504395"
-        ],
-        "class": "shop",
-        "display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
-        "icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
-        "importance": 0.201,
-        "lat": "52.5460941",
-        "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
-        "lon": "13.35918",
-        "osm_id": "317179427",
-        "osm_type": "node",
-        "place_id": "1453068",
-        "type": "bakery"
-    }
+[
+  {
+    "address": {
+      "ISO3166-2-lvl4": "DE-BE",
+      "borough": "Mitte",
+      "city": "Berlin",
+      "country": "Deutschland",
+      "country_code": "de",
+      "neighbourhood": "Sprengelkiez",
+      "postcode": "13347",
+      "road": "Lindower Straße",
+      "shop": "Ditsch",
+      "suburb": "Wedding"
+    },
+    "addresstype": "shop",
+    "boundingbox": [
+      "52.5427201",
+      "52.5427654",
+      "13.3668619",
+      "13.3669442"
+    ],
+    "category": "shop",
+    "display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
+    "importance": 9.99999999995449e-06,
+    "lat": "52.54274275",
+    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
+    "lon": "13.36690305710228",
+    "name": "Ditsch",
+    "osm_id": 437595031,
+    "osm_type": "way",
+    "place_id": 204751033,
+    "place_rank": 30,
+    "type": "bakery"
+  }
+]
 ```

 ##### GeoJSON
--- a/docs/api/Status.md
+++ b/docs/api/Status.md
@@ -1,35 +1,50 @@
 # Status

-Useful for checking if the service and database is running. The JSON output also shows
+Report on the state of the service and database. Useful for checking if the
+service is up and running. The JSON output also reports
 when the database was last updated.

+## Endpoint
+
+The status API has the following format:
+
+```
+https://nominatim.openstreetmap.org/status
+```
+
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/status.php`. This is now deprecated
+    and will be removed in future versions.
+
+
 ## Parameters

-* `format=[text|json]` (defaults to 'text')
+The status endpoint takes a single optional parameter:
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `text`, `json` | 'text' |
+
+Selects the output format. See below.


 ## Output

 #### Text format

-```
-   https://nominatim.openstreetmap.org/status.php
-```
+When everything is okay, a status code 200 is returned and a simple message: `OK`.

-will return HTTP status code 200 and print `OK`.
-
-On error it will return HTTP status code 500 and print a message, e.g.
+On error it will return HTTP status code 500 and print a detailed error message, e.g.
 `ERROR: Database connection failed`.



 #### JSON format

-```
-   https://nominatim.openstreetmap.org/status.php?format=json
-```
+Always returns a HTTP code 200, when the status call could be executed.

-will return HTTP code 200 and a structure
+On success a JSON dictionary with the following structure is returned:

 ```json
  {
@@ -45,8 +60,8 @@ The `software_version` field contains the version of Nominatim used to serve
 the API. The `database_version` field contains the version of the data format
 in the database.

-On error will also return HTTP status code 200 and a structure with error
-code and message, e.g.
+On error it will return a shorter JSON dictionary with the error message
+and status only, e.g.

 ```json
   {
@@ -54,13 +69,3 @@ code and message, e.g.
       "message": "Database connection failed"
   }
 ```
-
-Possible status codes are
-
-   |     | message              | notes                                             |
-   |-----|----------------------|---------------------------------------------------|
-   | 700 | "No database"        | connection failed                                 |
-   | 701 | "Module failed"      | database could not load nominatim.so              |
-   | 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
-   | 703 | "Query failed"       | test query against a database table failed        |
-   | 704 | "No value"           | test query worked but returned no results         |
--- a/docs/customize/Country-Settings.md
+++ b/docs/customize/Country-Settings.md
@@ -0,0 +1,149 @@
+# Customizing Per-Country Data
+
+Whenever an OSM object is imported into Nominatim, the object is first assigned
+a country. Nominatim can use this information to adapt various aspects of
+the address computation to the local customs of the country. This section
+explains how country assignment works and the principal per-country
+localizations.
+
+## Country assignment
+
+Countries are assigned on the basis of country data from the OpenStreetMap
+input data itself. Countries are expected to be tagged according to the
+[administrative boundary schema](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative):
+an OSM relation with `boundary=administrative` and `admin_level=2`. Nominatim
+uses the country code to distinguish the countries.
+
+If there is no country data available for a point, then Nominatim uses the
+fallback data imported from `data/country_osm_grid.sql.gz`. This was computed
+from OSM data as well but is guaranteed to cover all countries.
+
+Some OSM objects may also be located outside any country, for example a buoy
+in the middle of the ocean. These objects do not get any country assigned and
+get a default treatment when it comes to localized handling of data.
+
+## Per-country settings
+
+### Global country settings
+
+The main place to configure settings per country is the file
+`settings/country_settings.yaml`. This file has one section per country that
+is recognised by Nominatim. Each section is tagged with the country code
+(in lower case) and contains the different localization information. Only
+countries which are listed in this file are taken into account for computations.
+
+For example, the section for Andorra looks like this:
+
+```
+    partition: 35
+    languages: ca
+    names: !include country-names/ad.yaml
+    postcode:
+      pattern: "(ddd)"
+      output: AD\1
+```
+
+The individual settings are described below.
+
+#### `partition`
+
+Nominatim internally splits the data into multiple tables to improve
+performance. The partition number tells Nominatim into which table to put
+the country. This is purely internal management and has no effect on the
+output data.
+
+The default is to have one partition per country.
+
+#### `languages`
+
+A comma-separated list of ISO-639 language codes of default languages in the
+country. These are the languages used in name tags without a language suffix.
+Note that this is not necessarily the same as the list of official languages
+in the country. There may be officially recognised languages in a country
+which are only ever used in name tags with the appropriate language suffixes.
+Conversely, a non-official language may appear a lot in the name tags, for
+example when used as an unofficial Lingua Franca.
+
+List the languages in order of frequency of appearance with the most frequently
+used language first. It is not recommended to add languages when there are only
+very few occurrences.
+
+If only one language is listed, then Nominatim will 'auto-complete' the
+language of names without an explicit language-suffix.
+
+#### `names`
+
+List of names of the country and its translations. These names are used as
+a baseline. It is always possible to search countries by the given names, no
+matter what other names are in the OSM data. They are also used as a fallback
+when a needed translation is not available.
+
+!!! Note
+    The list of names per country is currently fairly large because Nominatim
+    supports translations in many languages per default. That is why the
+    name lists have been separated out into extra files. You can find the
+    name lists in the file `settings/country-names/<country code>.yaml`.
+    The names section in the main country settings file only refers to these
+    files via the special `!include` directive.
+
+#### `postcode`
+
+Describes the format of the postcode that is in use in the country.
+
+When a country has no official postcodes, set this to no. Example:
+
+```
+ae:
+    postcode: no
+```
+
+When a country has a postcode, you need to state the postcode pattern and
+the default output format. Example:
+
+```
+bm:
+    postcode:
+      pattern: "(ll)[ -]?(dd)"
+      output: \1 \2
+```
+
+The **pattern** is a regular expression that describes the possible formats
+accepted as a postcode. The pattern follows the standard syntax for
+[regular expressions in Python](https://docs.python.org/3/library/re.html#regular-expression-syntax)
+with two extra shortcuts: `d` is a shortcut for a single digit ([0-9])
+and `l` for a single ASCII letter ([A-Z]).
+
+Use match groups to indicate groups in the postcode that may optionally be
+separated with a space or a hyphen.
+
+For example, the postcode for Bermuda above always consists of two letters
+and two digits. They may optionally be separated by a space or hyphen. That
+means that Nominatim will consider `AB56`, `AB 56` and `AB-56` spelling variants
+for one and the same postcode.
+
+Never add the country code in front of the postcode pattern. Nominatim will
+automatically accept variants with a country code prefix for all postcodes.
+
+The **output** field is an optional field that describes what the canonical
+spelling of the postcode should be. The format is the
+[regular expression expand syntax](https://docs.python.org/3/library/re.html#re.Match.expand) referring back to the bracket groups in the pattern.
+
+Most simple postcodes only have one spelling variant. In that case, the
+**output** can be omitted. The postcode will simply be used as is.
+
+In the Bermuda example above, the canonical spelling would be to have a space
+between letters and digits.
+
+!!! Warning
+    When your postcode pattern covers multiple variants of the postcode, then
+    you must explicitly state the canonical output or Nominatim will not
+    handle the variations correctly.
+
+### Other country-specific configuration
+
+There are some other configuration files where you can set localized settings
+according to the assigned country. These are:
+
+ * [Place ranking configuration](Ranking.md)
+
+Please see the linked documentation sections for more information.
--- a/docs/customize/Import-Styles.md
+++ b/docs/customize/Import-Styles.md
@@ -0,0 +1,443 @@
+## Configuring the Import
+
+In the very first step of a Nominatim import, OSM data is loaded into the
+database. Nominatim uses [osm2pgsql](https://osm2pgsql.org) for this task.
+It comes with a [flex style](https://osm2pgsql.org/doc/manual.html#the-flex-output)
+specifically tailored to filter and convert OSM data into Nominatim's
+internal data representation.
+
+There are a number of default configurations for the flex style which
+result in geocoding databases of different detail. The
+[Import section](../admin/Import.md#filtering-imported-data) explains
+these default configurations in detail.
+
+You can also create your own custom style. Put the style file into your
+project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
+It is always recommended to start with one of the standard styles and customize
+those. You find the standard styles under the name `import-<stylename>.lua`
+in the standard Nominatim configuration path (usually `/etc/nominatim` or
+`/usr/local/etc/nominatim`).
+
+The remainder of the page describes how the flex style works and how to
+customize it.
+
+### The `flex-base.lua` module
+
+The core of Nominatim's flex import configuration is the `flex-base` module.
+It defines the table layout used by Nominatim and provides standard
+implementations for the import callbacks that make it easy to customize
+how OSM tags are used by Nominatim.
+
+Every custom style should include this module to make sure that the correct
+tables are created. Thus start your custom style as follows:
+
+``` lua
+local flex = require('flex-base')
+
+```
+
+The following sections explain how the module can be customized.
+
+
+### Changing the recognized tags
+
+If you just want to change which OSM tags are recognized during import,
+then there are a number of convenience functions to set the tag lists used
+during the processing.
+
+!!! warning
+    There are no built-in defaults for the tag lists, so all the functions
+    need to be called from your style script to fully process the data.
+    Make sure you start from one of the default styles and only modify
+    the data you are interested in. You can also derive your style from an
+    existing style by importing the appropriate module, e.g.
+    `local flex = require('import-street')`.
+
+Many of the following functions take _key match lists_. These lists can
+contain three kinds of strings to match against tag keys:
+A string that ends in an asterisk `*` is a prefix match and accordingly matches
+against any key that starts with the given string (minus the `*`). 
+A suffix match can be defined similarly with a string that starts with a `*`.
+Any other string is matched exactly against tag keys.
+
+
+#### `set_main_tags()` - principal tags
+
+If a principal or main tag is found on an OSM object, then the object
+is included in Nominatim's search index. A single object may also have
+multiple main tags. In that case, the object will be included multiple
+times in the index, once for each main tag.
+
+The flex script distinguishes between four types of main tags:
+
+* __always__: a main tag that is used unconditionally
+* __named__: consider this main tag only, if the object has a proper name
+  (a reference is not enough, see below).
+* __named_with_key__: consider this main tag only, when the object has
+  a proper name with a domain prefix. For example, if the main tag is
+  `bridge=yes`, then it will only be added as an extra row, if there is
+  a tag `bridge:name[:XXX]` for the same object. If this property is set,
+  all other names that are not domain-specific are ignored.
+* __fallback__: use this main tag only, if there is no other main tag.
+  Fallback always implies `named`, i.e. fallbacks are only tried for
+  named objects.
+
+The `set_main_tags()` function takes exactly one table parameter which
+defines the keys and key/value combinations to include and the kind of
+main tag. Each lua table key defines an OSM tag key. The value may
+be a string defining the kind of main key as described above. Then the tag will
+be considered a main tag for any possible value. To further restrict
+which values are acceptable, give a table with the permitted values
+and their kind of main tag. If the table contains a simple value without
+key, then this is used as default for values that are not listed.
+
+!!! example
+    ``` lua
+    local flex = require('import-full')
+
+    flex.set_main_tags{
+        boundary = {administrative = 'named'},
+        highway = {'always', street_lamp = 'named'},
+        landuse = 'fallback'
+    }
+    ```
+
+    In this example an object with a `boundary` tag will only be included
+    when it has a value of `administrative`. Objects with `highway` tags are
+    always included. However when the value is `street_lamp` then the object
+    must have a name, too. With any other value, the object is included
+    independently of the name. Finally, if a `landuse` tag is present then
+    it will be used independently of the concrete value if neither boundary
+    nor highway tags were found and the object is named.
+
+
+#### `set_prefilters()` - ignoring tags
+
+Pre-filtering of tags allows to ignore them for any further processing.
+Thus pre-filtering takes precedence over any other tag processing. This is
+useful when some specific key/value combinations need to be excluded from
+processing. When tags are filtered, they may either be deleted completely
+or moved to `extratags`. Extra tags are saved with the object and returned
+to the user when requested, but are not used otherwise.
+
+`set_prefilters()` takes a table with four optional fields:
+
+* __delete_keys__ is a _key match list_ for tags that should be deleted
+* __delete_tags__ contains a table of tag keys pointing to a list of tag
+  values. Tags with matching key/value pairs are deleted.
+* __extra_keys__ is a _key match list_ for tags which should be saved into
+  extratags
+* __extra_tags__ contains a table of tag keys pointing to a list of tag
+  values. Tags with matching key/value pairs are moved to extratags.
+
+Key lists may contain three kinds of strings:
+A string that ends in an asterisk `*` is a prefix match and accordingly matches
+against any key that starts with the given string (minus the `*`). 
+A suffix match can be defined similarly with a string that starts with a `*`.
+Any other string is matched exactly against tag keys.
+
+!!! example
+    ``` lua
+    local flex = require('import-full')
+
+    flex.set_prefilters{
+        delete_keys = {'source', 'source:*'},
+        extra_tags = {amenity = {'yes', 'no'}}
+    }
+    flex.set_main_tags{
+        amenity = 'always'
+    }
+    ```
+
+    In this example any tags `source` and tags that begin with `source:`  are
+    deleted before any other processing is done. Getting rid of frequent tags
+    this way can speed up the import.
+
+    Tags with `amenity=yes` or `amenity=no` are moved to extratags. Later
+    all tags with an `amenity` key are made a main tag. This effectively means
+    that Nominatim will use all amenity tags except for those with value
+    yes and no.
+
+#### `set_name_tags()` - defining names
+
+The flex script distinguishes between two kinds of names:
+
+* __main__: the primary names make an object fully searchable.
+  Main tags of type _named_ will only cause the object to be included when
+  such a primary name is present. Primary names are usually those found
+  in the `name` tag and its variants.
+* __extra__: extra names are still added to the search index but they are
+  alone not sufficient to make an object named.
+
+`set_name_tags()` takes a table with two optional fields `main` and `extra`.
+They take _key match lists_ for main and extra names respectively.
+
+!!! example
+    ``` lua
+    local flex = require('flex-base')
+
+    flex.set_main_tags{highway = {traffic_light = 'named'}}
+    flex.set_name_tags{main = {'name', 'name:*'},
+                       extra = {'ref'}
+                      }
+    ```
+
+    This example creates a search index over traffic lights but will
+    only include those that have a common name and not those which just
+    have some reference ID from the city.
+
+#### `set_address_tags()` - defining address parts
+
+Address tags will be used to build up the address of an object.
+
+`set_address_tags()` takes a table with arbitrary fields pointing to
+_key match lists_. Two fields have a special meaning:
+
+* __main__: defines
+the tags that make a full address object out of the OSM object. This
+is usually the housenumber or variants thereof. If a main address tag
+appears, then the object will always be included, if necessary with a
+fallback of `place=house`. If the key has a prefix of `addr:` or `is_in:`
+this will be stripped.
+
+* __extra__: defines all supplementary tags for addresses, tags like `addr:street`, `addr:city` etc. If the key has a prefix of `addr:` or `is_in:` this will be stripped.
+
+All other fields will be handled as summary fields. If a key matches the
+key match list, then its value will be added to the address tags with the
+name of the field as key. If multiple tags match, then an arbitrary one
+wins.
+
+Country tags are handled slightly special. Only tags with a two-letter code
+are accepted, all other values are discarded.
+
+!!! example
+    ``` lua
+    local flex = require('import-full')
+
+    flex.set_address_tags{
+        main = {'addr:housenumber'},
+        extra = {'addr:*'},
+        postcode = {'postal_code', 'postcode', 'addr:postcode'},
+        country = {'country-code', 'ISO3166-1'}
+    }
+    ```
+
+    In this example all tags which begin with `addr:` will be saved in
+    the address tag list. If one of the tags is `addr:housenumber`, the
+    object will fall back to be entered as a `place=house` in the database
+    unless there is another interesting main tag to be found.
+
+    Tags with keys `country-code` and `ISO3166-1` are saved with their
+    value under `country` in the address tag list. The same thing happens
+    to postcodes, they will always be saved under the key `postcode` thus
+    normalizing the multitude of keys that are used in the OSM database.
+
+
+#### `set_unused_handling()` - processing remaining tags
+
+This function defines what to do with tags that remain after all tags
+have been classified using the functions above. There are two ways in
+which the function can be used:
+
+`set_unused_handling{delete_keys = ..., delete_tags = ...}` deletes all
+keys that match the descriptions in the parameters and moves all remaining
+tags into the extratags list.
+`set_unused_handling{extra_keys = ..., extra_tags = ...}` moves all tags
+matching the parameters into the extratags list and then deletes the remaining
+tags. For the format of the parameters see the description in `set_prefilters()`
+above.
+
+!!! example
+    ``` lua
+    local flex = require('import-full')
+
+    flex.set_address_tags{
+        main = {'addr:housenumber'},
+        extra = {'addr:*', 'tiger:county'}
+    }
+    flex.set_unused_handling{delete_keys = {'tiger:*'}}
+    ```
+
+    In this example all remaining tags except those beginning with `tiger:`
+    are moved to the extratags list. Note that it is not possible to
+    already delete the tiger tags with `set_prefilters()` because that
+    would remove tiger:county before the address tags are processed.
+
+### Customizing osm2pgsql callbacks
+
+osm2pgsql expects the flex style to implement three callbacks, one process
+function per OSM type. If you want to implement special handling for
+certain OSM types, you can override the default implementations provided
+by the flex-base module.
+
+#### Changing the relation types to be handled
+
+The default script only allows relations of type `multipolygon`, `boundary`
+and `waterway`. To add other relation types, set `RELATION_TYPES` for
+the type to the kind of geometry that should be created. The following
+kinds of geometries can be used:
+
+* __relation_as_multipolygon__ creates a (Multi)Polygon from the ways in
+  the relation. If the ways do not form a valid area, then the object is
+  silently discarded.
+* __relation_as_multiline__ creates a (Multi)LineString from the ways in
+  the relation. Ways are combined as much as possible without any regard
+  to their order in the relation.
+
+!!! Example
+    ``` lua
+    local flex = require('import-full')
+
+    flex.RELATION_TYPES['site'] = flex.relation_as_multipolygon
+    ```
+
+    With this line relations of `type=site` will be included in the index
+    according to main tags found. This only works when the site relation
+    resolves to a valid area. Nodes in the site relation are not part of the
+    geometry.
+
+
+#### Adding additional logic to processing functions
+
+The default processing functions are also exported by the flex-base module
+as `process_node`, `process_way` and `process_relation`. These can be used
+to implement your own processing functions with some additional processing
+logic.
+
+!!! Example
+    ``` lua
+    local flex = require('import-full')
+
+    function osm2pgsql.process_relation(object)
+        if object.tags.boundary ~= 'administrative' or object.tags.admin_level ~= '2' then
+          flex.process_relation(object)
+        end
+    end
+    ```
+
+    This example discards all country-level boundaries and uses standard
+    handling for everything else. This can be useful if you want to use
+    your own custom country boundaries.
+
+
+### Customizing the main processing function
+
+The main processing function of the flex style can be found in the function
+`process_tags`. This function is called for all OSM object kinds and is
+responsible for filtering the tags and writing out the rows into PostgreSQL.
+
+!!! Example
+    ``` lua
+    local flex = require('import-full')
+
+    local original_process_tags = flex.process_tags
+
+    function flex.process_tags(o)
+        if o.object.tags.highway ~= nil and o.object.tags.access == 'no' then
+            return
+        end
+
+        original_process_tags(o)
+    end
+    ```
+
+    This example shows the most simple customization of the process_tags function.
+    It simply adds some additional processing before running the original code.
+    To do that, first save the original function and then overwrite process_tags
+    from the module. In this example all highways which are not accessible
+    by anyone will be ignored.
+
+
+#### The `Place` class
+
+The `process_tags` function receives a Lua object of `Place` type which comes
+with some handy functions to collect the data necessary for geocoding and
+writing it into the place table. Always use this object to fill the table.
+
+The Place class has some attributes which you may access read-only:
+
+* __object__ is the original OSM object data handed in by osm2pgsql
+* __admin_level__ is the content of the admin_level tag, parsed into an
+  integer and normalized to a value between 0 and 15
+* __has_name__ is a boolean indicating if the object has a full name
+* __names__ is a table with the collected list of name tags
+* __address__ is a table with the collected list of address tags
+* __extratags__ is a table with the collected list of additional tags to save
+
+There are a number of functions to fill these fields. All functions expect
+a table parameter with fields as indicated in the description.
+Many of these functions expect match functions which are described in detail
+further below.
+
+* __delete{match=...}__ removes all tags that match the match function given
+  in _match_.
+* __grab_extratags{match=...}__ moves all tags that match the match function
+  given in _match_ into extratags. Returns the number of tags moved.
+* __clean{delete=..., extra=...}__ deletes all tags that match _delete_ and
+  moves the ones that match _extra_  into extratags
+* __grab_address_parts{groups=...}__ moves matching tags into the address table.
+  _groups_ must be a group match function. Tags of the group `main` and
+  `extra` are added to the address table as is but with `addr:` and `is_in:`
+  prefixes removed from the tag key. All other groups are added with the
+  group name as key and the value from the tag. Multiple values of the same
+  group overwrite each other. The function returns the number of tags saved
+  from the main group.
+* __grab_name_parts{groups=...}__ moves matching tags into the name table.
+  _groups_ must be a group match function. If a tag of the group `main` is
+  present, the object will be marked as having a name. Tags of group `house`
+  produce a fallback to `place=house`. This fallback is returned by the function
+  if present.
+
+There are two functions to write a row into the place table. Both functions
+expect the main tag (key and value) for the row and then use the collected
+information from the name, address, extratags etc. fields to complete the row.
+They also have a boolean parameter `save_extra_mains` which defines how any
+unprocessed tags are handled: when True, the tags will be saved as extratags,
+when False, they will be simply discarded.
+
+* __write_row(key, value, save_extra_mains)__ creates a new table row from
+  the current state of the Place object.
+* __write_place(key, value, mtype, save_extra_mains)__ creates a new row
+  conditionally. When value is nil, the function will attempt to look up the
+  value in the object tags. If value is still nil or mtype is nil, the row
+  is ignored. An mtype of `always` will then always write out the row,
+  a mtype of `named` only, when the object has a full name. When mtype
+  is `named_with_key`, the function checks for a domain name, i.e. a name
+  tag prefixed with the name of the main key. Only if at least one is found,
+  the row will be written. The names are replaced with the domain names found.
+
+#### Match functions
+
+The Place functions usually expect either a _match function_ or a
+_group match function_ to find the tags to apply their function to.
+
+The __match function__ is a Lua function which takes two parameters,
+key and value, and returns a boolean to indicate that a tag matches. The
+flex-base module has a convenience function `tag_match()` to create such a
+function. It takes a table with two optional fields: `keys` takes a key match
+list (see above), `tags` takes a table with keys that point to a list of
+possible values, thus defining key/value matches.
+
+The __group match function__ is a Lua function which also takes two parameters,
+key and value, and returns a string indicating which group or type they
+belong to. The `tag_group()` function can be used to create such a function. It expects
+a table where the group names are the keys and the values are a key match list.
+
+
+
+### Using the gazetteer output of osm2pgsql
+
+Nominatim still allows you to configure the gazetteer output to remain
+backwards compatible with older imports. It will be automatically used
+when the style file name ends in `.style`. For documentation of the
+old import style, please refer to the documentation of older releases
+of Nominatim. Do not use the gazetteer output for new imports. There is no
+guarantee that new versions of Nominatim are fully compatible with the
+gazetteer output.
+
+### Changing the Style of Existing Databases
+
+There is normally no issue changing the style of a database that is already
+imported and now kept up-to-date with change files. Just be aware that any
+change in the style applies to updates only. If you want to change the data
+that is already in the database, then a reimport is necessary.
--- a/docs/customize/Importance.md
+++ b/docs/customize/Importance.md
@@ -0,0 +1,49 @@
+## Importance
+
+Search requests can yield multiple results which match equally well with
+the original query. In such case Nominatim needs to order the results
+according to a different criterion: importance. This is a measure for how
+likely it is that a user will search for a given place. This section explains
+the sources Nominatim uses for computing importance of a place and how to
+customize them.
+
+### How importance is computed
+
+The main value for importance is derived from page ranking values for Wikipedia
+pages for a place. For places that do not have their own
+Wikipedia page, a formula is used that derives a static importance from the
+place's [search rank](../customize/Ranking.md#search-rank).
+
+In a second step, a secondary importance value is added which is meant to
+represent how well-known the general area is where the place is located. It
+functions as a tie-breaker between places with very similar primary
+importance values.
+
+nominatim.org has preprocessed importance tables for the
+[primary Wikipedia rankings](https://nominatim.org/data/wikimedia-importance.sql.gz)
+and for a secondary importance based on the number of tile views on openstreetmap.org.
+
+### Customizing secondary importance
+
+The secondary importance is implemented as a simple
+[Postgis raster](https://postgis.net/docs/raster.html) table, where Nominatim
+looks up the value for the coordinates of the centroid of a place. You can
+provide your own secondary importance raster in form of an SQL file named
+`secondary_importance.sql.gz` in your project directory.
+
+The SQL file needs to drop and (re)create a table `secondary_importance` which
+must as a minimum contain a column `rast` of type `raster`. The raster must
+be in EPSG:4326 and contain 16bit unsigned ints
+(`raster_constraint_pixel_types(rast) = '{16BUI}'`). Any other columns in the
+table will be ignored. You must furthermore create an index as follows:
+
+```
+CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(rast))
+```
+
+The following raster2pgsql command will create a table that conforms to
+the requirements:
+
+```
+raster2pgsql -I -C -Y -d -t 128x128 input.tiff public.secondary_importance
+```
--- a/docs/customize/Overview.md
+++ b/docs/customize/Overview.md
@@ -0,0 +1,20 @@
+Nominatim comes with a predefined set of configuration options that should
+work for most standard installations. If you have special requirements, there
+are many places where the configuration can be adapted. This chapter describes
+the following configurable parts:
+
+* [Global Settings](Settings.md) has a detailed description of all parameters that
+  can be set in your local `.env` configuration
+* [Import styles](Import-Styles.md) explains how to write your own import style
+  in order to control what kind of OSM data will be imported
+* [Place ranking](Ranking.md) describes the configuration around classifying
+  places in terms of their importance and their role in an address
+* [Tokenizers](Tokenizers.md) describes the configuration of the module
+  responsible for analysing and indexing names
+* [Special Phrases](Special-Phrases.md) are common nouns or phrases that
+  can be used in search to identify a class of places
+
+There are also guides for adding the following external data:
+
+* [US house numbers from the TIGER dataset](Tiger.md)
+* [External postcodes](Postcodes.md)
--- a/docs/customize/Postcodes.md
+++ b/docs/customize/Postcodes.md
@@ -0,0 +1,37 @@
+# External postcode data
+
+Nominatim creates a table of known postcode centroids during import. This table
+is used for searches of postcodes and for adding postcodes to places where the
+OSM data does not provide one. These postcode centroids are mainly computed
+from the OSM data itself. In addition, Nominatim supports reading postcode
+information from an external CSV file, to supplement the postcodes that are
+missing in OSM.
+
+To enable external postcode support, simply put one CSV file per country into
+your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
+two-letter country code for which to apply the file. The file may also be
+gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
+
+The CSV file must use commas as a delimiter and have a header line. Nominatim
+expects three columns to be present: `postcode`, `lat` and `lon`. All other
+columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
+postcode centroids in WGS84.
+
+The postcode files are loaded only when there is data for the given country
+in your database. For example, if there is a `us_postcodes.csv` file in your
+project directory but you import only an excerpt of Italy, then the US postcodes
+will simply be ignored.
+
+As a rule, the external postcode data should be put into the project directory
+**before** starting the initial import. Still, you can add, remove and update the
+external postcode data at any time. Simply
+run:
+
+```
+nominatim refresh --postcodes
+```
+
+to make the changes visible in your database. Be aware, however, that the changes
+only have an immediate effect on searches for postcodes. Postcodes that were
+added to places are only updated, when they are reindexed. That usually happens
+only during replication updates.
--- a/docs/customize/Ranking.md
+++ b/docs/customize/Ranking.md
@@ -1,8 +1,7 @@
 # Place Ranking in Nominatim

 Nominatim uses two metrics to rank a place: search rank and address rank.
-Both can be assigned a value between 0 and 30. They serve slightly
-different purposes, which are explained in this chapter.
+This chapter explains what place ranking means and how it can be customized.

 ## Search rank

--- a/docs/customize/Settings.md
+++ b/docs/customize/Settings.md
@@ -0,0 +1,751 @@
+This section provides a reference of all configuration parameters that can
+be used with Nominatim.
+
+# Configuring Nominatim
+
+Nominatim uses [dotenv](https://github.com/theskumar/python-dotenv) to manage
+its configuration settings. There are two means to set configuration
+variables: through an `.env` configuration file or through an environment
+variable.
+
+The `.env` configuration file needs to be placed into the
+[project directory](../admin/Import.md#creating-the-project-directory). It
+must contain configuration parameters in `<parameter>=<value>` format.
+Please refer to the dotenv documentation for details.
+
+The configuration options may also be set in the form of shell environment
+variables. This is particularly useful, when you want to temporarily change
+a configuration option. For example, to force the replication service to
+download the next change, you can temporarily disable the update interval:
+
+    NOMINATIM_REPLICATION_UPDATE_INTERVAL=0 nominatim replication --once
+
+If a configuration option is defined through .env file and environment
+variable, then the latter takes precedence. 
+
+## Configuration Parameter Reference
+
+### Import and Database Settings
+
+#### NOMINATIM_DATABASE_DSN
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Database connection string |
+| **Format:**        | string: `pgsql:<param1>=<value1>;<param2>=<value2>;...` |
+| **Default:**       | pgsql:dbname=nominatim |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Sets the connection parameters for the Nominatim database. At a minimum
+the name of the database (`dbname`) is required. You can set any additional
+parameter that is understood by libpq. See the [Postgres documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) for a full list.
+
+!!! note
+    It is usually recommended not to set the password directly in this
+    configuration parameter. Use a
+    [password file](https://www.postgresql.org/docs/current/libpq-pgpass.html)
+    instead.
+
+
+#### NOMINATIM_DATABASE_WEBUSER
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Database query user |
+| **Format:**        | string  |
+| **Default:**       | www-data |
+| **After Changes:** | cannot be changed after import |
+
+Defines the name of the database user that will run search queries. Usually
+this is the user under which the webserver is executed. When running Nominatim
+via php-fpm, you can also define a separate query user. The Postgres user
+needs to be set up before starting the import.
+
+Nominatim grants minimal rights to this user to all tables that are needed
+for running geocoding queries.
+
+
+#### NOMINATIM_DATABASE_MODULE_PATH
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Directory where to find the PostgreSQL server module |
+| **Format:**        | path |
+| **Default:**       | _empty_ (use `<project_directory>/module`) |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:**       | Legacy tokenizer only |
+
+Defines the directory in which the PostgreSQL server module `nominatim.so`
+is stored. The directory and module must be accessible by the PostgreSQL
+server.
+
+For information on how to use this setting when working with external databases,
+see [Advanced Installations](../admin/Advanced-Installations.md).
+
+The option is only used by the Legacy tokenizer and ignored otherwise.
+
+
+#### NOMINATIM_TOKENIZER
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Tokenizer used for normalizing and parsing queries and names |
+| **Format:**        | string |
+| **Default:**       | icu |
+| **After Changes:** | cannot be changed after import |
+
+Sets the tokenizer type to use for the import. For more information on
+available tokenizers and how they are configured, see
+[Tokenizers](../customize/Tokenizers.md).
+
+
+#### NOMINATIM_TOKENIZER_CONFIG
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration file for the tokenizer |
+| **Format:**        | path |
+| **Default:**       | _empty_ (default file depends on tokenizer) |
+| **After Changes:** | see documentation for each tokenizer |
+
+Points to the file with additional configuration for the tokenizer.
+See the [Tokenizer](../customize/Tokenizers.md) descriptions for details
+on the file format.
+
+If a relative path is given, then the file is searched first relative to the
+project directory and then in the global settings directory.
+
+#### NOMINATIM_MAX_WORD_FREQUENCY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of occurrences before a word is considered frequent |
+| **Format:**        | int |
+| **Default:**       | 50000 |
+| **After Changes:** | cannot be changed after import |
+| **Comment:**       | Legacy tokenizer only |
+
+The word frequency count is used by the Legacy tokenizer to automatically
+identify _stop words_. Any partial term that occurs more often than what
+is defined in this setting, is effectively ignored during search.
+
+
+#### NOMINATIM_LIMIT_REINDEXING
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Avoid invalidating large areas |
+| **Format:**        | bool |
+| **Default:**       | yes |
+
+Nominatim computes the address of each place at indexing time. This has the
+advantage of making search faster but also means that more objects need to
+be invalidated when the data changes. For example, changing the name of
+the state of Florida would require recomputing every single address point
+in the state to make the new name searchable in conjunction with addresses.
+
+Setting this option to 'yes' means that Nominatim skips reindexing of contained
+objects when the area becomes too large.
+
+
+#### NOMINATIM_LANGUAGES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Restrict search languages |
+| **Format:**        | string: comma-separated list of language codes |
+| **Default:**       | _empty_ |
+
+Normally Nominatim will include all language variants of name:XX
+in the search index. Set this to a comma separated list of language
+codes, to restrict import to a subset of languages.
+
+Currently only affects the initial import of country names and special phrases.
+
+
+#### NOMINATIM_TERM_NORMALIZATION
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Rules for normalizing terms for comparisons |
+| **Format:**        | string: semicolon-separated list of ICU rules |
+| **Default:**       | :: NFD (); [[:Nonspacing Mark:] [:Cf:]] >;  :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC (); |
+| **Comment:**       | Legacy tokenizer only |
+
+[Special phrases](Special-Phrases.md) have stricter matching requirements than
+normal search terms. They must appear exactly in the query after this term
+normalization has been applied.
+
+Only has an effect on the Legacy tokenizer. For the ICU tokenizer the rules
+defined in the
+[normalization section](Tokenizers.md#normalization-and-transliteration)
+will be used.
+
+
+#### NOMINATIM_USE_US_TIGER_DATA
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable searching for Tiger house number data |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+
+When this setting is enabled, search and reverse queries also take data
+from [Tiger house number data](Tiger.md) into account.
+
+
+#### NOMINATIM_USE_AUX_LOCATION_DATA
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable searching in external house number tables |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:**       | Do not use. |
+
+When this setting is enabled, search queries also take data from external
+house number tables into account.
+
+*Warning:* This feature is currently unmaintained and should not be used.
+
+
+#### NOMINATIM_HTTP_PROXY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Use HTTP proxy when downloading data |
+| **Format:**        | boolean |
+| **Default:**       | no |
+
+When this setting is enabled and at least
+[NOMINATIM_HTTP_PROXY_HOST](#nominatim_http_proxy_host) and
+[NOMINATIM_HTTP_PROXY_PORT](#nominatim_http_proxy_port) are set, the
+configured proxy will be used, when downloading external data like
+replication diffs.
+
+
+#### NOMINATIM_HTTP_PROXY_HOST
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Host name of the proxy to use |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the proxy host name.
+
+
+#### NOMINATIM_HTTP_PROXY_PORT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Port number of the proxy to use |
+| **Format:**        | integer |
+| **Default:**       | 3128 |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the port number to use with the proxy.
+
+
+#### NOMINATIM_HTTP_PROXY_LOGIN
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Username for proxies that require login |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the username for proxies that require a login.
+
+
+#### NOMINATIM_HTTP_PROXY_PASSWORD
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Password for proxies that require login |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the password for proxies that require a login.
+
+
+#### NOMINATIM_OSM2PGSQL_BINARY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Location of the osm2pgsql binary |
+| **Format:**        | path |
+| **Default:**       | _empty_ (use binary shipped with Nominatim) |
+| **Comment:**       | EXPERT ONLY |
+
+Nominatim uses [osm2pgsql](https://osm2pgsql.org) to load the OSM data
+initially into the database. Nominatim comes bundled with a version of
+osm2pgsql that is guaranteed to be compatible. Use this setting to use
+a different binary instead. You should do this only when you know exactly
+what you are doing. If the osm2pgsql version is not compatible, then the
+result is undefined.
+
+
+#### NOMINATIM_WIKIPEDIA_DATA_PATH
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Directory with the wikipedia importance data |
+| **Format:**        | path |
+| **Default:**       | _empty_ (project directory) |
+
+Set a custom location for the
+[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
+unset, Nominatim expects the data to be saved in the project directory.
+
+#### NOMINATIM_ADDRESS_LEVEL_CONFIG
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration file for rank assignments |
+| **Format:**        | path |
+| **Default:**       | address-levels.json |
+
+The _address level configuration_ defines the rank assignments for places. See
+[Place Ranking](Ranking.md) for a detailed explanation what rank assignments
+are and what the configuration file must look like.
+
+When a relative path is given, then the file is searched first relative to the
+project directory and then in the global settings directory.
+
+
+#### NOMINATIM_IMPORT_STYLE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration to use for the initial OSM data import |
+| **Format:**        | string or path |
+| **Default:**       | extratags |
+
+The _style configuration_ describes which OSM objects and tags are taken
+into consideration for the search database. Nominatim comes with a set
+of pre-configured styles, that may be configured here.
+
+You can also write your own custom style and point the setting to the file
+with the style. When a relative path is given, then the style file is searched
+first relative to the project directory and then in the global settings
+directory.
+
+See [Import Styles](Import-Styles.md)
+for more information on the available internal styles and the format of the
+configuration file.
+
+#### NOMINATIM_FLATNODE_FILE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Location of osm2pgsql flatnode file |
+| **Format:**        | path |
+| **Default:**       | _empty_ (do not use a flatnode file) |
+| **After Changes:** | Only change when moving the file physically. |
+
+The `osm2pgsql flatnode file` is a file that efficiently stores geographic
+location for OSM nodes. For larger imports it can significantly speed up
+the import. When this option is unset, then osm2pgsql uses a PostgreSQL table
+to store the locations.
+
+When a relative path is given, then the flatnode file is created/searched
+relative to the project directory.
+
+!!! warning
+
+    The flatnode file is not only used during the initial import but also
+    when adding new data with `nominatim add-data` or `nominatim replication`.
+    Make sure you keep the flatnode file around and this setting unmodified,
+    if you plan to add more data or run regular updates.
+
+
+#### NOMINATIM_TABLESPACE_*
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Group of settings for distributing the database over tablespaces |
+| **Format:**        | string |
+| **Default:**       | _empty_ (do not use a table space) |
+| **After Changes:** | no effect after initial import |
+
+Nominatim allows to distribute the search database over up to 10 different
+[PostgreSQL tablespaces](https://www.postgresql.org/docs/current/manage-ag-tablespaces.html).
+If you use this option, make sure that the tablespaces exist before starting
+the import.
+
+The available tablespace groups are:
+
+NOMINATIM_TABLESPACE_SEARCH_DATA
+:    Data used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_SEARCH_INDEX
+:    Indexes used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_OSM_DATA
+:    Raw OSM data cache used for import and updates.
+
+NOMINATIM_TABLESPACE_OSM_INDEX
+:    Indexes on the raw OSM data cache.
+
+NOMINATIM_TABLESPACE_PLACE_DATA
+:    Data table with the pre-filtered but still unprocessed OSM data.
+     Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_PLACE_INDEX
+:    Indexes on raw data table. Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_DATA
+:    Data tables used for computing search terms and addresses of places
+     during import and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_INDEX
+:    Indexes on the data tables for search term and address computation.
+     Used only for import and updates.
+
+NOMINATIM_TABLESPACE_AUX_DATA
+:    Auxiliary data tables for non-OSM data, e.g. for Tiger house number data.
+
+NOMINATIM_TABLESPACE_AUX_INDEX
+:    Indexes on auxiliary data tables.
+
+
+### Replication Update Settings
+
+#### NOMINATIM_REPLICATION_URL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Base URL of the replication service |
+| **Format:**        | url |
+| **Default:**       | https://planet.openstreetmap.org/replication/minute |
+| **After Changes:** | run `nominatim replication --init` |
+
+Replication services deliver updates to OSM data. Use this setting to choose
+which replication service to use. See [Updates](../admin/Update.md) for more
+information on how to set up regular updates.
+
+#### NOMINATIM_REPLICATION_MAX_DIFF
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Maximum amount of data to download per update cycle (in MB) |
+| **Format:**        | integer |
+| **Default:**       | 50 |
+| **After Changes:** | restart the replication process |
+
+At each update cycle Nominatim downloads diffs until either no more diffs
+are available on the server (i.e. the database is up-to-date) or the limit
+given in this setting is exceeded. Nominatim guarantees to download at least
+one diff, if one is available, no matter how small the setting.
+
+The default for this setting is fairly conservative because Nominatim keeps
+all data downloaded in one cycle in RAM. Using large values in a production
+server may interfere badly with the search frontend because it evicts data
+from RAM that is needed for speedy answers to incoming requests. It is usually
+a better idea to keep this setting lower and run multiple update cycles
+to catch up with updates.
+
+When catching up in non-production mode, for example after the initial import,
+the setting can easily be changed temporarily on the command line:
+
+    NOMINATIM_REPLICATION_MAX_DIFF=3000 nominatim replication
+
+
+#### NOMINATIM_REPLICATION_UPDATE_INTERVAL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Publication interval of the replication service (in seconds) |
+| **Format:**        | integer |
+| **Default:**       | 75 |
+| **After Changes:** | restart the replication process |
+
+This setting determines when Nominatim will attempt to download again a new
+update. The time is computed from the publication date of the last diff
+downloaded. Setting this to a slightly higher value than the actual
+publication interval avoids unnecessary rechecks.
+
+
+#### NOMINATIM_REPLICATION_RECHECK_INTERVAL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Wait time to recheck for a pending update (in seconds)  |
+| **Format:**        | integer |
+| **Default:**       | 60 |
+| **After Changes:** | restart the replication process |
+
+When replication updates are run in continuous mode (using `nominatim replication`),
+this setting determines how long Nominatim waits until it looks for updates
+again when updates were not available on the server.
+
+Note that this is different from
+[NOMINATIM_REPLICATION_UPDATE_INTERVAL](#nominatim_replication_update_interval).
+Nominatim will never attempt to query for new updates for UPDATE_INTERVAL
+seconds after the current database date. Only after the update interval has
+passed it asks for new data. If then no new data is found, it waits for
+RECHECK_INTERVAL seconds before it attempts again.
+
+### API Settings
+
+#### NOMINATIM_CORS_NOACCESSCONTROL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Send permissive CORS access headers |
+| **Format:**        | boolean |
+| **Default:**       | yes |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When this setting is enabled, API HTTP responses include the HTTP
+[CORS](https://en.wikipedia.org/wiki/CORS) headers
+`access-control-allow-origin: *` and `access-control-allow-methods: OPTIONS,GET`.
+
+#### NOMINATIM_MAPICON_URL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | URL prefix for static icon images |
+| **Format:**        | url |
+| **Default:**       | _empty_ |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When a mapicon URL is configured, then Nominatim includes an additional `icon`
+field in the responses, pointing to an appropriate icon for the place type.
+
+Map icons used to be included in Nominatim itself but now have moved to the
+[nominatim-ui](https://github.com/osm-search/nominatim-ui/) project. If you
+want the URL to be included in API responses, make the `/mapicon`
+directory of the project available under a public URL and point this setting
+to the directory.
+
+
+#### NOMINATIM_DEFAULT_LANGUAGE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Language of responses when no language is requested |
+| **Format:**        | language code |
+| **Default:**       | _empty_ (use the local language of the feature) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim localizes the place names in responses when the corresponding
+translation is available. Users can request a custom language setting through
+the HTTP accept-languages header or through the explicit parameter
+[accept-languages](../api/Search.md#language-of-results). If neither is
+given, it falls back to this setting. If the setting is also empty, then
+the local language (in OSM: the name tag without any language suffix) is
+used.
+
+
+#### NOMINATIM_SEARCH_BATCH_MODE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable a special batch query mode |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
+
+
+This feature is currently undocumented and potentially broken.
+
+
+#### NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Threshold for switching the search index lookup strategy |
+| **Format:**        | integer |
+| **Default:**       | 500 |
+| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
+
+This setting defines the threshold over which a name is no longer considered
+as rare. When searching for places with rare names, only the name is used
+for place lookups. Otherwise the name and any address information is used.
+
+This setting only has an effect after `nominatim refresh --word-counts` has
+been called to compute the word frequencies.
+
+
+#### NOMINATIM_LOOKUP_MAX_COUNT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Maximum number of OSM ids accepted by /lookup |
+| **Format:**        | integer |
+| **Default:**       | 50 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+The /lookup endpoint accepts a list of ids to look up address details for. This
+setting restricts the number of places a user may look up with a single
+request.
+
+
+#### NOMINATIM_POLYGON_OUTPUT_MAX_TYPES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of different geometry formats that may be returned |
+| **Format:**        | integer |
+| **Default:**       | 1 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim supports returning full geometries of places. The geometries may
+be requested in different formats with one of the
+[`polygon_*` parameters](../api/Search.md#polygon-output). Use this
+setting to restrict the number of geometry types that may be requested
+with a single query.
+
+Setting this parameter to 0 disables polygon output completely.
+
+
+#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Disable search for elements that are not in the country grid |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
+
+Enable to search elements just within countries.
+
+When enabled, if, despite not finding a point within the static grid of countries, it
+finds a geometry of a region, do not return the geometry.
+Return "Unable to geocode" instead.
+
+
+#### NOMINATIM_SERVE_LEGACY_URLS
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable serving via URLs with a .php suffix |
+| **Format:**        | boolean |
+| **Default:**       | yes |
+| **Comment:**       | Python frontend only |
+
+When enabled, then endpoints are reachable as `/<name>` as well as `/<name>.php`.
+This can be useful when you want to be backwards-compatible with previous
+versions of Nominatim.
+
+
+#### NOMINATIM_API_POOL_SIZE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of parallel database connections per worker |
+| **Format:**        | number |
+| **Default:**       | 10 |
+| **Comment:**       | Python frontend only |
+
+Sets the maximum number of database connections available for a single instance
+of Nominatim. When configuring the maximum number of connections that your
+PostgreSQL database can handle, you need at least
+`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
+For configuring the number of workers, refer to the section about
+[Deploying the Python frontend](../admin/Deployment-Python.md).
+
+#### NOMINATIM_QUERY_TIMEOUT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Timeout for SQL queries to the database |
+| **Format:**        | number (seconds) |
+| **Default:**       | 10 |
+| **Comment:**       | Python frontend only |
+
+When this timeout is set, then all SQL queries that run longer than the
+specified number of seconds will be cancelled and the user receives a
+timeout exception. Users of the API see a 503 HTTP error.
+
+The timeout does not apply when using the
+[low-level DB access](../library/Low-Level-DB-Access.md)
+of the library. A timeout can be manually set, if required.
+
+
+#### NOMINATIM_REQUEST_TIMEOUT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Timeout for search queries |
+| **Format:**        | number (seconds) |
+| **Default:**       | 60 |
+| **Comment:**       | Python frontend only |
+
+When this timeout is set, a search query will finish sending queries
+to the database after the timeout has passed and immediately return the
+results gathered so far.
+
+Note that under high load you may observe that users receive different results
+than usual without seeing an error. This may cause some confusion.
+
+### Logging Settings
+
+#### NOMINATIM_LOG_DB
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Log requests into the database |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging requests into a database table with this setting. The logs
+can be found in the table `new_query_log`.
+
+When using this logging method, it is advisable to set up a job that
+regularly clears out old logging information. Nominatim will not do that
+on its own.
+
+Can be used at the same time as NOMINATIM_LOG_FILE.
+
+#### NOMINATIM_LOG_FILE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Log requests into a file |
+| **Format:**        | path |
+| **Default:**       | _empty_ (logging disabled) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging of requests into a file with this setting by setting the log
+file where to log to. A relative file name is assumed to be relative to
+the project directory.
+
+
+The entries in the log file have the following format:
+
+    <request time> <execution time in s> <number of results> <type> "<query string>"
+
+Request time is the time when the request was started. The execution time is
+given in seconds and corresponds to the time the query took executing in PHP.
+type contains the name of the endpoint used.
+
+Can be used at the same time as NOMINATIM_LOG_DB.
+
+#### NOMINATIM_DEBUG_SQL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable printing of raw SQL by SQLAlchemy |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **Comment:**       | **For developers only.** |
+
+This setting enables
+[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
+by SQLAlchemy. This can be helpful when debugging some bugs with internal
+query handling. It should only be used together with the CLI query functions.
+Enabling it for server mode may have unintended consequences. Use the `debug`
+parameter instead, which prints information on how the search is executed
+including SQL statements.
--- a/docs/customize/Special-Phrases.md
+++ b/docs/customize/Special-Phrases.md
@@ -0,0 +1,34 @@
+# Special phrases
+
+## Importing OSM user-maintained special phrases
+
+As described in the [Import section](../admin/Import.md), it is possible to
+import special phrases from the wiki with the following command:
+
+```sh
+nominatim special-phrases --import-from-wiki
+```
+
+## Importing custom special phrases
+
+But, it is also possible to import some phrases from a csv file. 
+To do so, you have access to the following command:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file>
+```
+
+Note that the two previous import commands will update the phrases from your database.
+This means that if you import some phrases from a csv file, only the phrases
+present in the csv file will be kept in the database. All other phrases will
+be removed.
+
+If you want to only add new phrases and not update the other ones you can add
+the argument `--no-replace` to the import command. For example:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file> --no-replace
+```
+
+This will add the phrases present in the csv file into the database without
+removing the other ones.
--- a/docs/customize/Tiger.md
+++ b/docs/customize/Tiger.md
@@ -0,0 +1,28 @@
+# Installing TIGER housenumber data for the US
+
+Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
+
+  1. Get preprocessed TIGER data:
+
+        cd $PROJECT_DIR
+        wget https://nominatim.org/data/tiger-nominatim-preprocessed-latest.csv.tar.gz
+
+  2. Import the data into your Nominatim database:
+
+        nominatim add-data --tiger-data tiger-nominatim-preprocessed-latest.csv.tar.gz
+
+  3. Enable use of the Tiger data in your existing `.env` file by adding:
+
+        echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
+
+  4. Apply the new settings:
+
+        nominatim refresh --functions --website
+
+
+See the [TIGER-data project](https://github.com/osm-search/TIGER-data) for more
+information on how the data got preprocessed.
+
--- a/docs/customize/Tokenizers.md
+++ b/docs/customize/Tokenizers.md
@@ -0,0 +1,405 @@
+# Tokenizers
+
+The tokenizer module in Nominatim is responsible for analysing the names given
+to OSM objects and the terms of an incoming query in order to make sure, they
+can be matched appropriately.
+
+Nominatim offers different tokenizer modules, which behave differently and have
+different configuration options. This section describes the tokenizers and how
+they can be configured.
+
+!!! important
+    The use of a tokenizer is tied to a database installation. You need to choose
+    and configure the tokenizer before starting the initial import. Once the import
+    is done, you cannot switch to another tokenizer anymore. Reconfiguring the
+    chosen tokenizer is very limited as well. See the comments in each tokenizer
+    section.
+
+## Legacy tokenizer
+
+The legacy tokenizer implements the analysis algorithms of older Nominatim
+versions. It uses a special Postgresql module to normalize names and queries.
+This tokenizer is automatically installed and used when upgrading an older
+database. It should not be used for new installations anymore.
+
+### Compiling the PostgreSQL module
+
+The tokenizer needs a special C module for PostgreSQL which is not compiled
+by default. If you need the legacy tokenizer, compile Nominatim as follows:
+
+```
+mkdir build
+cd build
+cmake -DBUILD_MODULE=on
+make
+```
+
+### Enabling the tokenizer
+
+To enable the tokenizer add the following line to your project configuration:
+
+```
+NOMINATIM_TOKENIZER=legacy
+```
+
+The Postgresql module for the tokenizer is available in the `module` directory
+and also installed with the remainder of the software under
+`lib/nominatim/module/nominatim.so`. You can specify a custom location for
+the module with
+
+```
+NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
+```
+
+This is in particular useful when the database runs on a different server.
+See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+
+There are no other configuration options for the legacy tokenizer. All
+normalization functions are hard-coded.
+
+## ICU tokenizer
+
+The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
+normalize names and queries. It also offers configurable decomposition and
+abbreviation handling.
+This tokenizer is currently the default.
+
+To enable the tokenizer add the following line to your project configuration:
+
+```
+NOMINATIM_TOKENIZER=icu
+```
+
+### How it works
+
+On import the tokenizer processes names in the following three stages:
+
+1. During the **Sanitizer step** incoming names are cleaned up and converted to
+   **full names**. This step can be used to regularize spelling, split multi-name
+   tags into their parts and tag names with additional attributes. See the
+   [Sanitizers section](#sanitizers) below for available cleaning routines.
+2. The **Normalization** part removes all information from the full names
+   that are not relevant for search.
+3. The **Token analysis** step takes the normalized full names and creates
+   all transliterated variants under which the name should be searchable.
+   See the [Token analysis](#token-analysis) section below for more
+   information.
+
+During query time, only normalization and transliteration are relevant.
+An incoming query is first split into name chunks (this usually means splitting
+the string at the commas) and then each part is normalised and transliterated.
+The result is used to look up places in the search index.
+
+### Configuration
+
+The ICU tokenizer is configured using a YAML file which can be configured using
+`NOMINATIM_TOKENIZER_CONFIG`. The configuration is read on import and then
+saved as part of the internal database status. Later changes to the variable
+have no effect.
+
+Here is an example configuration file:
+
+``` yaml
+normalization:
+    - ":: lower ()"
+    - "ß > 'ss'" # German szet is unambiguously equal to double ss
+transliteration:
+    - !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
+    - ":: Ascii ()"
+sanitizers:
+    - step: split-name-list
+token-analysis:
+    - analyzer: generic
+      variants:
+          - !include icu-rules/variants-ca.yaml
+          - words:
+              - road -> rd
+              - bridge -> bdge,br,brdg,bri,brg
+      mutations:
+          - pattern: 'ä'
+            replacements: ['ä', 'ae']
+```
+
+The configuration file contains four sections:
+`normalization`, `transliteration`, `sanitizers` and `token-analysis`.
+
+#### Normalization and Transliteration
+
+The normalization and transliteration sections each define a set of
+ICU rules that are applied to the names.
+
+The **normalization** rules are applied after sanitation. They should remove
+any information that is not relevant for search at all. Usual rules to be
+applied here are: lower-casing, removing of special characters, cleanup of
+spaces.
+
+The **transliteration** rules are applied at the end of the tokenization
+process to transfer the name into an ASCII representation. Transliteration can
+be useful to allow for further fuzzy matching, especially between different
+scripts.
+
+Each section must contain a list of
+[ICU transformation rules](https://unicode-org.github.io/icu/userguide/transforms/general/rules.html).
+The rules are applied in the order in which they appear in the file.
+You can also include additional rules from an external yaml file using the
+`!include` tag. The included file must contain a valid YAML list of ICU rules
+and may again include other files.
+
+!!! warning
+    The ICU rule syntax contains special characters that conflict with the
+    YAML syntax. You should therefore always enclose the ICU rules in
+    double-quotes.
+
+#### Sanitizers
+
+The sanitizers section defines an ordered list of functions that are applied
+to the name and address tags before they are further processed by the tokenizer.
+They allow to clean up the tagging and bring it to a standardized form more
+suitable for building the search index.
+
+!!! hint
+    Sanitizers only have an effect on how the search index is built. They
+    do not change the information about each place that is saved in the
+    database. In particular, they have no influence on how the results are
+    displayed. The returned results always show the original information as
+    stored in the OpenStreetMap database.
+
+Each entry contains information of a sanitizer to be applied. It has a
+mandatory parameter `step` which gives the name of the sanitizer. Depending
+on the type, it may have additional parameters to configure its operation.
+
+The order of the list matters. The sanitizers are applied exactly in the order
+that is configured. Each sanitizer works on the results of the previous one.
+
+The following is a list of sanitizers that are shipped with Nominatim.
+
+##### split-name-list
+
+::: nominatim.tokenizer.sanitizers.split_name_list
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### strip-brace-terms
+
+::: nominatim.tokenizer.sanitizers.strip_brace_terms
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### tag-analyzer-by-language
+
+::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### clean-housenumbers
+
+::: nominatim.tokenizer.sanitizers.clean_housenumbers
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### clean-postcodes
+
+::: nominatim.tokenizer.sanitizers.clean_postcodes
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### clean-tiger-tags
+
+::: nominatim.tokenizer.sanitizers.clean_tiger_tags
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### delete-tags
+
+::: nominatim.tokenizer.sanitizers.delete_tags
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+##### tag-japanese
+
+::: nominatim.tokenizer.sanitizers.tag_japanese
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
+
+#### Token Analysis
+
+Token analyzers take a full name and transform it into one or more normalized
+forms that are then saved in the search index. In its simplest form, the
+analyzer only applies the transliteration rules. More complex analyzers
+create additional spelling variants of a name. This is useful to handle
+decomposition and abbreviation.
+
+The ICU tokenizer may use different analyzers for different names. To select
+the analyzer to be used, the name must be tagged with the `analyzer` attribute
+by a sanitizer (see for example the
+[tag-analyzer-by-language sanitizer](#tag-analyzer-by-language)).
+
+The token-analysis section contains the list of configured analyzers. Each
+analyzer must have an `id` parameter that uniquely identifies the analyzer.
+The only exception is the default analyzer that is used when no special
+analyzer was selected. There are analysers with special ids:
+
+ * '@housenumber'. If an analyzer with that name is present, it is used
+   for normalization of house numbers.
+ * '@postcode'. If an analyzer with that name is present, it is used
+   for normalization of postcodes.
+
+Different analyzer implementations may exist. To select the implementation,
+the `analyzer` parameter must be set. The different implementations are
+described in the following.
+
+##### Generic token analyzer
+
+The generic analyzer `generic` is able to create variants from a list of given
+abbreviation and decomposition replacements and introduce spelling variations.
+
+###### Variants
+
+The optional 'variants' section defines lists of replacements which create alternative
+spellings of a name. To create the variants, a name is scanned from left to
+right and the longest matching replacement is applied until the end of the
+string is reached.
+
+The variants section must contain a list of replacement groups. Each group
+defines a set of properties that describes where the replacements are
+applicable. In addition, the `words` section defines the list of replacements
+to be made. The basic replacement description is of the form:
+
+```
+<source>[,<source>[...]] => <target>[,<target>[...]]
+```
+
+The left side contains one or more `source` terms to be replaced. The right side
+lists one or more replacements. Each source is replaced with each replacement
+term.
+
+!!! tip
+    The source and target terms are internally normalized using the
+    normalization rules given in the configuration. This ensures that the
+    strings match as expected. In fact, it is better to use unnormalized
+    words in the configuration because then it is possible to change the
+    rules for normalization later without having to adapt the variant rules.
+
+###### Decomposition
+
+In its standard form, only full words match against the source. There
+is a special notation to match the prefix and suffix of a word:
+
+``` yaml
+- ~strasse => str  # matches "strasse" as full word and in suffix position
+- hinter~ => hntr  # matches "hinter" as full word and in prefix position
+```
+
+There is no facility to match a string in the middle of the word. The suffix
+and prefix notation automatically trigger the decomposition mode: two variants
+are created for each replacement, one with the replacement attached to the word
+and one separate. So in the above example, the tokenization of "hauptstrasse" will
+create the variants "hauptstr" and "haupt str". Similarly, the name "rote strasse"
+triggers the variants "rote str" and "rotestr". By having decomposition work
+both ways, it is sufficient to create the variants at index time. The variant
+rules are not applied at query time.
+
+To avoid automatic decomposition, use the '|' notation:
+
+``` yaml
+- ~strasse |=> str
+```
+
+simply changes "hauptstrasse" to "hauptstr" and "rote strasse" to "rote str".
+
+###### Initial and final terms
+
+It is also possible to restrict replacements to the beginning and end of a
+name:
+
+``` yaml
+- ^south => s  # matches only at the beginning of the name
+- road$ => rd  # matches only at the end of the name
+```
+
+So the first example would trigger a replacement for "south 45th street" but
+not for "the south beach restaurant".
+
+###### Replacements vs. variants
+
+The replacement syntax `source => target` works as a pure replacement. It changes
+the name instead of creating a variant. To create an additional version, you'd
+have to write `source => source,target`. As this is a frequent case, there is
+a shortcut notation for it:
+
+```
+<source>[,<source>[...]] -> <target>[,<target>[...]]
+```
+
+The simple arrow causes an additional variant to be added. Note that
+decomposition has an effect here on the source as well. So a rule
+
+``` yaml
+- "~strasse -> str"
+```
+
+means that for a word like `hauptstrasse` four variants are created:
+`hauptstrasse`, `haupt strasse`, `hauptstr` and `haupt str`.
+
+###### Mutations
+
+The 'mutations' section in the configuration describes an additional set of
+replacements to be applied after the variants have been computed.
+
+Each mutation is described by two parameters: `pattern` and `replacements`.
+The pattern must contain a single regular expression to search for in the
+variant name. The regular expressions need to follow the syntax for
+[Python regular expressions](https://docs.python.org/3/library/re.html#regular-expression-syntax).
+Capturing groups are not permitted.
+`replacements` must contain a list of strings that the pattern
+should be replaced with. Each occurrence of the pattern is replaced with
+all given replacements. Be mindful of combinatorial explosion of variants.
+
+###### Modes
+
+The generic analyser supports a special mode `variant-only`. When configured
+then it consumes the input token and emits only variants (if any exist). Enable
+the mode by adding:
+
+```
+  mode: variant-only
+```
+
+to the analyser configuration.
+
+##### Housenumber token analyzer
+
+The analyzer `housenumbers` is purpose-made to analyze house numbers. It
+creates variants with optional spaces between numbers and letters. Thus,
+house numbers of the form '3 a', '3A', '3-A' etc. are all considered equivalent.
+
+The analyzer cannot be customized.
+
+##### Postcode token analyzer
+
+The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
+a 'lookup' variant of the token, which produces variants with optional
+spaces. Use together with the clean-postcodes sanitizer.
+
+The analyzer cannot be customized.
+
+### Reconfiguration
+
+Changing the configuration after the import is currently not possible, although
+this feature may be added at a later time.
--- a/docs/develop/Database-Layout.md
+++ b/docs/develop/Database-Layout.md
@@ -0,0 +1,167 @@
+# Database Layout
+
+### Import tables
+
+OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
+Nominatim uses its own data output style 'gazetteer', which differs from the
+output style created for map rendering.
+
+The import process creates the following tables:
+
+![osm2pgsql tables](osm2pgsql-tables.svg)
+
+The `planet_osm_*` tables are the usual backing tables for OSM data. Note
+that Nominatim uses them to look up special relations and to find nodes on
+ways.
+
+The gazetteer style produces a single table `place` as output with the following
+columns:
+
+ * `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
+ * `osm_id` - original OSM ID
+ * `class` - key of principal tag defining the object type
+ * `type` - value of principal tag defining the object type
+ * `name` - collection of tags that contain a name or reference
+ * `admin_level` - numerical value of the tagged administrative level
+ * `address` - collection of tags defining the address of an object
+ * `extratags` - collection of additional interesting tags that are not
+                 directly relevant for searching
+ * `geometry` - geometry of the object (in WGS84)
+
+A single OSM object may appear multiple times in this table when it is tagged
+with multiple tags that may constitute a principal tag. Take for example a
+motorway bridge. In OSM, this would be a way which is tagged with
+`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
+once with `class` of `highway` and once with a `class` of `bridge`. Thus the
+*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
+
+How raw OSM tags are mapped to the columns in the place table is to a certain
+degree configurable. See [Customizing Import Styles](../customize/Import-Styles.md)
+for more information.
+
+### Search tables
+
+The following tables carry all information needed to do the search:
+
+![search tables](search-tables.svg)
+
+The **placex** table is the central table that saves all information about the
+searchable places in Nominatim. The basic columns are the same as for the
+place table and have the same meaning. The placex table adds the following
+additional columns:
+
+ * `place_id` - the internal unique ID to identify the place
+ * `partition` - the id to use with partitioned tables (see below)
+ * `geometry_sector` - a location hash used for geographically close ordering
+ * `parent_place_id` - the next higher place in the address hierarchy, only
+   relevant for POI-type places (with rank 30)
+ * `linked_place_id` - place ID of the place this object has been merged with.
+   When this ID is set, then the place is invisible for search.
+ * `importance` - measure how well known the place is
+ * `rank_search`, `rank_address` - search and address rank (see [Customizing ranking](../customize/Ranking.md))
+ * `wikipedia` - the wikipedia page used for computing the importance of the place
+ * `country_code` - the country the place is located in
+ * `housenumber` - normalized housenumber, if the place has one
+ * `postcode` - computed postcode for the place
+ * `indexed_status` - processing status of the place (0 - ready, 1 - freshly inserted, 2 - needs updating, 100 - needs deletion)
+ * `indexed_date` - timestamp when the place was processed last
+ * `centroid` - a point feature for the place
+
+The **location_property_osmline** table is a special table for
+[address interpolations](https://wiki.openstreetmap.org/wiki/Addresses#Using_interpolation).
+The columns have the same meaning and use as the columns with the same name in
+the placex table. Only three columns are special:
+
+ * `startnumber` and `endnumber` - beginning and end of the number range
+    for the interpolation
+ * `interpolationtype` - a string `odd`, `even` or `all` to indicate
+    the interval between the numbers
+
+Address interpolations are always ways in OSM, which is why there is no column
+`osm_type`.
+
+The **location_postcode** table holds computed centroids of all postcodes that
+can be found in the OSM data. The meaning of the columns is again the same
+as that of the placex table.
+
+Every place needs an address, a set of surrounding places that describe the
+location of the place. The set of address places is made up of OSM places
+themselves. The **place_addressline** table cross-references for each place
+all the places that make up its address. Two columns define the address
+relation:
+
+  * `place_id` - reference to the place being addressed
+  * `address_place_id` - reference to the place serving as an address part
+
+Most of the columns cache information from the placex entry of the address
+part. The exceptions are:
+
+  * `fromarea` - is true if the address part has an area geometry and can
+    therefore be considered precise
+  * `isaddress` - is true if the address part should show up in the address
+    output. Sometimes there are multiple places competing for the same address
+    type (e.g. multiple cities) and this field resolves the tie.
+
+The **search_name** table contains the search index proper. It saves for each
+place the terms with which the place can be found. The terms are split into
+the name itself and all terms that make up the address. The table mirrors some
+of the columns from placex for faster lookup.
+
+Search terms are not saved as strings. Each term is assigned an integer and those
+integers are saved in the name and address vectors of the search_name table. The
+**word** table serves as the lookup table from string to such a word ID. The
+exact content of the word table depends on the [tokenizer](Tokenizers.md) used.
+
+## Address computation tables
+
+Next to the main search tables, there is a set of secondary helper tables used
+to compute the address relations between places. These tables are partitioned.
+Each country is assigned a partition number in the country_name table (see
+below) and the data is then split between a set of tables, one for each
+partition. Note that Nominatim still manually manages partitioned tables.
+Native support for partitions in PostgreSQL only became usable with version 13.
+It will be a little while before Nominatim drops support for older versions.
+
+![address tables](address-tables.svg)
+
+The **search_name_X** tables are used to look up streets that appear in the
+`addr:street` tag.
+
+The **location_area_large_X** tables are used to look up larger areas
+(administrative boundaries and place nodes) either through their geographic
+closeness or through `addr:*` entries.
+
+The **location_road_X** tables are used to find the closest street for a
+dependent place.
+
+All three tables cache specific information from the placex table for their
+selected subset of places:
+
+ * `keywords` and `name_vector` contain lists of term ids (from the word table)
+   that the full name of the place should match against
+ * `isguess` is true for places that are not described by an area
+
+All other columns reflect their counterpart in the placex table.
+
+## Static data tables
+
+Nominatim also creates a number of static tables at import:
+
+ * `nominatim_properties` saves settings that must not be changed after
+    import
+ * `address_levels` saves the rank information from the
+   [ranking configuration](../customize/Ranking.md)
+ * `country_name` contains a fallback of names for all countries, their
+   default languages and saves the assignment of countries to partitions.
+ * `country_osm_grid` provides a fallback for country geometries
+
+## Auxiliary data tables
+
+Finally there are some tables for auxiliary data:
+
+ * `location_property_tiger` - saves housenumbers from the Tiger import. Its
+   layout is similar to that of `location_property_osmline`.
+ * `place_class_*` tables are helper tables to facilitate lookup of POIs
+   by their class and type. They exist because it is not possible to create
+   combined indexes with geometries.
+
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -1,6 +1,6 @@
 # Setting up Nominatim for Development

-This chapter gives an overview how to set up Nominatim for developement
+This chapter gives an overview how to set up Nominatim for development
 and how to run tests.

 !!! Important
@@ -29,15 +29,26 @@ The Nominatim test suite consists of behavioural tests (using behave) and
 unit tests (using PHPUnit for PHP code and pytest for Python code).
 It has the following additional requirements:

-* [behave test framework](https://behave.readthedocs.io) >= 1.2.5
-* [phpunit](https://phpunit.de) >= 7.3
+* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
+* [phpunit](https://phpunit.de) (9.5 is known to work)
 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
-* [Pylint](https://pylint.org/) (2.6.0 is used for the CI)
+* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
+* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
+* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
 * [pytest](https://pytest.org)
+* [pytest-asyncio](https://pytest-asyncio.readthedocs.io)
+
+For testing the Python search frontend, you need to install extra dependencies
+depending on your choice of webserver framework:
+
+* [httpx](https://www.python-httpx.org/) (starlette only)
+* [asgi-lifespan](https://github.com/florimondmanca/asgi-lifespan) (starlette only)

 The documentation is built with mkdocs:

 * [mkdocs](https://www.mkdocs.org/) >= 1.1.2
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
+* [mkdocstrings-python](https://mkdocstrings.github.io/python/)

 ### Installing prerequisites on Ubuntu/Debian

@@ -49,9 +60,12 @@ To install all necessary packages run:

 ```sh
 sudo apt install php-cgi phpunit php-codesniffer \
-                 python3-pip python3-setuptools python3-dev pylint
+                 python3-pip python3-setuptools python3-dev

-pip3 install --user behave mkdocs pytest
+pip3 install --user behave mkdocs mkdocstrings pytest pytest-asyncio pylint \
+                    mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil \
+                    types-ujson types-requests types-Pygments typing-extensions\
+                    httpx asgi-lifespan
 ```

 The `mkdocs` executable will be located in `.local/bin`. You may have to add
@@ -113,7 +127,7 @@ symlinks (see `CMakeLists.txt` for the exact steps).
 Now you can start webserver for local testing

 ```
-build> mkdocs serve
+build> make serve-doc
 [server:296] Serving on http://127.0.0.1:8000
 [handlers:62] Start watching changes
 ```
@@ -122,7 +136,7 @@ If you develop inside a Vagrant virtual machine, use a port that is forwarded
 to your host:

 ```
-build> mkdocs serve --dev-addr 0.0.0.0:8088
+build> PYTHONPATH=$SRCDIR mkdocs serve --dev-addr 0.0.0.0:8088
 [server:296] Serving on http://0.0.0.0:8088
 [handlers:62] Start watching changes
 ```
--- a/docs/develop/ICU-Tokenizer-Modules.md
+++ b/docs/develop/ICU-Tokenizer-Modules.md
@@ -0,0 +1,220 @@
+# Writing custom sanitizer and token analysis modules for the ICU tokenizer
+
+The [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer) provides a
+highly customizable method to pre-process and normalize the name information
+of the input data before it is added to the search index. It comes with a
+selection of sanitizers and token analyzers which you can use to adapt your
+installation to your needs. If the provided modules are not enough, you can
+also provide your own implementations. This section describes the API
+of sanitizers and token analysis.
+
+!!! warning
+    This API is currently in early alpha status. While this API is meant to
+    be a public API on which other sanitizers and token analyzers may be
+    implemented, it is not guaranteed to be stable at the moment.
+
+
+## Using non-standard sanitizers and token analyzers
+
+Sanitizer names (in the `step` property) and token analysis names (in the
+`analyzer`) may refer to externally supplied modules. There are two ways
+to include external modules: through a library or from the project directory.
+
+To include a module from a library, use the absolute import path as name and
+make sure the library can be found in your PYTHONPATH.
+
+To use a custom module without creating a library, you can put the module
+somewhere in your project directory and then use the relative path to the
+file. Include the whole name of the file including the `.py` ending.
+
+## Custom sanitizer modules
+
+A sanitizer module must export a single factory function `create` with the
+following signature:
+
+``` python
+def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]
+```
+
+The function receives the custom configuration for the sanitizer and must
+return a callable (function or class) that transforms the name and address
+terms of a place. When a place is processed, then a `ProcessInfo` object
+is created from the information that was queried from the database. This
+object is sequentially handed to each configured sanitizer, so that each
+sanitizer receives the result of processing from the previous sanitizer.
+After the last sanitizer is finished, the resulting name and address lists
+are forwarded to the token analysis module.
+
+Sanitizer functions are instantiated once and then called for each place
+that is imported or updated. They don't need to be thread-safe.
+If multi-threading is used, each thread creates their own instance of
+the function.
+
+### Sanitizer configuration
+
+::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
+    options:
+        heading_level: 6
+
+### The main filter function of the sanitizer
+
+The filter function receives a single object of type `ProcessInfo`
+which has three members:
+
+ * `place: PlaceInfo`: read-only information about the place being processed.
+   See PlaceInfo below.
+ * `names: List[PlaceName]`: The current list of names for the place.
+ * `address: List[PlaceName]`: The current list of address names for the place.
+
+While the `place` member is provided for information only, the `names` and
+`address` lists are meant to be manipulated by the sanitizer. It may add and
+remove entries, change information within a single entry (for example by
+adding extra attributes) or completely replace the list with a different one.
+
+#### PlaceInfo - information about the place
+
+::: nominatim.data.place_info.PlaceInfo
+    options:
+        heading_level: 6
+
+
+#### PlaceName - extended naming information
+
+::: nominatim.data.place_name.PlaceName
+    options:
+        heading_level: 6
+
+
+### Example: Filter for US street prefixes
+
+The following sanitizer removes the directional prefixes from street names
+in the US:
+
+``` python
+import re
+
+def _filter_function(obj):
+    if obj.place.country_code == 'us' \
+       and obj.place.rank_address >= 26 and obj.place.rank_address <= 27:
+        for name in obj.names:
+            name.name = re.sub(r'^(north|south|west|east) ',
+                               '',
+                               name.name,
+                               flags=re.IGNORECASE)
+
+def create(config):
+    return _filter_function
+```
+
+This is the simplest form of a sanitizer module. It defines a single
+filter function and implements the required `create()` function by returning
+the filter.
+
+The filter function first checks if the object is interesting for the
+sanitizer. Namely it checks if the place is in the US (through `country_code`)
+and if the place is a street (a `rank_address` of 26 or 27). If the
+conditions are met, then it goes through all available names and
+removes any leading directional prefix using a simple regular expression.
+
+Save the source code in a file in your project directory, for example as
+`us_streets.py`. Then you can use the sanitizer in your `icu_tokenizer.yaml`:
+
+``` yaml
+...
+sanitizers:
+    - step: us_streets.py
+...
+```
+
+!!! warning
+    This example is just a simplified showcase of how to create a sanitizer.
+    It is not really ready for real-world use: while the sanitizer would
+    correctly transform `West 5th Street` into `5th Street`, it would also
+    shorten a simple `North Street` to `Street`.
+
+For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
+They can be found in the directory
+[`nominatim/tokenizer/sanitizers`](https://github.com/osm-search/Nominatim/tree/master/nominatim/tokenizer/sanitizers).
+
+
+## Custom token analysis module
+
+::: nominatim.tokenizer.token_analysis.base.AnalysisModule
+    options:
+        heading_level: 6
+
+
+::: nominatim.tokenizer.token_analysis.base.Analyzer
+    options:
+        heading_level: 6
+
+### Example: Creating acronym variants for long names
+
+The following example of a token analysis module creates acronyms from
+very long names and adds them as a variant:
+
+``` python
+class AcronymMaker:
+    """ This class is the actual analyzer.
+    """
+    def __init__(self, norm, trans):
+        self.norm = norm
+        self.trans = trans
+
+
+    def get_canonical_id(self, name):
+        # In simple cases, the normalized name can be used as a canonical id.
+        return self.norm.transliterate(name.name).strip()
+
+
+    def compute_variants(self, name):
+        # The transliterated form of the name always makes up a variant.
+        variants = [self.trans.transliterate(name)]
+
+        # Only create acronyms from very long words.
+        if len(name) > 20:
+            # Take the first letter from each word to form the acronym.
+            acronym = ''.join(w[0] for w in name.split())
+            # If that leads to an acronym with at least three letters,
+            # add the resulting acronym as a variant.
+            if len(acronym) > 2:
+                # Never forget to transliterate the variants before returning them.
+                variants.append(self.trans.transliterate(acronym))
+
+        return variants
+
+# The following two functions are the module interface.
+
+def configure(rules, normalizer, transliterator):
+    # There is no configuration to parse and no data to set up.
+    # Just return an empty configuration.
+    return None
+
+
+def create(normalizer, transliterator, config):
+    # Return a new instance of our token analysis class above.
+    return AcronymMaker(normalizer, transliterator)
+```
+
+Given the name `Trans-Siberian Railway`, the code above would return the full
+name `Trans-Siberian Railway` and the acronym `TSR` as variant, so that
+searching would work for both.
+
+## Sanitizers vs. Token analysis - what to use for variants?
+
+It is not always clear when to implement variations in the sanitizer and
+when to write a token analysis module. Just take the acronym example
+above: it would also have been possible to write a sanitizer which adds the
+acronym as an additional name to the name list. The result would have been
+similar. So which should be used when?
+
+The most important thing to keep in mind is that variants created by the
+token analysis are only saved in the word lookup table. They do not need
+extra space in the search index. If there are many spelling variations, this
+can mean quite a significant amount of space is saved.
+
+When creating additional names with a sanitizer, these names are completely
+independent. In particular, they can be fed into different token analysis
+modules. This gives a much greater flexibility but at the price that the
+additional names increase the size of the search index.
+
--- a/docs/develop/Import.md
+++ b/docs/develop/Import.md
@@ -1,170 +0,0 @@
-# OSM Data Import
-
-OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
-Nominatim uses its own data output style 'gazetteer', which differs from the
-output style created for map rendering.
-
-## Database Layout
-
-The gazetteer style produces a single table `place` with the following rows:
-
- * `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
- * `osm_id` - original OSM ID
- * `class` - key of principal tag defining the object type
- * `type` - value of principal tag defining the object type
- * `name` - collection of tags that contain a name or reference
- * `admin_level` - numerical value of the tagged administrative level
- * `address` - collection of tags defining the address of an object
- * `extratags` - collection of additional interesting tags that are not
-                 directly relevant for searching
- * `geometry` - geometry of the object (in WGS84)
-
-A single OSM object may appear multiple times in this table when it is tagged
-with multiple tags that may constitute a principal tag. Take for example a
-motorway bridge. In OSM, this would be a way which is tagged with
-`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
-once with `class` of `highway` and once with a `class` of `bridge`. Thus the
-*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
-
-## Configuring the Import
-
-How tags are interpreted and assigned to the different `place` columns can be
-configured via the import style configuration file (`NOMINATIM_IMPORT_STYLE`). This
-is a JSON file which contains a list of rules which are matched against every
-tag of every object and then assign the tag its specific role.
-
-### Configuration Rules
-
-A single rule looks like this:
-
-```json
-{
-    "keys" : ["key1", "key2", ...],
-    "values" : {
-        "value1" : "prop",
-        "value2" : "prop1,prop2"
-    }
-}
-```
-
-A rule first defines a list of keys to apply the rule to. This is always a list
-of strings. The string may have four forms. An empty string matches against
-any key. A string that ends in an asterisk `*` is a prefix match and accordingly
-matches against any key that starts with the given string (minus the `*`). A
-suffix match can be defined similarly with a string that starts with a `*`. Any
-other string constitutes an exact match.
-
-The second part of the rules defines a list of values and the properties that
-apply to a successful match. Value strings may be either empty, which
-means that they match any value, or describe an exact match. Prefix
-or suffix matching of values is not possible.
-
-For a rule to match, it has to find a valid combination of keys and values. The
-resulting property is that of the matched values.
-
-The rules in a configuration file are processed sequentially and the first
-match for each tag wins.
-
-A rule where key and value are the empty string is special. This defines the
-fallback when none of the rules match. The fallback is always used as a last
-resort when nothing else matches, no matter where the rule appears in the file.
-Defining multiple fallback rules is not allowed. What happens in this case,
-is undefined.
-
-### Tag Properties
-
-One or more of the following properties may be given for each tag:
-
-* `main`
-
-    A principal tag. A new row will be added for the object with key and value
-    as `class` and `type`.
-
-* `with_name`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, if there is any name tag found (a reference tag is not sufficient, see
-    below).
-
-* `with_name_key`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, if there is also a name tag that matches the key of the principal tag.
-    For example, if the main tag is `bridge=yes`, then it will only be added as
-    an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
-    If this property is set, all other names that are not domain-specific are
-    ignored.
-
-* `fallback`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, when no other principal tags for this object have been found. Only one
-    fallback tag can win for an object.
-
-* `operator`
-
-    When the tag is a principal tag (`main` property set): also include the
-    `operator` tag in the list of names. This is a special construct for an
-    out-dated tagging practise in OSM. Fuel stations and chain restaurants
-    in particular used to have the name of the chain tagged as `operator`.
-    These days the chain can be more commonly found in the `brand` tag but
-    there is still enough old data around to warrant this special case.
-
-* `name`
-
-    Add tag to the list of names.
-
-* `ref`
-
-    Add tag to the list of names as a reference. At the moment this only means
-    that the object is not considered to be named for `with_name`.
-
-* `address`
-
-    Add tag to the list of address tags. If the tag starts with `addr:` or
-    `is_in:`, then this prefix is cut off before adding it to the list.
-
-* `postcode`
-
-    Add the value as a postcode to the address tags. If multiple tags are
-    candidate for postcodes, one wins out and the others are dropped.
-
-* `country`
-
-    Add the value as a country code to the address tags. The value must be a
-    two letter country code, otherwise it is ignored. If there are multiple
-    tags that match, then one wins out and the others are dropped.
-
-* `house`
-
-    If no principle tags can be found for the object, still add the object with
-    `class`=`place` and `type`=`house`. Use this for address nodes that have no
-    other function.
-
-* `interpolation`
-
-    Add this object as an address interpolation (appears as `class`=`place` and
-    `type`=`houses` in the database).
-
-* `extra`
-
-    Add tag to the list of extra tags.
-
-* `skip`
-
-    Skip the tag completely. Useful when a custom default fallback is defined
-    or to define exceptions to rules.
-
-A rule can define as many of these properties for one match as it likes. For
-example, if the property is `"main,extra"` then the tag will open a new row
-but also have the tag appear in the list of extra tags.
-
-There are a number of pre-defined styles in the `settings/` directory. It is
-advisable to start from one of these styles when defining your own.
-
-### Changing the Style of Existing Databases
-
-There is normally no issue changing the style of a database that is already
-imported and now kept up-to-date with change files. Just be aware that any
-change in the style applies to updates only. If you want to change the data
-that is already in the database, then a reimport is necessary.
--- a/docs/develop/Indexing.md
+++ b/docs/develop/Indexing.md
@@ -0,0 +1,152 @@
+# Indexing Places
+
+In Nominatim, the word __indexing__ refers to the process that takes the raw
+OpenStreetMap data from the place table, enriches it with address information
+and creates the search indexes. This section explains the basic data flow.
+
+
+## Initial import
+
+After osm2pgsql has loaded the raw OSM data into the place table,
+the data is copied to the final search tables placex and location_property_osmline.
+While they are copied, some basic properties are added:
+
+ * country_code, geometry_sector and partition
+ * initial search and address rank
+
+In addition the column `indexed_status` is set to `1` marking the place as one
+that needs to be indexed.
+
+All this happens in the triggers `placex_insert` and `osmline_insert`.
+
+## Indexing
+
+The main work horse of the data import is the indexing step, where Nominatim
+takes every place from the placex and location_property_osmline tables where
+the indexed_status != 0 and computes the search terms and the address parts
+of the place.
+
+The indexing happens in three major steps:
+
+1. **Data preparation** - The indexer gets the data for the place to be indexed
+   from the database.
+
+2. **Search name processing** - The prepared data is given to the
+   tokenizer which computes the search terms from the names
+   and potentially other information.
+
+3. **Address processing** - The indexer then hands the prepared data and the
+   tokenizer information back to the database via an `UPDATE` statement which
+   also sets the indexed_status to `0`. This triggers the update triggers
+   `placex_update`/`osmline_update` which do the work of computing address
+   parts and filling all the search tables.
+
+When computing the address terms of a place, Nominatim relies on the processed
+search names of all the address parts. That is why places are processed in rank
+order, from smallest rank to largest. To ensure correct handling of linked
+place nodes, administrative boundaries are processed before all other places.
+
+Apart from these restrictions, each place can be indexed independently
+from the others. This allows a large degree of parallelization during the indexing.
+It also means that the indexing process can be interrupted at any time and
+will simply pick up where it left off when restarted.
+
+### Data preparation
+
+The data preparation step computes and retrieves all data for a place that
+might be needed for the next step of processing the search name. That includes
+
+* location information (country code)
+* place classification (class, type, ranks)
+* names (including names of linked places)
+* address information (`addr:*` tags)
+
+Data preparation is implemented in pl/PgSQL mostly in the functions
+`placex_indexing_prepare()` and `get_interpolation_address()`.
+
+#### `addr:*` tag inheritance
+
+Nominatim has limited support for inheriting address tags from a building
+to POIs inside the building. This only works when the address tags are on the
+building outline. Any rank 30 object inside such a building or on its outline
+inherits all address tags when it does not have any address tags of its own.
+
+The inheritance is computed in the data preparation step.
+
+### Search name processing
+
+The prepared place information is handed to the tokenizer next. This is a
+Python module responsible for processing the names  from both name and address
+terms and building up the word index from them. The process is explained in
+more detail in the [Tokenizer chapter](Tokenizers.md).
+
+### Address processing
+
+Finally, the preprocessed place information and the results of the search name
+processing are written back to the database. At this point the update triggers
+of the placex/location_property_osmline tables take over and fill all the
+dependent tables. This makes up the most work-intensive part of the indexing.
+
+Nominatim distinguishes between dependent and independent places.
+**Dependent places** are all places on rank 30: house numbers, POIs etc. These
+places don't have a full address of their own. Instead they are attached to
+a parent street or place and use the information of the parent for searching
+and displaying information. Everything else are **independent places**: streets,
+parks, water bodies, suburbs, cities, states etc.  They receive a full address
+on their own.
+
+The address processing for both types of places is very different.
+
+#### Independent places
+
+To compute the address of an independent place Nominatim searches for all
+places that cover the place to compute the address for at least partially.
+For places with an area, that area is used to check for coverage. For place
+nodes an artificial square area is computed according to the rank of
+the place. The lower the rank the larger the area. The `location_area_large_X`
+tables are there to facilitate the lookup. All places that can function as
+the address of another place are saved in those tables.
+
+`addr:*` and `is_in:*` tags are taken into account to compute the address, too.
+Nominatim will give preference to places with the same name as in these tags
+when looking for places in the vicinity. If there are no matching place names
+at all, then the tags are at least added to the search index. That means that
+the names will not be shown in the result as the 'address' of the place, but
+searching by them still works.
+
+Independent places are always added to the global search index `search_name`.
+
+#### Dependent places
+
+Dependent places skip the full address computation for performance reasons.
+Instead they just find a parent place to attach themselves to.
+
+![parenting of dependent places](parenting-flow.svg)
+
+By default a POI
+or house number will be attached to the closest street. That can be any major
+or minor street indexed by Nominatim. In the default configuration that means
+that it can attach itself to a footway but only when it has a name.
+
+When the dependent place has an `addr:street` tag, then Nominatim will first
+try to find a street with the same name before falling back to the closest
+street.
+
+There are also addresses in OSM, where the housenumber does not belong
+to a street at all. These have an `addr:place` tag. For these places, Nominatim
+tries to find a place with the given name in the indexed places with an
+address rank between 16 and 25. If none is found, then the dependent place
+is attached to the closest place in that category and the addr:place name is
+added as *unlisted* place, which indicates to Nominatim that it needs to add
+it to the address output, no matter what. This special case is necessary to
+cover addresses that don't really refer to an existing object.
+
+When an address has both the `addr:street` and `addr:place` tag, then Nominatim
+assumes that the `addr:place` tag in fact should be the city part of the address
+and gives the POI the usual street number address.
+
+Dependent places are only added to the global search index `search_name` when
+they have either a name themselves or when they have address tags that are not
+covered by the places that make up their address. The latter ensures that
+addresses are always searchable by those address tags.
+
--- a/docs/develop/Postcodes.md
+++ b/docs/develop/Postcodes.md
@@ -1,45 +0,0 @@
-# Postcodes in Nominatim
-
-The blog post
-[Nominatim and Postcodes](https://www.openstreetmap.org/user/lonvia/diary/43143)
-describes the handling implemented since Nominatim 3.1.
-
-Postcode centroids (aka 'calculated postcodes') are generated by looking at all
-postcodes of a country, grouping them and calculating the geometric centroid.
-There is currently no logic to deal with extreme outliers (typos or other
-mistakes in OSM data). There is also no check if a postcodes adheres to a
-country's format, e.g. if Swiss postcodes are 4 digits.
-
-
-## Regular updating calculated postcodes
-
-The script to rerun the calculation is
-`nominatim refresh --postcodes`
-and runs once per night on nominatim.openstreetmap.org.
-
-
-## Finding places that share a specific postcode
-
-In the Nominatim database run
-
-```sql
-SELECT address->'postcode' as pc,
-       osm_type, osm_id, class, type,
-       st_x(centroid) as lon, st_y(centroid) as lat
-FROM placex
-WHERE country_code='fr'
-  AND upper(trim (both ' ' from address->'postcode')) = '33210';
-```
-
-Alternatively on [Overpass](https://overpass-turbo.eu/) run the following query
-
-```
-[out:json][timeout:250];
-area["name"="France"]->.boundaryarea;
-(
-nwr(area.boundaryarea)["addr:postcode"="33210"];
-);
-out body;
->;
-out skel qt;
-```
--- a/docs/develop/Testing.md
+++ b/docs/develop/Testing.md
@@ -22,8 +22,8 @@ This test directory is sturctured as follows:
  |
  +-   php         PHP unit tests
  +-   python      Python unit tests
-  +-   scenes      Geometry test data
  +-   testdb      Base data for generating API test database
+  +-   testdata    Additional test data used by unit tests
 ```

 ## PHP Unit Tests (`test/php`)
@@ -84,6 +84,8 @@ The tests can be configured with a set of environment variables (`behave -D key=
 * `TEST_DB` - name of test database (db tests)
 * `API_TEST_DB` - name of the database containing the API test data (api tests)
 * `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
+ * `API_ENGINE` - web framework to use for running search queries, same values as
+                  `nominatim serve --engine` parameter
 * `DB_HOST` - (optional) hostname of database host
 * `DB_PORT` - (optional) port of database on host
 * `DB_USER` - (optional) username of database login
@@ -120,7 +122,7 @@ and compromises the following data:
 API tests should only be testing the functionality of the website PHP code.
 Most tests should be formulated as BDD DB creation tests (see below) instead.

-#### Code Coverage
+#### Code Coverage (PHP engine only)

 The API tests also support code coverage tests. You need to install
 [PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).
@@ -153,7 +155,3 @@ needs superuser rights for postgres.

 These tests check that data is imported correctly into the place table. They
 use the same template database as the DB Creation tests, so the same remarks apply.
-
-Note that most testing of the gazetteer output of osm2pgsql is done in the tests
-of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
-the osm2pgsql and Nominatim code.
--- a/docs/develop/Tokenizers.md
+++ b/docs/develop/Tokenizers.md
@@ -0,0 +1,354 @@
+# Tokenizers
+
+The tokenizer is the component of Nominatim that is responsible for
+analysing names of OSM objects and queries. Nominatim provides different
+tokenizers that use different strategies for normalisation. This page describes
+how tokenizers are expected to work and the public API that needs to be
+implemented when creating a new tokenizer. For information on how to configure
+a specific tokenizer for a database see the
+[tokenizer chapter in the Customization Guide](../customize/Tokenizers.md).
+
+## Generic Architecture
+
+### About Search Tokens
+
+Search in Nominatim is organised around search tokens. Such a token represents
+a string that can be part of the search query. Tokens are used so that the search
+index does not need to be organised around strings. Instead the database saves
+for each place which tokens match this place's name, address, house number etc.
+To be able to distinguish between these different types of information stored
+with the place, a search token also always has a certain type: name, house number,
+postcode etc.
+
+During search an incoming query is transformed into an ordered list of such
+search tokens (or rather many lists, see below) and this list is then converted
+into a database query to find the right place.
+
+It is the core task of the tokenizer to create, manage and assign the search
+tokens. The tokenizer is involved in two distinct operations:
+
+* __at import time__: scanning names of OSM objects, normalizing them and
+  building up the list of search tokens.
+* __at query time__: scanning the query and returning the appropriate search
+  tokens.
+
+
+### Importing
+
+The indexer is responsible to enrich an OSM object (or place) with all data
+required for geocoding. It is split into two parts: the controller collects
+the places that require updating, enriches the place information as required
+and hands the place to Postgresql. The controller is part of the Nominatim
+library written in Python. Within Postgresql, the `placex_update`
+trigger is responsible to fill out all secondary tables with extra geocoding
+information. This part is written in PL/pgSQL.
+
+The tokenizer is involved in both parts. When the indexer prepares a place,
+it hands it over to the tokenizer to inspect the names and create all the
+search tokens applicable for the place. This usually involves updating the
+tokenizer's internal token lists and creating a list of all token IDs for
+the specific place. This list is later needed in the PL/pgSQL part where the
+indexer needs to add the token IDs to the appropriate search tables. To be
+able to communicate the list between the Python part and the PL/pgSQL trigger,
+the `placex` table contains a special JSONB column `token_info` which is there
+for the exclusive use of the tokenizer.
+
+The Python part of the tokenizer returns structured information about the
+tokens of a place to the indexer which converts it to JSON and inserts it into
+the `token_info` column. The content of the column is then handed to the PL/pgSQL
+callbacks of the tokenizer which extracts the required information. Usually
+the tokenizer then removes all information from the `token_info` structure,
+so that no information is ever persistently saved in the table. All information
+that went in should have been processed after all and put into secondary tables.
+This is however not a hard requirement. If the tokenizer needs to store
+additional information about a place permanently, it may do so in the
+`token_info` column. It just may never execute searches over it and
+consequently not create any special indexes on it.
+
+### Querying
+
+At query time, Nominatim builds up multiple _interpretations_ of the search
+query. Each of these interpretations is tried against the database in order
+of the likelihood with which they match to the search query. The first
+interpretation that yields results wins.
+
+The interpretations are encapsulated in the `SearchDescription` class. An
+instance of this class is created by applying a sequence of
+_search tokens_ to an initially empty SearchDescription. It is the
+responsibility of the tokenizer to parse the search query and derive all
+possible sequences of search tokens. To that end the tokenizer needs to parse
+the search query and look up matching words in its own data structures.
+
+## Tokenizer API
+
+The following section describes the functions that need to be implemented
+for a custom tokenizer implementation.
+
+!!! warning
+    This API is currently in early alpha status. While this API is meant to
+    be a public API on which other tokenizers may be implemented, the API is
+    far away from being stable at the moment.
+
+### Directory Structure
+
+Nominatim expects two files for a tokenizer:
+
+* `nominatim/tokenizer/<NAME>_tokenizer.py` containing the Python part of the
+  implementation
+* `lib-php/tokenizer/<NAME>_tokenizer.php` with the PHP part of the
+  implementation
+
+where `<NAME>` is a unique name for the tokenizer consisting of only lower-case
+letters, digits and underscore. A tokenizer also needs to install some SQL
+functions. By convention, these should be placed in `lib-sql/tokenizer`.
+
+If the tokenizer has a default configuration file, this should be saved as
+`settings/<NAME>_tokenizer.<SUFFIX>`.
+
+### Configuration and Persistence
+
+Tokenizers may define custom settings for their configuration. All settings
+must be prefixed with `NOMINATIM_TOKENIZER_`. Settings may be transient or
+persistent. Transient settings are loaded from the configuration file when
+Nominatim is started and may thus be changed at any time. Persistent settings
+are tied to a database installation and must only be read during installation
+time. If they are needed for the runtime then they must be saved into the
+`nominatim_properties` table and later loaded from there.
+
+### The Python module
+
+The Python module is expected to export a single factory function:
+
+```python
+def create(dsn: str, data_dir: Path) -> AbstractTokenizer
+```
+
+The `dsn` parameter contains the DSN of the Nominatim database. The `data_dir`
+is a directory in the project directory that the tokenizer may use to save
+database-specific data. The function must return the instance of the tokenizer
+class as defined below.
+
+### Python Tokenizer Class
+
+All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
+and implement the abstract functions defined there.
+
+::: nominatim.tokenizer.base.AbstractTokenizer
+    options:
+        heading_level: 6
+
+### Python Analyzer Class
+
+::: nominatim.tokenizer.base.AbstractAnalyzer
+    options:
+        heading_level: 6
+
+### PL/pgSQL Functions
+
+The tokenizer must provide access functions for the `token_info` column
+to the indexer which extracts the necessary information for the global
+search tables. If the tokenizer needs additional SQL functions for private
+use, then these functions must be prefixed with `token_` in order to ensure
+that there are no naming conflicts with the SQL indexer code.
+
+The following functions are expected:
+
+```sql
+FUNCTION token_get_name_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of search terms that should match
+the name(s) for the given place. These tokens are used to look up the place
+by name and, where the place functions as part of an address for another place,
+by address. Must return NULL when the place has no name.
+
+```sql
+FUNCTION token_get_name_match_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of full names of the place that should be used
+to match addresses. The list of match tokens is usually more strict than
+search tokens as it is used to find a match between two OSM tag values which
+are expected to contain matching full names. Partial terms should not be
+used for match tokens. Must return NULL when the place has no name.
+
+```sql
+FUNCTION token_get_housenumber_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of house number tokens that apply to the place.
+Note that a place may have multiple house numbers, for example when apartments
+each have their own number. Must be NULL when the place has no house numbers.
+
+```sql
+FUNCTION token_normalized_housenumber(info JSONB) RETURNS TEXT
+```
+
+Return the house number(s) in the normalized form that can be matched against
+a house number token text. If a place has multiple house numbers they must
+be listed with a semicolon as delimiter. Must be NULL when the place has no
+house numbers.
+
+```sql
+FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN
+```
+
+Return true if this is an object that should be parented against a street.
+Only relevant for objects with address rank 30.
+
+```sql
+FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN
+```
+
+Return true if there are street names to match against for finding the
+parent of the object.
+
+
+```sql
+FUNCTION token_has_addr_place(info JSONB) RETURNS BOOLEAN
+```
+
+Return true if there are place names to match against for finding the
+parent of the object.
+
+```sql
+FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
+```
+
+Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
+match against the `addr:street` tag name. Must return either NULL or FALSE
+when the place has no `addr:street` tag.
+
+```sql
+FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[]) RETURNS BOOLEAN
+```
+
+Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
+match against the `addr:place` tag name. Must return either NULL or FALSE
+when the place has no `addr:place` tag.
+
+
+```sql
+FUNCTION token_addr_place_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return the search token IDs extracted from the `addr:place` tag. These tokens
+are used for searches by address when no matching place can be found in the
+database. Must be NULL when the place has no `addr:place` tag.
+
+```sql
+FUNCTION token_get_address_keys(info JSONB) RETURNS SETOF TEXT
+```
+
+Return the set of keys for which address information is provided. This
+should correspond to the list of (relevant) `addr:*` tags with the `addr:`
+prefix removed or the keys used in the `address` dictionary of the place info.
+
+```sql
+FUNCTION token_get_address_search_tokens(info JSONB, key TEXT) RETURNS INTEGER[]
+```
+
+Return the array of search tokens for the given address part. `key` can be
+expected to be one of those returned with `token_get_address_keys()`. The
+search tokens are added to the address search vector of the place, when no
+corresponding OSM object could be found for the given address part from which
+to copy the name information.
+
+```sql
+FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[]) RETURNS BOOLEAN
+```
+
+Check if the given tokens match against the address part `key`.
+
+__Warning:__ the tokens that are handed in are the lists previously saved
+from `token_get_name_search_tokens()`, _not_ from the match token list. This
+is an historical oddity which will be fixed at some point in the future.
+Currently, tokenizers are encouraged to make sure that matching works against
+both the search token list and the match token list.
+
+```sql
+FUNCTION token_get_postcode(info JSONB) RETURNS TEXT
+```
+
+Return the postcode for the object, if any exists. The postcode must be in
+the form that should also be presented to the end-user.
+
+```sql
+FUNCTION token_strip_info(info JSONB) RETURNS JSONB
+```
+
+Return the part of the `token_info` field that should be stored in the database
+permanently. The indexer calls this function when all processing is done and
+replaces the content of the `token_info` column with the returned value before
+the trigger stores the information in the database. May return NULL if no
+information should be stored permanently.
+
+### PHP Tokenizer class
+
+The PHP tokenizer class is instantiated once per request and responsible for
+analyzing the incoming query. Multiple requests may be in flight in
+parallel.
+
+The class is expected to be found under the
+name of `\Nominatim\Tokenizer`. To find the class the PHP code includes the file
+`tokenizer/tokenizer.php` in the project directory. This file must be created
+when the tokenizer is first set up on import. The file should initialize any
+configuration variables by setting PHP constants and then require the file
+with the actual implementation of the tokenizer.
+
+The tokenizer class must implement the following functions:
+
+```php
+public function __construct(object &$oDB)
+```
+
+The constructor of the class receives a database connection that can be used
+to query persistent data in the database.
+
+```php
+public function checkStatus()
+```
+
+Check that the tokenizer can access its persistent data structures. If there
+is an issue, throw an `\Exception`.
+
+```php
+public function normalizeString(string $sTerm) : string
+```
+
+Normalize string to a form to be used for comparisons when reordering results.
+Nominatim reweighs results by how well the final display string matches the actual
+query. Before comparing result and query, names and query are normalised against
+this function. The tokenizer can thus remove all properties that should not be
+taken into account for reweighing, e.g. special characters or case.
+
+```php
+public function tokensForSpecialTerm(string $sTerm) : array
+```
+
+Return the list of special term tokens that match the given term.
+
+```php
+public function extractTokensFromPhrases(array &$aPhrases) : TokenList
+```
+
+Parse the given phrases, splitting them into word lists and retrieve the
+matching tokens.
+
+The phrase array may take on two forms. In unstructured searches (using `q=`
+parameter) the search query is split at the commas and the elements are
+put into a sorted list. For structured searches the phrase array is an
+associative array where the key designates the type of the term (street, city,
+county etc.). The tokenizer may ignore the phrase type at this stage in parsing.
+Matching phrase type and appropriate search token type will be done later
+when the SearchDescription is built.
+
+For each phrase in the list of phrases, the function must analyse the phrase
+string and then call `setWordSets()` to communicate the result of the analysis.
+A word set is a list of strings, where each string refers to a search token.
+A phrase may have multiple interpretations. Therefore a list of word sets is
+usually attached to the phrase. The search tokens themselves are returned
+by the function in an associative array, where the key corresponds to the
+strings given in the word sets. The value is a list of search tokens. Thus
+a single string in the list of word sets may refer to multiple search tokens.
+
--- a/docs/develop/address-tables.plantuml
+++ b/docs/develop/address-tables.plantuml
@@ -0,0 +1,35 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+map search_name_X {
+  place_id => BIGINT
+  address_rank => SMALLINT
+  name_vector => INT[]
+  centroid => GEOMETRY
+}
+
+map location_area_large_X {
+  place_id => BIGINT
+  keywords => INT[]
+  partition => SMALLINT
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  country_code => VARCHAR(2)
+  isguess => BOOLEAN
+  postcode => TEXT
+  centroid => POINT
+  geometry => GEOMETRY
+}
+
+map location_road_X {
+  place_id => BIGINT
+  partition => SMALLINT
+  country_code => VARCHAR(2)
+  geometry => GEOMETRY
+}
+
+search_name_X -[hidden]> location_area_large_X
+location_area_large_X -[hidden]> location_road_X
+
+@enduml
--- a/docs/develop/address-tables.svg
+++ b/docs/develop/address-tables.svg
--- a/docs/develop/data-sources.md
+++ b/docs/develop/data-sources.md
@@ -13,7 +13,7 @@ More details in [osm-search/country-grid-data](https://github.com/osm-search/cou

 ## US Census TIGER

-For the United States you can choose to import additonal street-level data.
+For the United States you can choose to import additional street-level data.
 The data isn't mixed into OSM data but queried as fallback when no OSM
 result can be found.

--- a/docs/develop/osm2pgsql-tables.plantuml
+++ b/docs/develop/osm2pgsql-tables.plantuml
@@ -0,0 +1,44 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+map planet_osm_nodes #eee {
+  id => BIGINT
+  lat => INT
+  lon => INT
+}
+
+map planet_osm_ways #eee {
+  id => BIGINT
+  nodes => BIGINT[]
+  tags => TEXT[]
+}
+
+map planet_osm_rels #eee {
+  id => BIGINT
+  parts => BIGINT[]
+  members => TEXT[]
+  tags => TEXT[]
+  way_off => SMALLINT
+  rel_off => SMALLINT
+}
+
+map place {
+  osm_type => CHAR(1)
+  osm_id => BIGINT
+  class => TEXT
+  type => TEXT
+  name => HSTORE
+  address => HSTORE
+  extratags => HSTORE
+  admin_level => SMALLINT
+  geometry => GEOMETRY
+}
+
+planet_osm_nodes  -[hidden]> planet_osm_ways
+planet_osm_ways  -[hidden]> planet_osm_rels
+planet_osm_ways -[hidden]-> place
+
+planet_osm_nodes::id <- planet_osm_ways::nodes
+
+@enduml
--- a/docs/develop/osm2pgsql-tables.svg
+++ b/docs/develop/osm2pgsql-tables.svg
--- a/docs/develop/parenting-flow.plantuml
+++ b/docs/develop/parenting-flow.plantuml
@@ -0,0 +1,31 @@
+@startuml
+skinparam monochrome true
+
+start
+
+if (has 'addr:street'?) then (yes)
+  if (street with that name\n nearby?) then (yes)
+    :**Use closest street**
+     **with same name**;
+     kill
+  else (no)
+    :** Use closest**\n**street**;
+     kill
+  endif
+elseif (has 'addr:place'?) then (yes)
+  if (place with that name\n nearby?) then (yes)
+    :**Use closest place**
+     **with same name**;
+     kill
+  else (no)
+    :add addr:place to address;
+    :**Use closest place**\n**rank 16 to 25**;
+     kill
+  endif
+else (otherwise)
+ :**Use closest**\n**street**;
+ kill
+endif
+
+
+@enduml
--- a/docs/develop/parenting-flow.svg
+++ b/docs/develop/parenting-flow.svg
--- a/docs/develop/search-tables.plantuml
+++ b/docs/develop/search-tables.plantuml
@@ -0,0 +1,99 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+left to right direction
+
+map placex {
+  place_id => BIGINT
+  osm_type => CHAR(1)
+  osm_id => BIGINT
+  class => TEXT
+  type => TEXT
+  name => HSTORE
+  address => HSTORE
+  extratags => HSTORE
+  admin_level => SMALLINT
+  partition => SMALLINT
+  geometry_sector => INT
+  parent_place_id => BIGINT
+  linked_place_id => BIGINT
+  importance => DOUBLE
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  wikipedia => TEXT
+  country_code => VARCHAR(2)
+  housenumber => TEXT
+  postcode => TEXT
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  centroid => GEOMETRY
+  geometry => GEOMETRY
+}
+
+map search_name {
+  place_id => BIGINT
+  importance => DOUBLE
+  search_rank => SMALLINT
+  address_rank => SMALLINT
+  name_vector => INT[]
+  nameaddress_vector => INT[]
+  country_code => VARCHAR(2)
+  centroid => GEOMETRY
+}
+
+map word {
+  word_id => INT
+  word_token => TEXT
+  ... =>
+}
+
+map location_property_osmline {
+  place_id => BIGINT
+  osm_id => BIGINT
+  startnumber => INT
+  endnumber => INT
+  interpolationtype => TEXT
+  address => HSTORE
+  partition => SMALLINT
+  geometry_sector => INT
+  parent_place_id => BIGINT
+  country_code => VARCHAR(2)
+  postcode => TEXT
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  linegeo => GEOMETRY
+}
+
+map place_addressline {
+  place_id => BIGINT
+  address_place_id => BIGINT
+  distance => DOUBLE
+  cached_rank_address => SMALLINT
+  fromarea => BOOLEAN
+  isaddress => BOOLEAN
+}
+
+map location_postcode {
+  place_id => BIGINT
+  postcode => TEXT
+  parent_place_id => BIGINT
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  geometry => GEOMETRY
+}
+
+placex::place_id <-- search_name::place_id
+placex::place_id <-- place_addressline::place_id
+placex::place_id <-- place_addressline::address_place_id
+
+search_name::name_vector --> word::word_id
+search_name::nameaddress_vector --> word::word_id
+
+place_addressline -[hidden]> location_property_osmline
+search_name -[hidden]> place_addressline
+location_property_osmline -[hidden]-> location_postcode
+
+@enduml
--- a/docs/develop/search-tables.svg
+++ b/docs/develop/search-tables.svg
--- a/docs/extra.css
+++ b/docs/extra.css
@@ -2,6 +2,10 @@
    display: none!important
 }

+.wy-nav-content {
+    max-width: 900px!important
+}
+
 table {
    margin-bottom: 12pt
 }
@@ -13,3 +17,20 @@ th, td {
 th {
    background-color: #eee;
 }
+
+.doc-object h6 {
+    margin-bottom: 0.8em;
+    font-size: 130%;
+}
+
+.doc-object {
+    margin-bottom: 1.3em;
+}
+
+.doc-children .doc-contents {
+    margin-left: 3em;
+}
+
+.md-footer__inner {
+    display: none;
+}
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,8 +1,15 @@
-Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
+Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
+address and to generate synthetic addresses of OSM points (reverse geocoding).
+It also has limited capability to search features by their type
+(pubs, hotels, churches, etc).

-This guide comes in three parts:
+This guide comes in five parts:

 * __[API reference](api/Overview.md)__ for users of Nominatim
 * __[Administration Guide](admin/Installation.md)__ for those who want
   to install their own Nominatim server
+ * __[Customization Guide](customize/Overview.md)__ for those who want to
+   adapt their own installation to their special requirements
+ * __[Library Guide](library/Getting-Started.md)__ for Python developers who
+   want to use Nominatim as a library in their project
 * __[Developer's Guide](develop/overview.md)__ for developers of the software
--- a/docs/library/Configuration.md
+++ b/docs/library/Configuration.md
@@ -0,0 +1,31 @@
+# Configuration
+
+When using Nominatim through the library, it can be configured in exactly
+the same way as when running as a service. This means that you should have
+created a [project directory](../admin/Import.md#creating-the-project-directory)
+which contains all files belonging to the Nominatim instance. It can also contain
+an `.env` file with configuration options. Setting configuration parameters
+via environment variables works as well.
+
+Configuration options are resolved in the following order:
+
+* from the OS environment (or the dictionary given in `environ`, see
+  [NominatimAPI](NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__))
+* from the .env file in the project directory of the installation
+* from the default installation in the configuration directory
+
+For more information on configuration via dotenv and a list of possible
+configuration parameters, see the [Configuration page](../customize/Settings.md).
+
+
+## `Configuration` class
+
+::: nominatim.config.Configuration
+    options:
+        members:
+            - get_bool
+            - get_int
+            - get_str_list
+            - get_path
+        heading_level: 6
+        show_signature_annotations: True
--- a/docs/library/Getting-Started.md
+++ b/docs/library/Getting-Started.md
@@ -0,0 +1,248 @@
+# Getting Started
+
+The Nominatim search frontend can directly be used as a Python library in
+scripts and applications. When you have imported your own Nominatim database,
+then it is no longer necessary to run a full web service for it and access
+the database through http requests. There are
+also fewer constraints on the kinds of data that can be accessed. The library
+allows to get access to more detailed information about the objects saved
+in the database.
+
+!!! danger
+    The library interface is currently in an experimental stage. There might
+    be some smaller adjustments to the public interface until the next version.
+
+    The library also misses a proper installation routine, so some manipulation
+    of the PYTHONPATH is required. At the moment, use is only recommended for
+    developers with some experience in Python.
+
+## Installation
+
+To use the Nominatim library, you need access to a local Nominatim database.
+Follow the [installation](../admin/Installation.md) and
+[import](../admin/Import.md) instructions to set up your database.
+
+It is not yet possible to install it in the usual way via pip or inside a
+virtualenv. To get access to the library you need to set an appropriate
+`PYTHONPATH`. With the default installation, the python library can be found
+under `/usr/local/share/nominatim/lib-python`. If you have installed
+Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
+You can also point the `PYTHONPATH` to the Nominatim source code.
+
+### A simple search example
+
+To query the Nominatim database you need to first set up a connection. This
+is done by creating a Nominatim API object. This object exposes all the
+search functions of Nominatim that are also known from its web API.
+
+This code snippet implements a simple search for the town of 'Brugge':
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'))
+
+            return await api.search(query)
+
+        results = asyncio.run(search('Brugge'))
+        if not results:
+            print('Cannot find Brugge')
+        else:
+            print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        api = napi.NominatimAPI(Path('.'))
+
+        results = api.search('Brugge')
+
+        if not results:
+            print('Cannot find Brugge')
+        else:
+            print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+        ```
+
+The Nominatim library is designed around
+[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
+provides you with an interface of coroutines.
+If you have many requests to make, coroutines can speed up your applications
+significantly.
+
+For smaller scripts there is also a synchronous wrapper around the API. By
+using `NominatimAPI`, you get exactly the same interface using classic functions.
+
+The examples in this chapter will always show-case both
+implementations. The documentation itself will usually refer only to
+'Nominatim API class' when both flavours are meant. If a functionality is
+available only for the synchronous or asynchronous version, this will be
+explicitly mentioned.
+
+### Defining which database to use
+
+The [Configuration](../admin/Import.md#configuration-setup-in-env)
+section explains how Nominatim is configured using the
+[dotenv](https://github.com/theskumar/python-dotenv) library.
+The same configuration mechanism is used with the
+Nominatim API library. You should therefore be sure you are familiar with
+the section.
+
+The constructor of the 'Nominatim API class' takes one mandatory parameter:
+the path to the [project directory](../admin/Import.md#creating-the-project-directory).
+You should have set up this directory as part of the Nominatim import.
+Any configuration found in the `.env` file in this directory will automatically
+be used.
+
+You may also configure Nominatim by setting environment variables.
+Normally, Nominatim will check the operating system environment. This can be
+overwritten by giving the constructor a dictionary of configuration parameters.
+
+Let us look up 'Brugge' in the special database named 'belgium' instead of the
+standard 'nominatim' database:
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        config_params = {
+            'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+        }
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
+
+            return await api.search(query)
+
+        results = asyncio.run(search('Brugge'))
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        config_params = {
+            'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+        }
+
+        api = napi.NominatimAPI(Path('.'), environ=config_params)
+
+        results = api.search('Brugge')
+        ```
+
+### Presenting results to humans
+
+All search functions return the raw results from the database. There is no
+full human-readable label. To create such a label, you need two things:
+
+* the address details of the place
+* adapt the result to the language you wish to use for display
+
+Again searching for 'Brugge', this time with a nicely formatted result:
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'))
+
+            return await api.search(query, address_details=True)
+
+        results = asyncio.run(search('Brugge'))
+
+        locale = napi.Locales(['fr', 'en'])
+        for i, result in enumerate(results):
+            address_parts = result.address_rows.localize(locale)
+            print(f"{i + 1}. {', '.join(address_parts)}")
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        api = napi.NominatimAPI(Path('.'))
+
+        results = api.search('Brugge', address_details=True)
+
+        locale = napi.Locales(['fr', 'en'])
+        for i, result in enumerate(results):
+            address_parts = result.address_rows.localize(locale)
+            print(f"{i + 1}. {', '.join(address_parts)}")
+        ```
+
+To request information about the address of a result, add the optional
+parameter 'address_details' to your search:
+
+``` python
+>>> results = api.search('Brugge', address_details=True)
+```
+
+An additional field `address_rows` will be set in results that are returned.
+It contains a list of all places that make up the address of the place. For
+simplicity, this includes name and house number of the place itself. With
+the names in this list it is possible to create a human-readable description
+of the result. To do that, you first need to decide in which language the
+results should be presented. As with the names in the result itself, the
+places in `address_rows` contain all possible name translations for each row.
+
+The library has a helper class `Locales` which helps extracting a name of a
+place in the preferred language. It takes a single parameter with a list
+of language codes in the order of preference. So
+
+``` python
+locale = napi.Locales(['fr', 'en'])
+```
+
+creates a helper object that returns the name preferably in French. If that is
+not possible, it tries English and eventually falls back to the default `name`
+or `ref`.
+
+The `Locales` object can be applied to a name dictionary to return the best-matching
+name out of it:
+
+``` python
+>>> print(locale.display_name(results[0].names))
+Bruges
+```
+
+The `address_rows` field has a helper function to apply the function to all
+its members and save the result in the `local_name` field. It also returns
+all the localized names as a convenient simple list. This list can be used
+to create a human-readable output:
+
+``` python
+>>> address_parts = results[0].address_rows.localize(locale)
+>>> print(', '.join(address_parts))
+Bruges, Flandre-Occidentale, Flandre, Belgique
+```
+
+This is a fairly simple way to create a human-readable description. The
+place information in `address_rows` contains further information about each
+place. For example, which OSM `admin_level` was used, what category the place
+belongs to or what rank Nominatim has assigned. Use this to adapt the output
+to local address formats.
+
+For more information on address rows, see
+[detailed address description](Result-Handling.md#detailed-address-description).
--- a/docs/library/Input-Parameter-Types.md
+++ b/docs/library/Input-Parameter-Types.md
@@ -0,0 +1,62 @@
+# Input Parameter Types
+
+This page describes in more detail some of the input parameter types used
+in the query functions of the API object.
+
+## Place identification
+
+The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
+[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
+require references to places in the database. Below the possible
+types for place identification are listed. All types are dataclasses.
+
+### PlaceID
+
+::: nominatim.api.PlaceID
+    options:
+        heading_level: 6
+
+### OsmID
+
+::: nominatim.api.OsmID
+    options:
+        heading_level: 6
+
+## Geometry types
+
+::: nominatim.api.GeometryFormat
+    options:
+        heading_level: 6
+        members_order: source
+
+## Geometry input
+
+### Point
+
+::: nominatim.api.Point
+    options:
+        heading_level: 6
+        show_signature_annotations: True
+
+### Bbox
+
+::: nominatim.api.Bbox
+    options:
+        heading_level: 6
+        show_signature_annotations: True
+        members_order: source
+        group_by_category: False
+
+## Layers
+
+Layers allow you to restrict the search result to thematic groups. This is
+orthogonal to restriction by address ranks, which groups places by their
+geographic extent.
+
+
+::: nominatim.api.DataLayer
+    options:
+        heading_level: 6
+        members_order: source
+
+
--- a/docs/library/Low-Level-DB-Access.md
+++ b/docs/library/Low-Level-DB-Access.md
@@ -0,0 +1,56 @@
+# Low-level connections
+
+The `NominatimAPIAsync` class allows to directly access the underlying
+database connection to explore the raw data. Nominatim uses
+[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
+refer to the documentation of the library to understand how to write SQL.
+
+To get access to a search connection, use the `begin()` function of your
+API object. This returns a `SearchConnection` object described below
+wrapped in a context manager. Its
+`t` property has definitions for all Nominatim search tables. For an
+overview of available tables, refer to the
+[Development Layout](../develop/Database-Layout.md) in the development
+chapter. Note that only tables that are needed for search are accessible
+as SQLAlchemy tables.
+
+!!! warning
+    The database layout is not part of the API definition and may change
+    without notice. If you play with the low-level access functions, you
+    need to be prepared for such changes.
+
+Here is a simple example, which prints how many places are available in
+the placex table:
+
+``` python
+import asyncio
+from pathlib import Path
+import sqlalchemy as sa
+from nominatim.api import NominatimAPIAsync
+
+async def print_table_size():
+    api = NominatimAPIAsync(Path('.'))
+
+    async with api.begin() as conn:
+        cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
+        print(f'placex table has {cnt} rows.')
+
+asyncio.run(print_table_size())
+```
+
+!!! warning
+    Low-level connections may only be used to read data from the database.
+    Do not use it to add or modify data or you might break Nominatim's
+    normal functions.
+
+## SearchConnection class
+
+::: nominatim.api.SearchConnection
+    options:
+        members:
+            - scalar
+            - execute
+            - get_class_table
+            - get_db_property
+            - get_property
+        heading_level: 6
--- a/docs/library/NominatimAPI.md
+++ b/docs/library/NominatimAPI.md
@@ -0,0 +1,36 @@
+# The Nominatim API classes
+
+The API classes are the core objects of the search library. Always instantiate
+one of these classes first. The API classes are **not threadsafe**. You need
+to instantiate a separate instance for each thread.
+
+### NominatimAPI
+
+::: nominatim.api.NominatimAPI
+    options:
+        members:
+            - __init__
+            - config
+            - close
+            - status
+            - details
+            - lookup
+            - reverse
+            - search
+            - search_address
+            - search_category
+        heading_level: 6
+        group_by_category: False
+
+
+### NominatimAPIAsync
+
+::: nominatim.api.NominatimAPIAsync
+    options:
+        members:
+            - __init__
+            - setup_database
+            - close
+            - begin
+        heading_level: 6
+        group_by_category: False
--- a/docs/library/Result-Handling.md
+++ b/docs/library/Result-Handling.md
@@ -0,0 +1,58 @@
+# Result handling
+
+The search functions of the Nominatim API always return a result object
+with the raw information about the place that is available in the
+database. This section discusses data types used in the results and utility
+functions that allow further processing of the results.
+
+## Result fields
+
+### Sources
+
+Nominatim takes the result data from multiple sources. The `source_table` field
+in the result describes from which source the result was retrieved.
+
+::: nominatim.api.SourceTable
+    options:
+        heading_level: 6
+        members_order: source
+
+### Detailed address description
+
+When the `address_details` parameter is set, then functions return not
+only information about the result place but also about the places that
+make up the address. This information is almost always required when you
+want to present the user with a human-readable description of the result.
+See also [Localization](#localization) below.
+
+The address details are available in the `address_rows` field as an ordered
+list of `AddressLine` objects with the country information last. The list also
+contains the result place itself and some artificial entries, for example,
+for the house number or the country code. This makes processing and creating
+a full address easier.
+
+::: nominatim.api.AddressLine
+    options:
+        heading_level: 6
+        members_order: source
+
+### Detailed search terms
+
+The `details` function can return detailed information about which search terms
+may be used to find a place, when the `keywords` parameter is set. Search
+terms are split into terms for the name of the place and search terms for
+its address.
+
+::: nominatim.api.WordInfo
+    options:
+        heading_level: 6
+
+## Localization
+
+Results are always returned with the full list of available names.
+
+### Locale
+
+::: nominatim.api.Locales
+    options:
+        heading_level: 6
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -1,9 +1,13 @@
-site_name: Nominatim Documentation
-theme: readthedocs
+site_name: Nominatim Manual
+theme:
+  name: material
+  features:
+    - navigation.tabs
+copyright: Copyright &copy; Nominatim developer community
 docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
 site_url: https://nominatim.org
 repo_url: https://github.com/openstreetmap/Nominatim
-pages:
+nav:
    - 'Introduction' : 'index.md'
    - 'API Reference':
        - 'Overview': 'api/Overview.md'
@@ -18,27 +22,59 @@ pages:
        - 'Basic Installation': 'admin/Installation.md'
        - 'Import' : 'admin/Import.md'
        - 'Update' : 'admin/Update.md'
-        - 'Deploy' : 'admin/Deployment.md'
+        - 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
+        - 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
        - 'Nominatim UI'  : 'admin/Setup-Nominatim-UI.md'
        - 'Advanced Installations' : 'admin/Advanced-Installations.md'
+        - 'Maintenance' : 'admin/Maintenance.md'
        - 'Migration from older Versions' : 'admin/Migration.md'
        - 'Troubleshooting' : 'admin/Faq.md'
+    - 'Customization Guide':
+        - 'Overview': 'customize/Overview.md'
+        - 'Import Styles': 'customize/Import-Styles.md'
+        - 'Configuration Settings': 'customize/Settings.md'
+        - 'Per-Country Data': 'customize/Country-Settings.md'
+        - 'Place Ranking' : 'customize/Ranking.md'
+        - 'Importance' : 'customize/Importance.md'
+        - 'Tokenizers' : 'customize/Tokenizers.md'
+        - 'Special Phrases': 'customize/Special-Phrases.md'
+        - 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
+        - 'External data: Postcodes': 'customize/Postcodes.md'
+    - 'Library Guide':
+        - 'Getting Started': 'library/Getting-Started.md'
+        - 'Nominatim API class': 'library/NominatimAPI.md'
+        - 'Configuration': 'library/Configuration.md'
+        - 'Input Parameter Types': 'library/Input-Parameter-Types.md'
+        - 'Result Handling': 'library/Result-Handling.md'
+        - 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
    - 'Developers Guide':
-        - 'Setup for Development' : 'develop/Development-Environment.md'
        - 'Architecture Overview' : 'develop/overview.md'
-        - 'OSM Data Import' : 'develop/Import.md'
-        - 'Place Ranking' : 'develop/Ranking.md'
-        - 'Postcodes' : 'develop/Postcodes.md'
+        - 'Database Layout' : 'develop/Database-Layout.md'
+        - 'Indexing' : 'develop/Indexing.md'
+        - 'Tokenizers' : 'develop/Tokenizers.md'
+        - 'Custom modules for ICU tokenizer': 'develop/ICU-Tokenizer-Modules.md'
+        - 'Setup for Development' : 'develop/Development-Environment.md'
        - 'Testing' : 'develop/Testing.md'
        - 'External Data Sources': 'develop/data-sources.md'
    - 'Appendix':
-        - 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
-        - 'Installation on CentOS 8' : 'appendix/Install-on-Centos-8.md'
-        - 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
        - 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
+        - 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
 markdown_extensions:
    - codehilite
    - admonition
+    - pymdownx.superfences
+    - pymdownx.tabbed:
+        alternate_style: true
+    - def_list
    - toc:
        permalink: 
 extra_css: [extra.css, styles.css]
+plugins:
+    - search
+    - mkdocstrings:
+        handlers:
+          python:
+            paths: ["${PROJECT_SOURCE_DIR}"]
+            options:
+              show_source: False
+              show_bases: False
--- a/lib-php/AddressDetails.php
+++ b/lib-php/AddressDetails.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -61,7 +69,7 @@ class AddressDetails
        return join(', ', $aParts);
    }

-    public function getAddressNames($sCountry = null)
+    public function getAddressNames()
    {
        $aAddress = array();

@@ -79,12 +87,14 @@ class AddressDetails
                $sName = $aLine['housenumber'];
            }

-            if (isset($sName)) {
-                $sTypeLabel = strtolower(str_replace(' ', '_', $sTypeLabel));
-                if (!isset($aAddress[$sTypeLabel])
-                    || $aLine['class'] == 'place'
-                ) {
-                    $aAddress[$sTypeLabel] = $sName;
+            if (isset($sName)
+                && (!isset($aAddress[$sTypeLabel])
+                    || $aLine['class'] == 'place')
+            ) {
+                $aAddress[$sTypeLabel] = $sName;
+
+                if (!empty($aLine['name'])) {
+                    $this->addSubdivisionCode($aAddress, $aLine['admin_level'], $aLine['name']);
                }
            }
        }
@@ -168,4 +178,14 @@ class AddressDetails
    {
        return $this->aAddressLines;
    }
+
+    private function addSubdivisionCode(&$aAddress, $iAdminLevel, $nameDetails)
+    {
+        if (is_string($nameDetails)) {
+            $nameDetails = json_decode('{' . str_replace('"=>"', '":"', $nameDetails) . '}', true);
+        }
+        if (!empty($nameDetails['ISO3166-2'])) {
+            $aAddress["ISO3166-2-lvl$iAdminLevel"] = $nameDetails['ISO3166-2'];
+        }
+    }
 }
--- a/lib-php/ClassTypes.php
+++ b/lib-php/ClassTypes.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim\ClassTypes;

--- a/lib-php/DB.php
+++ b/lib-php/DB.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -30,18 +38,25 @@ class DB

        // https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
        try {
-            $conn = new \PDO($this->sDSN, null, null, $aConnOptions);
+            $this->connection = new \PDO($this->sDSN, null, null, $aConnOptions);
        } catch (\PDOException $e) {
            $sMsg = 'Failed to establish database connection:' . $e->getMessage();
            throw new \Nominatim\DatabaseError($sMsg, 500, null, $e->getMessage());
        }

-        $conn->exec("SET DateStyle TO 'sql,european'");
-        $conn->exec("SET client_encoding TO 'utf-8'");
+        $this->connection->exec("SET DateStyle TO 'sql,european'");
+        $this->connection->exec("SET client_encoding TO 'utf-8'");
+        // Disable JIT and parallel workers. They interfere badly with search SQL.
+        $this->connection->exec('SET max_parallel_workers_per_gather TO 0');
+        if ($this->getPostgresVersion() >= 11) {
+            $this->connection->exec('SET jit_above_cost TO -1');
+        }
+        
        $iMaxExecution = ini_get('max_execution_time');
-        if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
+        if ($iMaxExecution > 0) {
+            $this->connection->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
+        }

-        $this->connection = $conn;
        return true;
    }

@@ -95,7 +110,9 @@ class DB
        try {
            $stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
            $row = $stmt->fetch(\PDO::FETCH_NUM);
-            if ($row === false) return false;
+            if ($row === false) {
+                return false;
+            }
        } catch (\PDOException $e) {
            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
        }
@@ -306,9 +323,13 @@ class DB
        if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
            foreach (explode(';', $aMatches[1]) as $sKeyVal) {
                list($sKey, $sVal) = explode('=', $sKeyVal, 2);
-                if ($sKey == 'host') $sKey = 'hostspec';
-                if ($sKey == 'dbname') $sKey = 'database';
-                if ($sKey == 'user') $sKey = 'username';
+                if ($sKey == 'host') {
+                    $sKey = 'hostspec';
+                } elseif ($sKey == 'dbname') {
+                    $sKey = 'database';
+                } elseif ($sKey == 'user') {
+                    $sKey = 'username';
+                }
                $aInfo[$sKey] = $sVal;
            }
        }
--- a/lib-php/DatabaseError.php
+++ b/lib-php/DatabaseError.php
@@ -1,11 +1,19 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

 class DatabaseError extends \Exception
 {

-    public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null)
+    public function __construct($message, $code, $previous, $oPDOErr, $sSql = null)
    {
        parent::__construct($message, $code, $previous);
        // https://secure.php.net/manual/en/class.pdoexception.php
--- a/lib-php/DebugHtml.php
+++ b/lib-php/DebugHtml.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -78,7 +86,7 @@ class Debug
        echo '<th>Address Tokens</th><th>Address Not</th>';
        echo '<th>country</th><th>operator</th>';
        echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
-        foreach ($aSearches as $iRank => $aRankedSet) {
+        foreach ($aSearches as $aRankedSet) {
            foreach ($aRankedSet as $aRow) {
                $aRow->dumpAsHtmlTableRow($aWordsIDs);
            }
@@ -127,7 +135,7 @@ class Debug

    public static function printSQL($sSQL)
    {
-        echo '<p><tt><font color="#aaa">'.$sSQL.'</font></tt></p>'."\n";
+        echo '<p><tt><b>'.date('c').'</b> <font color="#aaa">'.htmlspecialchars($sSQL, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401).'</font></tt></p>'."\n";
    }

    private static function outputVar($mVar, $sPreNL)
@@ -170,11 +178,12 @@ class Debug
        }

        if (is_string($mVar)) {
-            echo "'$mVar'";
-            return strlen($mVar) + 2;
+            $sOut = "'$mVar'";
+        } else {
+            $sOut = (string)$mVar;
        }

-        echo (string)$mVar;
-        return strlen((string)$mVar);
+        echo htmlspecialchars($sOut, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
+        return strlen($sOut);
    }
 }
--- a/lib-php/DebugNone.php
+++ b/lib-php/DebugNone.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

--- a/lib-php/Geocode.php
+++ b/lib-php/Geocode.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -7,18 +15,20 @@ require_once(CONST_LibDir.'/Phrase.php');
 require_once(CONST_LibDir.'/ReverseGeocode.php');
 require_once(CONST_LibDir.'/SearchDescription.php');
 require_once(CONST_LibDir.'/SearchContext.php');
+require_once(CONST_LibDir.'/SearchPosition.php');
 require_once(CONST_LibDir.'/TokenList.php');
+require_once(CONST_TokenizerDir.'/tokenizer.php');

 class Geocode
 {
    protected $oDB;

    protected $oPlaceLookup;
+    protected $oTokenizer;

    protected $aLangPrefOrder = array();

    protected $aExcludePlaceIDs = array();
-    protected $bReverseInPlan = true;

    protected $iLimit = 20;
    protected $iFinalLimit = 10;
@@ -42,28 +52,12 @@ class Geocode
    protected $sQuery = false;
    protected $aStructuredQuery = false;

-    protected $oNormalizer = null;
-

    public function __construct(&$oDB)
    {
        $this->oDB =& $oDB;
        $this->oPlaceLookup = new PlaceLookup($this->oDB);
-        $this->oNormalizer = \Transliterator::createFromRules(CONST_Term_Normalization_Rules);
-    }
-
-    private function normTerm($sTerm)
-    {
-        if ($this->oNormalizer === null) {
-            return $sTerm;
-        }
-
-        return $this->oNormalizer->transliterate($sTerm);
-    }
-
-    public function setReverseInPlan($bReverse)
-    {
-        $this->bReverseInPlan = $bReverse;
+        $this->oTokenizer = new \Nominatim\Tokenizer($this->oDB);
    }

    public function setLanguagePreference($aLangPref)
@@ -85,7 +79,9 @@ class Geocode
            $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs);
        }

-        if ($this->bBoundedSearch) $aParams['bounded'] = '1';
+        if ($this->bBoundedSearch) {
+            $aParams['bounded'] = '1';
+        }

        if ($this->aCountryCodes) {
            $aParams['countrycodes'] = implode(',', $this->aCountryCodes);
@@ -100,11 +96,14 @@ class Geocode

    public function setLimit($iLimit = 10)
    {
-        if ($iLimit > 50) $iLimit = 50;
-        if ($iLimit < 1) $iLimit = 1;
+        if ($iLimit > 50) {
+            $iLimit = 50;
+        } elseif ($iLimit < 1) {
+            $iLimit = 1;
+        }

        $this->iFinalLimit = $iLimit;
-        $this->iLimit = $iLimit + min($iLimit, 10);
+        $this->iLimit = $iLimit + max($iLimit, 10);
    }

    public function setFeatureType($sFeatureType)
@@ -191,23 +190,29 @@ class Geocode

        $this->bFallback = $oParams->getBool('fallback', $this->bFallback);

-        // List of excluded Place IDs - used for more acurate pageing
+        // List of excluded Place IDs - used for more accurate pageing
        $sExcluded = $oParams->getStringList('exclude_place_ids');
        if ($sExcluded) {
            foreach ($sExcluded as $iExcludedPlaceID) {
                $iExcludedPlaceID = (int)$iExcludedPlaceID;
-                if ($iExcludedPlaceID)
+                if ($iExcludedPlaceID) {
                    $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID;
+                }
            }

-            if (isset($aExcludePlaceIDs))
+            if (isset($aExcludePlaceIDs)) {
                $this->aExcludePlaceIDs = $aExcludePlaceIDs;
+            }
        }

        // Only certain ranks of feature
        $sFeatureType = $oParams->getString('featureType');
-        if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype');
-        if ($sFeatureType) $this->setFeatureType($sFeatureType);
+        if (!$sFeatureType) {
+            $sFeatureType = $oParams->getString('featuretype');
+        }
+        if ($sFeatureType) {
+            $this->setFeatureType($sFeatureType);
+        }

        // Country code list
        $sCountries = $oParams->getStringList('countrycodes');
@@ -217,8 +222,9 @@ class Geocode
                    $aCountries[] = strtolower($sCountryCode);
                }
            }
-            if (isset($aCountries))
+            if (isset($aCountries)) {
                $this->aCountryCodes = $aCountries;
+            }
        }

        $aViewbox = $oParams->getStringList('viewboxlbrt');
@@ -262,7 +268,6 @@ class Geocode
                $oParams->getString('country'),
                $oParams->getString('postalcode')
            );
-            $this->setReverseInPlan(false);
        } else {
            $this->setQuery($sQuery);
        }
@@ -271,13 +276,17 @@ class Geocode
    public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues)
    {
        $sValue = trim($sValue);
-        if (!$sValue) return false;
+        if (!$sValue) {
+            return false;
+        }
        $this->aStructuredQuery[$sKey] = $sValue;
        if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) {
            $this->iMinAddressRank = $iNewMinAddressRank;
            $this->iMaxAddressRank = $iNewMaxAddressRank;
        }
-        if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
+        if ($aItemListValues) {
+            $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues);
+        }
        return true;
    }

@@ -311,11 +320,11 @@ class Geocode

    public function fallbackStructuredQuery()
    {
-        if (!$this->aStructuredQuery) return false;
-
        $aParams = $this->aStructuredQuery;

-        if (count($aParams) == 1) return false;
+        if (!$aParams || count($aParams) == 1) {
+            return false;
+        }

        $aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state');

@@ -330,7 +339,7 @@ class Geocode
        return false;
    }

-    public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
+    public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens)
    {
        /*
             Calculate all searches using oValidTokens i.e.
@@ -345,52 +354,26 @@ class Geocode
         */
        foreach ($aPhrases as $iPhrase => $oPhrase) {
            $aNewPhraseSearches = array();
-            $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
+            $oPosition = new SearchPosition(
+                $oPhrase->getPhraseType(),
+                $iPhrase,
+                count($aPhrases)
+            );

            foreach ($oPhrase->getWordSets() as $aWordset) {
                $aWordsetSearches = $aSearches;

                // Add all words from this wordset
                foreach ($aWordset as $iToken => $sToken) {
-                    //echo "<br><b>$sToken</b>";
                    $aNewWordsetSearches = array();
+                    $oPosition->setTokenPosition($iToken, count($aWordset));

                    foreach ($aWordsetSearches as $oCurrentSearch) {
-                        //echo "<i>";
-                        //var_dump($oCurrentSearch);
-                        //echo "</i>";
-
-                        // Tokens with full name matches.
-                        foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
-                            $aNewSearches = $oCurrentSearch->extendWithFullTerm(
-                                $oSearchTerm,
-                                $oValidTokens->contains($sToken)
-                                  && strpos($sToken, ' ') === false,
-                                $sPhraseType,
-                                $iToken == 0 && $iPhrase == 0,
-                                $iPhrase == 0,
-                                $iToken + 1 == count($aWordset)
-                                  && $iPhrase + 1 == count($aPhrases)
-                            );
-
-                            foreach ($aNewSearches as $oSearch) {
-                                if ($oSearch->getRank() < $this->iMaxRank) {
-                                    $aNewWordsetSearches[] = $oSearch;
-                                }
-                            }
-                        }
-                        // Look for partial matches.
-                        // Note that there is no point in adding country terms here
-                        // because country is omitted in the address.
-                        if ($sPhraseType != 'country') {
-                            // Allow searching for a word - but at extra cost
-                            foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
-                                $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
-                                    $sToken,
-                                    $oSearchTerm,
-                                    $bIsStructured,
-                                    $iPhrase,
-                                    $oValidTokens->get(' '.$sToken)
+                        foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
+                            if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) {
+                                $aNewSearches = $oSearchTerm->extendSearch(
+                                    $oCurrentSearch,
+                                    $oPosition
                                );

                                foreach ($aNewSearches as $oSearch) {
@@ -405,7 +388,6 @@ class Geocode
                    usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
                    $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
                }
-                //var_Dump('<hr>',count($aWordsetSearches)); exit;

                $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
                usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
@@ -413,8 +395,11 @@ class Geocode
                $aSearchHash = array();
                foreach ($aNewPhraseSearches as $iSearch => $aSearch) {
                    $sHash = serialize($aSearch);
-                    if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
-                    else $aSearchHash[$sHash] = 1;
+                    if (isset($aSearchHash[$sHash])) {
+                        unset($aNewPhraseSearches[$iSearch]);
+                    } else {
+                        $aSearchHash[$sHash] = 1;
+                    }
                }

                $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
@@ -435,10 +420,12 @@ class Geocode

            $iSearchCount = 0;
            $aSearches = array();
-            foreach ($aGroupedSearches as $iScore => $aNewSearches) {
+            foreach ($aGroupedSearches as $aNewSearches) {
                $iSearchCount += count($aNewSearches);
                $aSearches = array_merge($aSearches, $aNewSearches);
-                if ($iSearchCount > 50) break;
+                if ($iSearchCount > 50) {
+                    break;
+                }
            }
        }

@@ -495,7 +482,9 @@ class Geocode
    public function lookup()
    {
        Debug::newFunction('Geocode::lookup');
-        if (!$this->sQuery && !$this->aStructuredQuery) return array();
+        if (!$this->sQuery && !$this->aStructuredQuery) {
+            return array();
+        }

        Debug::printDebugArray('Geocode', $this);

@@ -520,25 +509,11 @@ class Geocode

        Debug::newSection('Query Preprocessing');

-        $sNormQuery = $this->normTerm($this->sQuery);
-        Debug::printVar('Normalized query', $sNormQuery);
-
-        $sLanguagePrefArraySQL = $this->oDB->getArraySQL(
-            $this->oDB->getDBQuotedList($this->aLangPrefOrder)
-        );
-
        $sQuery = $this->sQuery;
        if (!preg_match('//u', $sQuery)) {
            userError('Query string is not UTF-8 encoded.');
        }

-        // Conflicts between US state abreviations and various words for 'the' in different languages
-        if (isset($this->aLangPrefOrder['name:en'])) {
-            $sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery);
-            $sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery);
-            $sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery);
-        }
-
        // Do we have anything that looks like a lat/lon pair?
        $sQuery = $oCtx->setNearPointFromQuery($sQuery);

@@ -576,117 +551,62 @@ class Geocode
            }

            if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
-                $sSpecialTerm = pg_escape_string($sSpecialTerm);
-                $sToken = $this->oDB->getOne(
-                    'SELECT make_standard_name(:term)',
-                    array(':term' => $sSpecialTerm),
-                    'Cannot decode query. Wrong encoding?'
-                );
-                $sSQL = 'SELECT class, type FROM word ';
-                $sSQL .= '   WHERE word_token in (\' '.$sToken.'\')';
-                $sSQL .= '   AND class is not null AND class not in (\'place\')';
+                $aTokens = $this->oTokenizer->tokensForSpecialTerm($sSpecialTerm);

-                Debug::printSQL($sSQL);
-                $aSearchWords = $this->oDB->getAll($sSQL);
-                $aNewSearches = array();
-                foreach ($aSearches as $oSearch) {
-                    foreach ($aSearchWords as $aSearchTerm) {
-                        $oNewSearch = clone $oSearch;
-                        $oNewSearch->setPoiSearch(
-                            Operator::TYPE,
-                            $aSearchTerm['class'],
-                            $aSearchTerm['type']
-                        );
-                        $aNewSearches[] = $oNewSearch;
+                if (!empty($aTokens)) {
+                    $aNewSearches = array();
+                    $oPosition = new SearchPosition('', 0, 1);
+                    $oPosition->setTokenPosition(0, 1);
+
+                    foreach ($aSearches as $oSearch) {
+                        foreach ($aTokens as $oToken) {
+                            $aNewSearches = array_merge(
+                                $aNewSearches,
+                                $oToken->extendSearch($oSearch, $oPosition)
+                            );
+                        }
                    }
+                    $aSearches = $aNewSearches;
                }
-                $aSearches = $aNewSearches;
            }

            // Split query into phrases
            // Commas are used to reduce the search space by indicating where phrases split
+            $aPhrases = array();
            if ($this->aStructuredQuery) {
-                $aInPhrases = $this->aStructuredQuery;
-                $bStructuredPhrases = true;
+                foreach ($this->aStructuredQuery as $iPhrase => $sPhrase) {
+                    $aPhrases[] = new Phrase($sPhrase, $iPhrase);
+                }
            } else {
-                $aInPhrases = explode(',', $sQuery);
-                $bStructuredPhrases = false;
+                foreach (explode(',', $sQuery) as $sPhrase) {
+                    $aPhrases[] = new Phrase($sPhrase, '');
+                }
            }

            Debug::printDebugArray('Search context', $oCtx);
            Debug::printDebugArray('Base search', empty($aSearches) ? null : $aSearches[0]);
-            Debug::printVar('Final query phrases', $aInPhrases);

-            // Convert each phrase to standard form
-            // Create a list of standard words
-            // Get all 'sets' of words
-            // Generate a complete list of all
            Debug::newSection('Tokenization');
-            $aTokens = array();
-            $aPhrases = array();
-            foreach ($aInPhrases as $iPhrase => $sPhrase) {
-                $sPhrase = $this->oDB->getOne(
-                    'SELECT make_standard_name(:phrase)',
-                    array(':phrase' => $sPhrase),
-                    'Cannot normalize query string (is it a UTF-8 string?)'
-                );
-                if (trim($sPhrase)) {
-                    $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
-                    $oPhrase->addTokens($aTokens);
-                    $aPhrases[] = $oPhrase;
-                }
-            }
-
-            Debug::printVar('Tokens', $aTokens);
-
-            $oValidTokens = new TokenList();
-
-            if (!empty($aTokens)) {
-                $oValidTokens->addTokensFromDB(
-                    $this->oDB,
-                    $aTokens,
-                    $this->aCountryCodes,
-                    $sNormQuery,
-                    $this->oNormalizer
-                );
+            $oValidTokens = $this->oTokenizer->extractTokensFromPhrases($aPhrases);

+            if ($oValidTokens->count() > 0) {
                $oCtx->setFullNameWords($oValidTokens->getFullWordIDs());

-                // Try more interpretations for Tokens that could not be matched.
-                foreach ($aTokens as $sToken) {
-                    if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
-                        if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                            // US ZIP+4 codes - merge in the 5-digit ZIP code
-                            $oValidTokens->addToken(
-                                $sToken,
-                                new Token\Postcode(null, $aData[1], 'us')
-                            );
-                        } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
-                            // Unknown single word token with a number.
-                            // Assume it is a house number.
-                            $oValidTokens->addToken(
-                                $sToken,
-                                new Token\HouseNumber(null, trim($sToken))
-                            );
-                        }
-                    }
-                }
+                $aPhrases = array_filter($aPhrases, function ($oPhrase) {
+                    return $oPhrase->getWordSets() !== null;
+                });

                // Any words that have failed completely?
                // TODO: suggestions

                Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
-
-                foreach ($aPhrases as $oPhrase) {
-                    $oPhrase->computeWordSets($oValidTokens);
-                }
                Debug::printDebugTable('Phrases', $aPhrases);

                Debug::newSection('Search candidates');

-                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
+                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);

-                if ($this->bReverseInPlan) {
+                if (!$this->aStructuredQuery) {
                    // Reverse phrase array and also reverse the order of the wordsets in
                    // the first and final phrase. Don't bother about phrases in the middle
                    // because order in the address doesn't matter.
@@ -695,18 +615,17 @@ class Geocode
                    if (count($aPhrases) > 1) {
                        $aPhrases[count($aPhrases)-1]->invertWordSets();
                    }
-                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
+                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);

-                    foreach ($aGroupedSearches as $aSearches) {
+                    foreach ($aReverseGroupedSearches as $aSearches) {
                        foreach ($aSearches as $aSearch) {
-                            if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
-                                $aReverseGroupedSearches[$aSearch->getRank()] = array();
+                            if (!isset($aGroupedSearches[$aSearch->getRank()])) {
+                                $aGroupedSearches[$aSearch->getRank()] = array();
                            }
-                            $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
+                            $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                        }
                    }

-                    $aGroupedSearches = $aReverseGroupedSearches;
                    ksort($aGroupedSearches);
                }
            } else {
@@ -714,7 +633,9 @@ class Geocode
                $aGroupedSearches = array();
                foreach ($aSearches as $aSearch) {
                    if ($aSearch->getRank() < $this->iMaxRank) {
-                        if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
+                        if (!isset($aGroupedSearches[$aSearch->getRank()])) {
+                            $aGroupedSearches[$aSearch->getRank()] = array();
+                        }
                        $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                    }
                }
@@ -728,7 +649,9 @@ class Geocode
                    $sHash = serialize($aSearch);
                    if (isset($aSearchHash[$sHash])) {
                        unset($aGroupedSearches[$iGroup][$iSearch]);
-                        if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]);
+                        if (empty($aGroupedSearches[$iGroup])) {
+                            unset($aGroupedSearches[$iGroup]);
+                        }
                    } else {
                        $aSearchHash[$sHash] = 1;
                    }
@@ -772,7 +695,9 @@ class Geocode
                        }
                    }

-                    if ($iQueryLoop > 20) break;
+                    if ($iQueryLoop > 20) {
+                        break;
+                    }
                }

                if (!empty($aResults)) {
@@ -838,7 +763,6 @@ class Geocode
                    foreach ($aResults as $oResult) {
                        if (($this->iMaxAddressRank == 30 &&
                             ($oResult->iTable == Result::TABLE_OSMLINE
-                              || $oResult->iTable == Result::TABLE_AUX
                              || $oResult->iTable == Result::TABLE_TIGER))
                            || in_array($oResult->iId, $aFilteredIDs)
                        ) {
@@ -848,9 +772,9 @@ class Geocode
                    $aResults = $tempIDs;
                }

-                if (!empty($aResults)) break;
-                if ($iGroupLoop > 4) break;
-                if ($iQueryLoop > 30) break;
+                if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) {
+                    break;
+                }
            }
        } else {
            // Just interpret as a reverse geocode
@@ -868,10 +792,8 @@ class Geocode

        // No results? Done
        if (empty($aResults)) {
-            if ($this->bFallback) {
-                if ($this->fallbackStructuredQuery()) {
-                    return $this->lookup();
-                }
+            if ($this->bFallback && $this->fallbackStructuredQuery()) {
+                return $this->lookup();
            }

            return array();
@@ -890,7 +812,9 @@ class Geocode

        $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery);
        foreach ($aRecheckWords as $i => $sWord) {
-            if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]);
+            if (!preg_match('/[\pL\pN]/', $sWord)) {
+                unset($aRecheckWords[$i]);
+            }
        }

        Debug::printVar('Recheck words', $aRecheckWords);
@@ -919,7 +843,9 @@ class Geocode
                $aResult['importance'] = 0.001;
                $aResult['foundorder'] = $aResult['addressimportance'];
            } else {
-                $aResult['importance'] = max(0.001, $aResult['importance']);
+                if ($aResult['importance'] == 0) {
+                    $aResult['importance'] = 0.0001;
+                }
                $aResult['importance'] *= $this->viewboxImportanceFactor(
                    $aResult['lon'],
                    $aResult['lat']
@@ -948,9 +874,11 @@ class Geocode
                $iCountWords = 0;
                $sAddress = $aResult['langaddress'];
                foreach ($aRecheckWords as $i => $sWord) {
-                    if (stripos($sAddress, $sWord)!==false) {
+                    if (grapheme_stripos($sAddress, $sWord)!==false) {
                        $iCountWords++;
-                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1;
+                        if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) {
+                            $iCountWords += 0.1;
+                        }
                    }
                }

@@ -967,15 +895,8 @@ class Geocode
        $aToFilter = $aSearchResults;
        $aSearchResults = array();

-        $bFirst = true;
        foreach ($aToFilter as $aResult) {
            $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id'];
-            if ($bFirst) {
-                $fLat = $aResult['lat'];
-                $fLon = $aResult['lon'];
-                if (isset($aResult['zoom'])) $iZoom = $aResult['zoom'];
-                $bFirst = false;
-            }
            if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']])
                && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']]))
            ) {
@@ -985,7 +906,9 @@ class Geocode
            }

            // Absolute limit on number of results
-            if (count($aSearchResults) >= $this->iFinalLimit) break;
+            if (count($aSearchResults) >= $this->iFinalLimit) {
+                break;
+            }
        }

        Debug::printVar('Post-filter results', $aSearchResults);
@@ -999,7 +922,6 @@ class Geocode
                'Structured query' => $this->aStructuredQuery,
                'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
                'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
-                'Try reversed query'=> $this->bReverseInPlan,
                'Limit (for searches)' => $this->iLimit,
                'Limit (for results)'=> $this->iFinalLimit,
                'Country codes' => Debug::fmtArrayVals($this->aCountryCodes),
--- a/lib-php/ParameterParser.php
+++ b/lib-php/ParameterParser.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -14,7 +22,10 @@ class ParameterParser

    public function getBool($sName, $bDefault = false)
    {
-        if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
+        if (!isset($this->aParams[$sName])
+            || !is_string($this->aParams[$sName])
+            || strlen($this->aParams[$sName]) == 0
+        ) {
            return $bDefault;
        }

@@ -23,7 +34,7 @@ class ParameterParser

    public function getInt($sName, $bDefault = false)
    {
-        if (!isset($this->aParams[$sName])) {
+        if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
            return $bDefault;
        }

@@ -36,7 +47,7 @@ class ParameterParser

    public function getFloat($sName, $bDefault = false)
    {
-        if (!isset($this->aParams[$sName])) {
+        if (!isset($this->aParams[$sName]) || is_array($this->aParams[$sName])) {
            return $bDefault;
        }

@@ -49,7 +60,10 @@ class ParameterParser

    public function getString($sName, $bDefault = false)
    {
-        if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
+        if (!isset($this->aParams[$sName])
+            || !is_string($this->aParams[$sName])
+            || strlen($this->aParams[$sName]) == 0
+        ) {
            return $bDefault;
        }

@@ -58,11 +72,14 @@ class ParameterParser

    public function getSet($sName, $aValues, $sDefault = false)
    {
-        if (!isset($this->aParams[$sName]) || strlen($this->aParams[$sName]) == 0) {
+        if (!isset($this->aParams[$sName])
+            || !is_string($this->aParams[$sName])
+            || strlen($this->aParams[$sName]) == 0
+        ) {
            return $sDefault;
        }

-        if (!in_array($this->aParams[$sName], $aValues)) {
+        if (!in_array($this->aParams[$sName], $aValues, true)) {
            userError("Parameter '$sName' must be one of: ".join(', ', $aValues));
        }

@@ -90,35 +107,43 @@ class ParameterParser
        $aLanguages = array();
        $sLangString = $this->getString('accept-language', $sFallback);

-        if ($sLangString) {
-            if (preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)) {
-                foreach ($aLanguagesParse as $iLang => $aLanguage) {
-                    $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
-                    if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
+        if ($sLangString
+            && preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)
+        ) {
+            foreach ($aLanguagesParse as $iLang => $aLanguage) {
+                $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100);
+                if (!isset($aLanguages[$aLanguage[2]])) {
+                    $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10;
                }
-                arsort($aLanguages);
            }
+            arsort($aLanguages);
        }
        if (empty($aLanguages) && CONST_Default_Language) {
            $aLanguages[CONST_Default_Language] = 1;
        }

        foreach ($aLanguages as $sLanguage => $fLanguagePref) {
-            $aLangPrefOrder['name:'.$sLanguage] = 'name:'.$sLanguage;
+            $this->addNameTag($aLangPrefOrder, 'name:'.$sLanguage);
        }
-        $aLangPrefOrder['name'] = 'name';
-        $aLangPrefOrder['brand'] = 'brand';
+        $this->addNameTag($aLangPrefOrder, 'name');
+        $this->addNameTag($aLangPrefOrder, 'brand');
        foreach ($aLanguages as $sLanguage => $fLanguagePref) {
-            $aLangPrefOrder['official_name:'.$sLanguage] = 'official_name:'.$sLanguage;
-            $aLangPrefOrder['short_name:'.$sLanguage] = 'short_name:'.$sLanguage;
+            $this->addNameTag($aLangPrefOrder, 'official_name:'.$sLanguage);
+            $this->addNameTag($aLangPrefOrder, 'short_name:'.$sLanguage);
        }
-        $aLangPrefOrder['official_name'] = 'official_name';
-        $aLangPrefOrder['short_name'] = 'short_name';
-        $aLangPrefOrder['ref'] = 'ref';
-        $aLangPrefOrder['type'] = 'type';
+        $this->addNameTag($aLangPrefOrder, 'official_name');
+        $this->addNameTag($aLangPrefOrder, 'short_name');
+        $this->addNameTag($aLangPrefOrder, 'ref');
+        $this->addNameTag($aLangPrefOrder, 'type');
        return $aLangPrefOrder;
    }

+    private function addNameTag(&$aLangPrefOrder, $sTag)
+    {
+        $aLangPrefOrder[$sTag] = $sTag;
+        $aLangPrefOrder['_place_'.$sTag] = '_place_'.$sTag;
+    }
+
    public function hasSetAny($aParamNames)
    {
        foreach ($aParamNames as $sName) {
--- a/lib-php/Phrase.php
+++ b/lib-php/Phrase.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -9,36 +17,26 @@ namespace Nominatim;
 */
 class Phrase
 {
-    const MAX_WORDSET_LEN = 20;
-    const MAX_WORDSETS = 100;
-
-    // Complete phrase as a string.
+    // Complete phrase as a string (guaranteed to have no leading or trailing
+    // spaces).
    private $sPhrase;
    // Element type for structured searches.
    private $sPhraseType;
-    // Space-separated words of the phrase.
-    private $aWords;
    // Possible segmentations of the phrase.
    private $aWordSets;

-    public static function cmpByArraylen($aA, $aB)
-    {
-        $iALen = count($aA);
-        $iBLen = count($aB);
-
-        if ($iALen == $iBLen) {
-            return 0;
-        }
-
-        return ($iALen < $iBLen) ? -1 : 1;
-    }
-
-
    public function __construct($sPhrase, $sPhraseType)
    {
        $this->sPhrase = trim($sPhrase);
        $this->sPhraseType = $sPhraseType;
-        $this->aWords = explode(' ', $this->sPhrase);
+    }
+
+    /**
+     * Get the complete phrase as a string (trimmed of surrounding whitespace).
+     */
+    public function getPhrase()
+    {
+        return $this->sPhrase;
    }

    /**
@@ -52,6 +50,11 @@ class Phrase
        return $this->sPhraseType;
    }

+    public function setWordSets($aWordSets)
+    {
+        $this->aWordSets = $aWordSets;
+    }
+
    /**
     * Return the array of possible segmentations of the phrase.
     *
@@ -63,30 +66,6 @@ class Phrase
        return $this->aWordSets;
    }

-    /**
-     * Add the tokens from this phrase to the given list of tokens.
-     *
-     * @param string[] $aTokens List of tokens to append.
-     *
-     * @return void
-     */
-    public function addTokens(&$aTokens)
-    {
-        $iNumWords = count($this->aWords);
-
-        for ($i = 0; $i < $iNumWords; $i++) {
-            $sPhrase = $this->aWords[$i];
-            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
-            $aTokens[$sPhrase] = $sPhrase;
-
-            for ($j = $i + 1; $j < $iNumWords; $j++) {
-                $sPhrase .= ' '.$this->aWords[$j];
-                $aTokens[' '.$sPhrase] = ' '.$sPhrase;
-                $aTokens[$sPhrase] = $sPhrase;
-            }
-        }
-    }
-
    /**
     * Invert the set of possible segmentations.
     *
@@ -99,61 +78,11 @@ class Phrase
        }
    }

-    public function computeWordSets($oTokens)
-    {
-        $iNumWords = count($this->aWords);
-        // Caches the word set for the partial phrase up to word i.
-        $aSetCache = array_fill(0, $iNumWords, array());
-
-        // Initialise first element of cache. There can only be the word.
-        if ($oTokens->containsAny($this->aWords[0])) {
-            $aSetCache[0][] = array($this->aWords[0]);
-        }
-
-        // Now do the next elements using what we already have.
-        for ($i = 1; $i < $iNumWords; $i++) {
-            for ($j = $i; $j > 0; $j--) {
-                $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
-                if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
-                    $aPartial = array($sPartial);
-                    foreach ($aSetCache[$j - 1] as $aSet) {
-                        if (count($aSet) < Phrase::MAX_WORDSET_LEN) {
-                            $aSetCache[$i][] = array_merge($aSet, $aPartial);
-                        }
-                    }
-                    if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) {
-                        usort(
-                            $aSetCache[$i],
-                            array('\Nominatim\Phrase', 'cmpByArraylen')
-                        );
-                        $aSetCache[$i] = array_slice(
-                            $aSetCache[$i],
-                            0,
-                            Phrase::MAX_WORDSETS
-                        );
-                    }
-                }
-            }
-
-            // finally the current full phrase
-            $sPartial = $this->aWords[0].' '.$sPartial;
-            if ($oTokens->containsAny($sPartial)) {
-                $aSetCache[$i][] = array($sPartial);
-            }
-        }
-
-        $this->aWordSets = $aSetCache[$iNumWords - 1];
-        usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
-        $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS);
-    }
-
-
    public function debugInfo()
    {
        return array(
                'Type' => $this->sPhraseType,
                'Phrase' => $this->sPhrase,
-                'Words' => $this->aWords,
                'WordSets' => $this->aWordSets
               );
    }
--- a/lib-php/PlaceLookup.php
+++ b/lib-php/PlaceLookup.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -78,7 +86,7 @@ class PlaceLookup
            ($this->bIncludePolygonAsSVG ? 1 : 0);
        if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
            if (CONST_PolygonOutput_MaximumTypes) {
-                userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
+                userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
            } else {
                userError('Polygon output is disabled');
            }
@@ -89,20 +97,36 @@ class PlaceLookup
    {
        $aParams = array();

-        if ($this->bAddressDetails) $aParams['addressdetails'] = '1';
-        if ($this->bExtraTags) $aParams['extratags'] = '1';
-        if ($this->bNameDetails) $aParams['namedetails'] = '1';
+        if ($this->bAddressDetails) {
+            $aParams['addressdetails'] = '1';
+        }
+        if ($this->bExtraTags) {
+            $aParams['extratags'] = '1';
+        }
+        if ($this->bNameDetails) {
+            $aParams['namedetails'] = '1';
+        }

-        if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1';
-        if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1';
-        if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1';
-        if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1';
+        if ($this->bIncludePolygonAsText) {
+            $aParams['polygon_text'] = '1';
+        }
+        if ($this->bIncludePolygonAsGeoJSON) {
+            $aParams['polygon_geojson'] = '1';
+        }
+        if ($this->bIncludePolygonAsKML) {
+            $aParams['polygon_kml'] = '1';
+        }
+        if ($this->bIncludePolygonAsSVG) {
+            $aParams['polygon_svg'] = '1';
+        }

        if ($this->fPolygonSimplificationThreshold > 0.0) {
            $aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold;
        }

-        if (!$this->bDeDupe) $aParams['dedupe'] = '0';
+        if (!$this->bDeDupe) {
+            $aParams['dedupe'] = '0';
+        }

        return $aParams;
    }
@@ -147,8 +171,9 @@ class PlaceLookup

    private function langAddressSql($sHousenumber)
    {
-        if ($this->bAddressDetails)
+        if ($this->bAddressDetails) {
            return ''; // langaddress will be computed from address details
+        }

        return 'get_address_by_language(place_id,'.$sHousenumber.','.$this->aLangPrefOrderSql.') AS langaddress,';
    }
@@ -162,12 +187,12 @@ class PlaceLookup
            return null;
        }

-        $aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)));
+        $aResults = $this->lookup(array($iPlaceID => new Result($iPlaceID)), 0, 30, true);

        return empty($aResults) ? null : reset($aResults);
    }

-    public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30)
+    public function lookup($aResults, $iMinRank = 0, $iMaxRank = 30, $bAllowLinked = false)
    {
        Debug::newFunction('Place lookup');

@@ -222,7 +247,9 @@ class PlaceLookup
            if ($this->sAllowedTypesSQLList) {
                $sSQL .= 'AND placex.class in '.$this->sAllowedTypesSQLList;
            }
-            $sSQL .= '    AND linked_place_id is null ';
+            if (!$bAllowLinked) {
+                $sSQL .= '    AND linked_place_id is null ';
+            }
            $sSQL .= ' GROUP BY ';
            $sSQL .= '     osm_type, ';
            $sSQL .= '     osm_id, ';
@@ -234,12 +261,20 @@ class PlaceLookup
            $sSQL .= '     housenumber,';
            $sSQL .= '     country_code, ';
            $sSQL .= '     importance, ';
-            if (!$this->bDeDupe) $sSQL .= 'place_id,';
-            if (!$this->bAddressDetails) $sSQL .= 'langaddress, ';
+            if (!$this->bDeDupe) {
+                $sSQL .= 'place_id,';
+            }
+            if (!$this->bAddressDetails) {
+                $sSQL .= 'langaddress, ';
+            }
            $sSQL .= '     placename, ';
            $sSQL .= '     ref, ';
-            if ($this->bExtraTags) $sSQL .= 'extratags, ';
-            if ($this->bNameDetails) $sSQL .= 'name, ';
+            if ($this->bExtraTags) {
+                $sSQL .= 'extratags, ';
+            }
+            if ($this->bNameDetails) {
+                $sSQL .= 'name, ';
+            }
            $sSQL .= '     extra_place ';

            $aSubSelects[] = $sSQL;
@@ -260,8 +295,12 @@ class PlaceLookup
            $sSQL .= $this->langAddressSql('-1');
            $sSQL .= '  postcode as placename,';
            $sSQL .= '  postcode as ref,';
-            if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
-            if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
+            if ($this->bExtraTags) {
+                $sSQL .= 'null::text AS extra,';
+            }
+            if ($this->bNameDetails) {
+                $sSQL .= 'null::text AS names,';
+            }
            $sSQL .= '  ST_x(geometry) AS lon, ST_y(geometry) AS lat,';
            $sSQL .= '  (0.75-(rank_search::float/40)) AS importance, ';
            $sSQL .= $this->addressImportanceSql('geometry', 'lp.parent_place_id');
@@ -298,8 +337,12 @@ class PlaceLookup
                    $sSQL .= $this->langAddressSql('housenumber_for_place');
                    $sSQL .= '     null::text AS placename, ';
                    $sSQL .= '     null::text AS ref, ';
-                    if ($this->bExtraTags) $sSQL .= 'null::text AS extra,';
-                    if ($this->bNameDetails) $sSQL .= 'null::text AS names,';
+                    if ($this->bExtraTags) {
+                        $sSQL .= 'null::text AS extra,';
+                    }
+                    if ($this->bNameDetails) {
+                        $sSQL .= 'null::text AS names,';
+                    }
                    $sSQL .= '     st_x(centroid) AS lon, ';
                    $sSQL .= '     st_y(centroid) AS lat,';
                    $sSQL .= '     -1.15 AS importance, ';
@@ -307,7 +350,9 @@ class PlaceLookup
                    $sSQL .= '     null::text AS extra_place ';
                    $sSQL .= ' FROM (';
                    $sSQL .= '     SELECT place_id, ';    // interpolate the Tiger housenumbers here
-                    $sSQL .= '         ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) AS centroid, ';
+                    $sSQL .= '         CASE WHEN startnumber != endnumber';
+                    $sSQL .= '              THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float)';
+                    $sSQL .= '              ELSE ST_LineInterpolatePoint(linegeo, 0.5) END AS centroid, ';
                    $sSQL .= '         parent_place_id, ';
                    $sSQL .= '         housenumber_for_place';
                    $sSQL .= '     FROM (';
@@ -344,8 +389,12 @@ class PlaceLookup
                $sSQL .= $this->langAddressSql('housenumber_for_place');
                $sSQL .= '  null::text AS placename, ';
                $sSQL .= '  null::text AS ref, ';
-                if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
-                if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
+                if ($this->bExtraTags) {
+                    $sSQL .= 'null::text AS extra, ';
+                }
+                if ($this->bNameDetails) {
+                    $sSQL .= 'null::text AS names, ';
+                }
                $sSQL .= '  st_x(centroid) AS lon, ';
                $sSQL .= '  st_y(centroid) AS lat, ';
                // slightly smaller than the importance for normal houses
@@ -360,7 +409,7 @@ class PlaceLookup
                $sSQL .= '         CASE ';             // interpolate the housenumbers here
                $sSQL .= '           WHEN startnumber != endnumber ';
                $sSQL .= '           THEN ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) ';
-                $sSQL .= '           ELSE ST_LineInterpolatePoint(linegeo, 0.5) ';
+                $sSQL .= '           ELSE linegeo ';
                $sSQL .= '         END as centroid, ';
                $sSQL .= '         parent_place_id, ';
                $sSQL .= '         housenumber_for_place ';
@@ -373,42 +422,6 @@ class PlaceLookup

                $aSubSelects[] = $sSQL;
            }
-
-            if (CONST_Use_Aux_Location_data) {
-                $sPlaceIDs = Result::joinIdsByTable($aResults, Result::TABLE_AUX);
-                if ($sPlaceIDs) {
-                    $sHousenumbers = Result::sqlHouseNumberTable($aResults, Result::TABLE_AUX);
-                    $sSQL = '  SELECT ';
-                    $sSQL .= "     'L' AS osm_type, ";
-                    $sSQL .= '     place_id AS osm_id, ';
-                    $sSQL .= "     'place' AS class,";
-                    $sSQL .= "     'house' AS type, ";
-                    $sSQL .= '     null::smallint AS admin_level, ';
-                    $sSQL .= '     30 AS rank_search,';
-                    $sSQL .= '     30 AS rank_address, ';
-                    $sSQL .= '     place_id,';
-                    $sSQL .= '     parent_place_id, ';
-                    $sSQL .= '     housenumber,';
-                    $sSQL .= "     'us' AS country_code, ";
-                    $sSQL .= $this->langAddressSql('-1');
-                    $sSQL .= '     null::text AS placename, ';
-                    $sSQL .= '     null::text AS ref, ';
-                    if ($this->bExtraTags) $sSQL .= 'null::text AS extra, ';
-                    if ($this->bNameDetails) $sSQL .= 'null::text AS names, ';
-                    $sSQL .= '     ST_X(centroid) AS lon, ';
-                    $sSQL .= '     ST_Y(centroid) AS lat, ';
-                    $sSQL .= '     -1.10 AS importance, ';
-                    $sSQL .= $this->addressImportanceSql(
-                        'centroid',
-                        'location_property_aux.parent_place_id'
-                    );
-                    $sSQL .= '     null::text AS extra_place ';
-                    $sSQL .= '  FROM location_property_aux ';
-                    $sSQL .= "  WHERE place_id in ($sPlaceIDs) ";
-
-                    $aSubSelects[] = $sSQL;
-                }
-            }
        }

        if (empty($aSubSelects)) {
@@ -434,18 +447,14 @@ class PlaceLookup

            if ($this->bExtraTags) {
                if ($aPlace['extra']) {
-                    $aPlace['sExtraTags'] = json_decode($aPlace['extra']);
+                    $aPlace['sExtraTags'] = json_decode($aPlace['extra'], true);
                } else {
                    $aPlace['sExtraTags'] = (object) array();
                }
            }

            if ($this->bNameDetails) {
-                if ($aPlace['names']) {
-                    $aPlace['sNameDetails'] = json_decode($aPlace['names']);
-                } else {
-                    $aPlace['sNameDetails'] = (object) array();
-                }
+                $aPlace['sNameDetails'] = $this->extractNames($aPlace['names']);
            }

            $aPlace['addresstype'] = ClassTypes\getLabelTag(
@@ -468,6 +477,33 @@ class PlaceLookup
        return $aResults;
    }

+    /**
+     * Decode the JSON-encoded names of a place for the name details output.
+     * A key prefixed with '_place_' is stored under the unprefixed key unless
+     * the unprefixed key exists as well; in that case it keeps its prefix.
+     *
+     * @param string $sNames  JSON object with the names, may be empty.
+     *
+     * @return array|object Names by key or an empty object if there are none.
+     */
+    private function extractNames($sNames)
+    {
+        $aFullNames = $sNames ? json_decode($sNames, true) : null;
+        // Undecodable, scalar or empty JSON also yields the empty object.
+        if (!is_array($aFullNames) || empty($aFullNames)) {
+            return (object) array();
+        }
+
+        $aNames = array();
+        foreach ($aFullNames as $sKey => $sValue) {
+            $sSubKey = strpos($sKey, '_place_') === 0 ? substr($sKey, 7) : null;
+            $bRename = $sSubKey !== null && !array_key_exists($sSubKey, $aFullNames);
+            $aNames[$bRename ? $sSubKey : $sKey] = $sValue;
+        }
+
+        return $aNames;
+    }
+
    /* returns an array which will contain the keys
     *   aBoundingBox
     * and may also contain one or more of the keys
@@ -478,41 +514,46 @@ class PlaceLookup
     *   lat
     *   lon
     */
-
-
    public function getOutlines($iPlaceID, $fLon = null, $fLat = null, $fRadius = null, $fLonReverse = null, $fLatReverse = null)
    {

        $aOutlineResult = array();
-        if (!$iPlaceID) return $aOutlineResult;
+        if (!$iPlaceID) {
+            return $aOutlineResult;
+        }

        // Get the bounding box and outline polygon
        $sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,';
-        if ($fLonReverse != null && $fLatReverse != null) {
-            $sSQL .= ' ST_Y(closest_point) as centrelat,';
-            $sSQL .= ' ST_X(closest_point) as centrelon,';
-        } else {
-            $sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
-        }
+        $sSQL .= ' ST_Y(centroid) as centrelat, ST_X(centroid) as centrelon,';
        $sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,';
        $sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon';
-        if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
-        if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml';
-        if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg';
-        if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext';
+        if ($this->bIncludePolygonAsGeoJSON) {
+            $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson';
+        }
+        if ($this->bIncludePolygonAsKML) {
+            $sSQL .= ',ST_AsKML(geometry) as askml';
+        }
+        if ($this->bIncludePolygonAsSVG) {
+            $sSQL .= ',ST_AsSVG(geometry) as assvg';
+        }
+        if ($this->bIncludePolygonAsText) {
+            $sSQL .= ',ST_AsText(geometry) as astext';
+        }
+
+        $sSQL .= ' FROM (SELECT place_id';
        if ($fLonReverse != null && $fLatReverse != null) {
-            $sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
-            $sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
-            $sFrom .=' ELSE centroid END AS closest_point';
-            $sFrom .= ' from placex where place_id = '.$iPlaceID.') as plx';
+            $sSQL .= ',CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN ';
+            $sSQL .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))';
+            $sSQL .=' ELSE centroid END AS centroid';
        } else {
-            $sFrom = ' from placex where place_id = '.$iPlaceID;
+            $sSQL .= ',centroid';
        }
        if ($this->fPolygonSimplificationThreshold > 0) {
-            $sSQL .= ' from (select place_id,centroid,ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry'.$sFrom.') as plx';
+            $sSQL .= ',ST_SimplifyPreserveTopology(geometry,'.$this->fPolygonSimplificationThreshold.') as geometry';
        } else {
-            $sSQL .= $sFrom;
+            $sSQL .= ',geometry';
        }
+        $sSQL .= ' FROM placex where place_id = '.$iPlaceID.') as plx';

        $aPointPolygon = $this->oDB->getRow($sSQL, null, 'Could not get outline');

@@ -522,10 +563,18 @@ class PlaceLookup
                $aOutlineResult['lon'] = $aPointPolygon['centrelon'];
            }

-            if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
-            if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml'];
-            if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
-            if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext'];
+            if ($this->bIncludePolygonAsGeoJSON) {
+                $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson'];
+            }
+            if ($this->bIncludePolygonAsKML) {
+                $aOutlineResult['askml'] = $aPointPolygon['askml'];
+            }
+            if ($this->bIncludePolygonAsSVG) {
+                $aOutlineResult['assvg'] = $aPointPolygon['assvg'];
+            }
+            if ($this->bIncludePolygonAsText) {
+                $aOutlineResult['astext'] = $aPointPolygon['astext'];
+            }

            if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) {
                $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius;
--- a/lib-php/Result.php
+++ b/lib-php/Result.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -13,8 +21,7 @@ class Result
    const TABLE_PLACEX = 0;
    const TABLE_POSTCODE = 1;
    const TABLE_OSMLINE = 2;
-    const TABLE_AUX = 3;
-    const TABLE_TIGER = 4;
+    const TABLE_TIGER = 3;

    /// Database table that contains the result.
    public $iTable;
@@ -56,6 +63,27 @@ class Result
            }
        )));
    }
+
+    /// Join the keys of all results from $iTable whose address rank is at least $iMinAddressRank.
+    public static function joinIdsByTableMinRank($aResults, $iTable, $iMinAddressRank)
+    {
+        $aSelected = array_filter($aResults, function ($oResult) use ($iTable, $iMinAddressRank) {
+            return $oResult->iTable == $iTable && $oResult->iAddressRank >= $iMinAddressRank;
+        });
+
+        return join(',', array_keys($aSelected));
+    }
+
+    /// Join the keys of all results from $iTable whose address rank is at most $iMaxAddressRank.
+    public static function joinIdsByTableMaxRank($aResults, $iTable, $iMaxAddressRank)
+    {
+        $aSelected = array_filter($aResults, function ($oResult) use ($iTable, $iMaxAddressRank) {
+            return $oResult->iTable == $iTable && $oResult->iAddressRank <= $iMaxAddressRank;
+        });
+
+        return join(',', array_keys($aSelected));
+    }
+
    public static function sqlHouseNumberTable($aResults, $iTable)
    {
        $sHousenumbers = '';
--- a/lib-php/ReverseGeocode.php
+++ b/lib-php/ReverseGeocode.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -32,10 +40,10 @@ class ReverseGeocode
                      9 => 12,
                      10 => 17, // City
                      11 => 17,
-                      12 => 18, // Town / Village
-                      13 => 18,
-                      14 => 22, // Suburb
-                      15 => 22,
+                      12 => 18, // Town
+                      13 => 19, // Village
+                      14 => 22, // Neighbourhood
+                      15 => 25, // Locality
                      16 => 26, // major street
                      17 => 27, // minor street
                      18 => 30, // or >, Building
@@ -56,12 +64,15 @@ class ReverseGeocode
    {
        Debug::newFunction('lookupInterpolation');
        $sSQL = 'SELECT place_id, parent_place_id, 30 as rank_search,';
-        $sSQL .= '  ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
-        $sSQL .= '  startnumber, endnumber, interpolationtype,';
+        $sSQL .= '  (CASE WHEN endnumber != startnumber';
+        $sSQL .= '        THEN (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.')';
+        $sSQL .= '        ELSE startnumber END) as fhnr,';
+        $sSQL .= '  startnumber, endnumber, step,';
        $sSQL .= '  ST_Distance(linegeo,'.$sPointSQL.') as distance';
        $sSQL .= ' FROM location_property_osmline';
        $sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', linegeo, '.$fSearchDiam.')';
-        $sSQL .= ' and indexed_status = 0 and startnumber is not NULL ';
+        $sSQL .= '       and indexed_status = 0 and startnumber is not NULL ';
+        $sSQL .= '       and parent_place_id != 0';
        $sSQL .= ' ORDER BY distance ASC limit 1';
        Debug::printSQL($sSQL);

@@ -74,7 +85,10 @@ class ReverseGeocode

    protected function lookupLargeArea($sPointSQL, $iMaxRank)
    {
-        $oResult = null;
+        $sCountryCode = $this->getCountryCode($sPointSQL);
+        if (CONST_Search_WithinCountries and $sCountryCode == null) {
+            return  null;
+        }

        if ($iMaxRank > 4) {
            $aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
@@ -85,12 +99,12 @@ class ReverseGeocode

        // If no polygon which contains the searchpoint is found,
        // searches in the country_osm_grid table for a polygon.
-        return  $this->lookupInCountry($sPointSQL, $iMaxRank);
+        return  $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
    }

-    protected function lookupInCountry($sPointSQL, $iMaxRank)
+    protected function getCountryCode($sPointSQL)
    {
-        Debug::newFunction('lookupInCountry');
+        Debug::newFunction('getCountryCode');
        // searches for polygon in table country_osm_grid which contains the searchpoint
        // and searches for the nearest place node to the searchpoint in this polygon
        $sSQL = 'SELECT country_code FROM country_osm_grid';
@@ -102,8 +116,12 @@ class ReverseGeocode
            null,
            'Could not determine country polygon containing the point.'
        );
-        Debug::printVar('Country code', $sCountryCode);
+        return $sCountryCode;
+    }

+    protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
+    {
+        Debug::newFunction('lookupInCountry');
        if ($sCountryCode) {
            if ($iMaxRank > 4) {
                // look for place nodes with the given country code
@@ -113,11 +131,13 @@ class ReverseGeocode
                $sSQL .= ' FROM placex';
                $sSQL .= ' WHERE osm_type = \'N\'';
                $sSQL .= ' AND country_code = \''.$sCountryCode.'\'';
+                $sSQL .= ' AND rank_address between 4 and 25'; // needed to select right index
                $sSQL .= ' AND rank_search between 5 and ' .min(25, $iMaxRank);
-                $sSQL .= ' AND class = \'place\' AND type != \'postcode\'';
+                $sSQL .= ' AND type != \'postcode\'';
                $sSQL .= ' AND name IS NOT NULL ';
                $sSQL .= ' and indexed_status = 0 and linked_place_id is null';
-                $sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, 1.8)) p ';
+                $sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
+                $sSQL .= ') as a ';
                $sSQL .= 'WHERE distance <= reverse_place_diameter(rank_search)';
                $sSQL .= ' ORDER BY rank_search DESC, distance ASC';
                $sSQL .= ' LIMIT 1';
@@ -167,22 +187,28 @@ class ReverseGeocode
    {
        Debug::newFunction('lookupPolygon');
        // polygon search begins at suburb-level
-        if ($iMaxRank > 25) $iMaxRank = 25;
+        if ($iMaxRank > 25) {
+            $iMaxRank = 25;
+        }
        // no polygon search over country-level
-        if ($iMaxRank < 5) $iMaxRank = 5;
+        if ($iMaxRank < 5) {
+            $iMaxRank = 5;
+        }
        // search for polygon
        $sSQL = 'SELECT place_id, parent_place_id, rank_address, rank_search FROM';
        $sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry';
        $sSQL .= ' FROM placex';
        $sSQL .= ' WHERE ST_GeometryType(geometry) in (\'ST_Polygon\', \'ST_MultiPolygon\')';
-        $sSQL .= ' AND rank_address Between 5 AND ' .$iMaxRank;
+        // Ensure that query planner doesn't use the index on rank_search.
+        $sSQL .= ' AND coalesce(rank_search, 0) between 5 and ' .$iMaxRank;
+        $sSQL .= ' AND rank_address between 4 and 25'; // needed for index selection
        $sSQL .= ' AND geometry && '.$sPointSQL;
        $sSQL .= ' AND type != \'postcode\' ';
        $sSQL .= ' AND name is not null';
        $sSQL .= ' AND indexed_status = 0 and linked_place_id is null';
-        $sSQL .= ' ORDER BY rank_address DESC LIMIT 50 ) as a';
-        $sSQL .= ' WHERE ST_CONTAINS(geometry, '.$sPointSQL.' )';
-        $sSQL .= ' ORDER BY rank_address DESC LIMIT 1';
+        $sSQL .= ' ORDER BY rank_search DESC LIMIT 50 ) as a';
+        $sSQL .= ' WHERE ST_Contains(geometry, '.$sPointSQL.' )';
+        $sSQL .= ' ORDER BY rank_search DESC LIMIT 1';
        Debug::printSQL($sSQL);

        $aPoly = $this->oDB->getRow($sSQL, null, 'Could not determine polygon containing the point.');
@@ -190,33 +216,28 @@ class ReverseGeocode

        if ($aPoly) {
        // if a polygon is found, search for placenodes begins ...
-            $iParentPlaceID = $aPoly['parent_place_id'];
            $iRankAddress = $aPoly['rank_address'];
            $iRankSearch = $aPoly['rank_search'];
            $iPlaceID = $aPoly['place_id'];

-            if ($iRankAddress != $iMaxRank) {
+            if ($iRankSearch != $iMaxRank) {
                $sSQL = 'SELECT place_id FROM ';
                $sSQL .= '(SELECT place_id, rank_search, country_code, geometry,';
                $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
                $sSQL .= ' FROM placex';
                $sSQL .= ' WHERE osm_type = \'N\'';
-                // using rank_search because of a better differentiation
-                // for place nodes at rank_address 16
                $sSQL .= ' AND rank_search > '.$iRankSearch;
                $sSQL .= ' AND rank_search <= '.$iMaxRank;
-                $sSQL .= ' AND rank_address > 0';
-                $sSQL .= ' AND class = \'place\'';
+                $sSQL .= ' AND rank_address between 4 and 25';  // needed to select right index
                $sSQL .= ' AND type != \'postcode\'';
                $sSQL .= ' AND name IS NOT NULL ';
                $sSQL .= ' AND indexed_status = 0 AND linked_place_id is null';
-                $sSQL .= ' AND ST_DWithin('.$sPointSQL.', geometry, reverse_place_diameter('.$iRankSearch.'::smallint))';
-                $sSQL .= ' ORDER BY distance ASC,';
-                $sSQL .= ' rank_address DESC';
-                $sSQL .= ' limit 500) as a';
-                $sSQL .= ' WHERE ST_CONTAINS((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
+                $sSQL .= ' AND ST_Buffer(geometry, reverse_place_diameter(rank_search)) && '.$sPointSQL;
+                $sSQL .= ' ORDER BY rank_search DESC, distance ASC';
+                $sSQL .= ' limit 100) as a';
+                $sSQL .= ' WHERE ST_Contains((SELECT geometry FROM placex WHERE place_id = '.$iPlaceID.'), geometry )';
                $sSQL .= ' AND distance <= reverse_place_diameter(rank_search)';
-                $sSQL .= ' ORDER BY distance ASC, rank_search DESC';
+                $sSQL .= ' ORDER BY rank_search DESC, distance ASC';
                $sSQL .= ' LIMIT 1';
                Debug::printSQL($sSQL);

@@ -242,26 +263,24 @@ class ReverseGeocode
    public function lookupPoint($sPointSQL, $bDoInterpolation = true)
    {
        Debug::newFunction('lookupPoint');
-        // starts if the search is on POI or street level,
-        // searches for the nearest POI or street,
-        // if a street is found and a POI is searched for,
-        // the nearest POI which the found street is a parent of is choosen.
-        $iMaxRank = $this->iMaxRank;
-
        // Find the nearest point
        $fSearchDiam = 0.006;
        $oResult = null;
        $aPlace = null;

        // for POI or street level
-        if ($iMaxRank >= 26) {
+        if ($this->iMaxRank >= 26) {
+            // starts if the search is on POI or street level,
+            // searches for the nearest POI or street,
+            // if a street is found and a POI is searched for,
+            // the nearest POI which the found street is a parent of is chosen.
            $sSQL = 'select place_id,parent_place_id,rank_address,country_code,';
            $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
            $sSQL .= ' FROM ';
            $sSQL .= ' placex';
            $sSQL .= '   WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')';
            $sSQL .= '   AND';
-            $sSQL .= ' rank_address between 26 and '.$iMaxRank;
+            $sSQL .= ' rank_address between 26 and '.$this->iMaxRank;
            $sSQL .= ' and (name is not null or housenumber is not null';
            $sSQL .= ' or rank_address between 26 and 27)';
            $sSQL .= ' and (rank_address between 26 and 27';
@@ -284,7 +303,7 @@ class ReverseGeocode

            if ($aPlace) {
                // if street and maxrank > streetlevel
-                if ($iRankAddress <= 27 && $iMaxRank > 27) {
+                if ($iRankAddress <= 27 && $this->iMaxRank > 27) {
                    // find the closest object (up to a certain radius) of which the street is a parent of
                    $sSQL = ' select place_id,';
                    $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance';
@@ -318,9 +337,9 @@ class ReverseGeocode
                    && $this->iMaxRank >= 28
                ) {
                    $sSQL = 'SELECT place_id,parent_place_id,30 as rank_search,';
-                    $sSQL .= 'ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fraction,';
-                    $sSQL .= 'ST_distance('.$sPointSQL.', linegeo) as distance,';
-                    $sSQL .= 'startnumber,endnumber,interpolationtype';
+                    $sSQL .= '      (endnumber - startnumber) * ST_LineLocatePoint(linegeo,'.$sPointSQL.') as fhnr,';
+                    $sSQL .= '      startnumber, endnumber, step,';
+                    $sSQL .= '      ST_Distance('.$sPointSQL.', linegeo) as distance';
                    $sSQL .= ' FROM location_property_tiger WHERE parent_place_id = '.$oResult->iId;
                    $sSQL .= ' AND ST_DWithin('.$sPointSQL.', linegeo, 0.001)';
                    $sSQL .= ' ORDER BY distance ASC limit 1';
@@ -332,19 +351,23 @@ class ReverseGeocode
                    if ($aPlaceTiger) {
                        $aPlace = $aPlaceTiger;
                        $oResult = new Result($aPlaceTiger['place_id'], Result::TABLE_TIGER);
-                        $oResult->iHouseNumber = closestHouseNumber($aPlaceTiger);
+                        $iRndNum = max(0, round($aPlaceTiger['fhnr'] / $aPlaceTiger['step']) * $aPlaceTiger['step']);
+                        $oResult->iHouseNumber = $aPlaceTiger['startnumber'] + $iRndNum;
+                        if ($oResult->iHouseNumber > $aPlaceTiger['endnumber']) {
+                            $oResult->iHouseNumber = $aPlaceTiger['endnumber'];
+                        }
                        $iRankAddress = 30;
                    }
                }
            }

-            if ($bDoInterpolation && $iMaxRank >= 30) {
+            if ($bDoInterpolation && $this->iMaxRank >= 30) {
                $fDistance = $fSearchDiam;
                if ($aPlace) {
                    // We can't reliably go from the closest street to an
                    // interpolation line because the closest interpolation
                    // may have a different street segments as a parent.
-                    // Therefore allow an interpolation line to take precendence
+                    // Therefore allow an interpolation line to take precedence
                    // even when the street is closer.
                    $fDistance = $iRankAddress < 28 ? 0.001 : $aPlace['distance'];
                }
@@ -354,9 +377,12 @@ class ReverseGeocode

                if ($aHouse) {
                    $oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE);
-                    $oResult->iHouseNumber = closestHouseNumber($aHouse);
+                    $iRndNum = max(0, round($aHouse['fhnr'] / $aHouse['step']) * $aHouse['step']);
+                    $oResult->iHouseNumber = $aHouse['startnumber'] + $iRndNum;
+                    if ($oResult->iHouseNumber > $aHouse['endnumber']) {
+                        $oResult->iHouseNumber = $aHouse['endnumber'];
+                    }
                    $aPlace = $aHouse;
-                    $iRankAddress = 30;
                }
            }

@@ -366,7 +392,7 @@ class ReverseGeocode
            }
        } else {
            // lower than street level ($iMaxRank < 26 )
-            $oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank);
+            $oResult = $this->lookupLargeArea($sPointSQL, $this->iMaxRank);
        }

        Debug::printVar('Final result', $oResult);
--- a/lib-php/SearchContext.php
+++ b/lib-php/SearchContext.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -28,6 +36,8 @@ class SearchContext
    public $sqlViewboxLarge = '';
    /// Reference along a route (as SQL).
    public $sqlViewboxCentre = '';
+    /// List of countries to restrict search to (as array).
+    public $aCountryList = null;
    /// List of countries to restrict search to (as SQL).
    public $sqlCountryList = '';
    /// List of place IDs to exclude (as SQL).
@@ -187,6 +197,7 @@ class SearchContext
    public function setCountryList($aCountries)
    {
        $this->sqlCountryList = '('.join(',', array_map('addQuotes', $aCountries)).')';
+        $this->aCountryList = $aCountries;
    }

    /**
@@ -279,6 +290,19 @@ class SearchContext
        return '';
    }

+    /**
+     * Check if the given country is covered by the search context.
+     *
+     * @param string $sCountryCode  Country code of the country to check.
+     *
+     * @return bool True, if no country code restrictions are set or the
+     *              country is included in the country list.
+     */
+    public function isCountryApplicable($sCountryCode)
+    {
+        return $this->aCountryList === null || in_array($sCountryCode, $this->aCountryList);
+    }
+
    public function debugInfo()
    {
        return array(
--- a/lib-php/SearchDescription.php
+++ b/lib-php/SearchDescription.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -19,6 +27,8 @@ class SearchDescription
    private $aName = array();
    /// True if the name is rare enough to force index use on name.
    private $bRareName = false;
+    /// True if the name must be accompanied by address terms.
+    private $bNameNeedsAddress = false;
    /// List of word ids making up the address of the object.
    private $aAddress = array();
    /// List of word ids that appear in the name but should be ignored.
@@ -67,35 +77,6 @@ class SearchDescription
        return $this->iSearchRank;
    }

-    /**
-     * Make this search a POI search.
-     *
-     * In a POI search, objects are not (only) searched by their name
-     * but also by the primary OSM key/value pair (class and type in Nominatim).
-     *
-     * @param integer $iOperator Type of POI search
-     * @param string  $sClass    Class (or OSM tag key) of POI.
-     * @param string  $sType     Type (or OSM tag value) of POI.
-     *
-     * @return void
-     */
-    public function setPoiSearch($iOperator, $sClass, $sType)
-    {
-        $this->iOperator = $iOperator;
-        $this->sClass = $sClass;
-        $this->sType = $sType;
-    }
-
-    /**
-     * Check if any operator is set.
-     *
-     * @return bool True, if this is a special search operation.
-     */
-    public function hasOperator()
-    {
-        return $this->iOperator != Operator::NONE;
-    }
-
    /**
     * Extract key/value pairs from a query.
     *
@@ -142,257 +123,255 @@ class SearchDescription
                return false;
            }
        }
+        if ($this->bNameNeedsAddress && empty($this->aAddress)) {
+            return false;
+        }

        return true;
    }

    /////////// Search building functions

-
    /**
-     * Derive new searches by adding a full term to the existing search.
+     * Create a copy of this search description adding to search rank.
     *
-     * @param object $oSearchTerm  Description of the token.
-     * @param bool   $bHasPartial  True if there are also tokens of partial terms
-     *                             with the same name.
-     * @param string $sPhraseType  Type of phrase the token is contained in.
-     * @param bool   $bFirstToken  True if the token is at the beginning of the
-     *                             query.
-     * @param bool   $bFirstPhrase True if the token is in the first phrase of
-     *                             the query.
-     * @param bool   $bLastToken   True if the token is at the end of the query.
+     * @param integer $iTermCost  Cost to add to the current search rank.
     *
-     * @return SearchDescription[] List of derived search descriptions.
+     * @return object Cloned search description.
     */
-    public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+    public function clone($iTermCost)
    {
-        $aNewSearches = array();
+        $oSearch = clone $this;
+        $oSearch->iSearchRank += $iTermCost;

-        if (($sPhraseType == '' || $sPhraseType == 'country')
-            && is_a($oSearchTerm, '\Nominatim\Token\Country')
-        ) {
-            if (!$this->sCountryCode) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
-                // Country is almost always at the end of the string
-                // - increase score for finding it anywhere else (optimisation)
-                if (!$bLastToken) {
-                    $oSearch->iSearchRank += 5;
-                    $oSearch->iNamePhrase = -1;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
-                  && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
-        ) {
-            if (!$this->sPostcode) {
-                // If we have structured search or this is the first term,
-                // make the postcode the primary search element.
-                if ($this->iOperator == Operator::NONE && $bFirstToken) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iOperator = Operator::POSTCODE;
-                    $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
-                    $oSearch->aName =
-                        array($oSearchTerm->iId => $oSearchTerm->sPostcode);
-                    $aNewSearches[] = $oSearch;
-                }
-
-                // If we have a structured search or this is not the first term,
-                // add the postcode as an addendum.
-                if ($this->iOperator != Operator::POSTCODE
-                    && ($sPhraseType == 'postalcode' || !empty($this->aName))
-                ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->iNamePhrase = -1;
-                    if (strlen($oSearchTerm->sPostcode) < 4) {
-                        $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
-                    }
-                    $oSearch->sPostcode = $oSearchTerm->sPostcode;
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'street')
-                 && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
-        ) {
-            if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->iNamePhrase = -1;
-                $oSearch->sHouseNumber = $oSearchTerm->sToken;
-                if ($this->iOperator != Operator::NONE) {
-                    $oSearch->iSearchRank++;
-                }
-                // sanity check: if the housenumber is not mainly made
-                // up of numbers, add a penalty
-                if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0
-                    || preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
-                    $oSearch->iSearchRank++;
-                }
-                if (empty($oSearchTerm->iId)) {
-                    $oSearch->iSearchRank++;
-                }
-                // also must not appear in the middle of the address
-                if (!empty($this->aAddress)
-                    || (!empty($this->aAddressNonSearch))
-                    || $this->sPostcode
-                ) {
-                    $oSearch->iSearchRank++;
-                }
-                $aNewSearches[] = $oSearch;
-                // Housenumbers may appear in the name when the place has its own
-                // address terms.
-                if ($oSearchTerm->iId !== null
-                    && ($this->iNamePhrase >= 0 || empty($this->aName))
-                    && empty($this->aAddress)
-                   ) {
-                    $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
-                    $oSearch->aAddress = $this->aName;
-                    $oSearch->bRareName = false;
-                    $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
-                    $aNewSearches[] = $oSearch;
-                }
-            }
-        } elseif ($sPhraseType == ''
-                  && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
-        ) {
-            if ($this->iOperator == Operator::NONE) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank += 2;
-                $oSearch->iNamePhrase = -1;
-
-                $iOp = $oSearchTerm->iOperator;
-                if ($iOp == Operator::NONE) {
-                    if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
-                        $iOp = Operator::NAME;
-                    } else {
-                        $iOp = Operator::NEAR;
-                    }
-                    $oSearch->iSearchRank += 2;
-                } elseif (!$bFirstToken && !$bLastToken) {
-                    $oSearch->iSearchRank += 2;
-                }
-                if ($this->sHouseNumber) {
-                    $oSearch->iSearchRank++;
-                }
-
-                $oSearch->setPoiSearch(
-                    $iOp,
-                    $oSearchTerm->sClass,
-                    $oSearchTerm->sType
-                );
-                $aNewSearches[] = $oSearch;
-            }
-        } elseif ($sPhraseType != 'country'
-                  && is_a($oSearchTerm, '\Nominatim\Token\Word')
-        ) {
-            $iWordID = $oSearchTerm->iId;
-            // Full words can only be a name if they appear at the beginning
-            // of the phrase. In structured search the name must forcably in
-            // the first phrase. In unstructured search it may be in a later
-            // phrase when the first phrase is a house number.
-            if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
-                if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
-                    $oSearch = clone $this;
-                    $oSearch->iNamePhrase = -1;
-                    $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
-                    $oSearch->aAddress[$iWordID] = $iWordID;
-                    $aNewSearches[] = $oSearch;
-                }
-            } elseif (empty($this->aNameNonSearch)) {
-                $oSearch = clone $this;
-                $oSearch->iSearchRank++;
-                $oSearch->aName = array($iWordID => $iWordID);
-                if (CONST_Search_NameOnlySearchFrequencyThreshold) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                }
-                $aNewSearches[] = $oSearch;
-            }
-        }
-
-        return $aNewSearches;
+        return $oSearch;
    }

    /**
-     * Derive new searches by adding a partial term to the existing search.
+     * Check if the search currently includes a name.
     *
-     * @param string  $sToken             Term for the token.
-     * @param object  $oSearchTerm        Description of the token.
-     * @param bool    $bStructuredPhrases True if the search is structured.
-     * @param integer $iPhrase            Number of the phrase the token is in.
-     * @param array[] $aFullTokens        List of full term tokens with the
-     *                                    same name.
+     * @param bool $bIncludeNonNames  If true stop-word tokens are taken into
+     *                                account, too.
     *
-     * @return SearchDescription[] List of derived search descriptions.
+     * @return bool True, if search has a name.
     */
-    public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+    public function hasName($bIncludeNonNames = false)
    {
-        // Only allow name terms.
-        if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
-            return array();
+        return !empty($this->aName)
+               || (!empty($this->aNameNonSearch) && $bIncludeNonNames);
+    }
+
+    /**
+     * Tell whether at least one address term has been added to the search.
+     *
+     * @return bool True, if there is any address term, be it a searchable
+     *              one or a stop-word term.
+     */
+    public function hasAddress()
+    {
+        return !(empty($this->aAddress) && empty($this->aAddressNonSearch));
+    }
+
+    /**
+     * Tell whether the search is restricted to a country.
+     *
+     * @return bool True, if the country code is anything but the empty string.
+     */
+    public function hasCountry()
+    {
+        return '' !== $this->sCountryCode;
+    }
+
+    /**
+     * Tell whether the search carries a postcode constraint.
+     *
+     * @return bool True, if the postcode is anything but the empty string.
+     */
+    public function hasPostcode()
+    {
+        return '' !== $this->sPostcode;
+    }
+
+    /**
+     * Tell whether the search carries a house number.
+     *
+     * @return bool True, if the house number is not the empty string ('0' counts).
+     */
+    public function hasHousenumber()
+    {
+        return '' !== $this->sHouseNumber;
+    }
+
+    /**
+     * Check if a special type of place is requested.
+     *
+     * @param integer $iOperator  When set, check for this particular operator.
+     *
+     * @return bool True, if a special type is requested or, when an operator
+     *              was given, if exactly that operator is in use.
+     */
+    public function hasOperator($iOperator = null)
+    {
+        $bAnyOperator = $this->iOperator != Operator::NONE;
+        return $iOperator === null ? $bAnyOperator : $this->iOperator == $iOperator;
+    }
+
+    /**
+     * Register the given token as an address term of this search.
+     *
+     * @param integer $iId       ID of term to add.
+     * @param bool $bSearchable  True if the term should be used to search for
+     *                           results (i.e. it is not a stop word).
+     */
+    public function addAddressToken($iId, $bSearchable = true)
+    {
+        if (!$bSearchable) {
+            $this->aAddressNonSearch[$iId] = $iId;
+        } else {
+            $this->aAddress[$iId] = $iId;
        }
+    }

-        $aNewSearches = array();
-        $iWordID = $oSearchTerm->iId;
+    /**
+     * Add the given full-word token to the list of name terms to search for.
+     * This also drops any requirement for accompanying address terms.
+     *
+     * @param integer $iId     ID of term to add.
+     * @param bool $bRareName  True if the term is infrequent enough to not
+     *                         require other constraints for efficient search.
+     */
+    public function addNameToken($iId, $bRareName)
+    {
+        $this->bNameNeedsAddress = false;
+        $this->bRareName = $bRareName;
+        $this->aName[$iId] = $iId;
+    }

-        if ((!$bStructuredPhrases || $iPhrase > 0)
-            && (!empty($this->aName))
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                $oSearch->aAddress[$iWordID] = $iWordID;
-            } else {
-                $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-                if (!empty($aFullTokens)) {
-                    $oSearch->iSearchRank++;
-                }
-            }
-            $aNewSearches[] = $oSearch;
+    /**
+     * Add the given partial token to the list of name terms to search for.
+     *
+     * @param integer $iId            ID of term to add.
+     * @param bool $bSearchable       Term should be used to search for result
+     *                                (i.e. term is not a stop word).
+     * @param bool $bNeedsAddress     True if the term is too unspecific to be used
+     *                                in a stand-alone search without an address
+     *                                to narrow down the search.
+     * @param integer $iPhraseNumber  Index of phrase, where the partial term
+     *                                appears.
+     */
+    public function addPartialNameToken($iId, $bSearchable, $bNeedsAddress, $iPhraseNumber)
+    {
+        if (empty($this->aName)) {
+            $this->bNameNeedsAddress = $bNeedsAddress;
+        } elseif ($bSearchable && count($this->aName) >= 2) {
+            $this->bNameNeedsAddress = false;
+        } else {
+            // Logical AND: a bitwise '&=' would turn the boolean flag into an int.
+            $this->bNameNeedsAddress = $this->bNameNeedsAddress && $bNeedsAddress;
        }
-
-        if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
-            && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
-            && strpos($sToken, ' ') === false
-        ) {
-            $oSearch = clone $this;
-            $oSearch->iSearchRank++;
-            if (empty($this->aName) && empty($this->aNameNonSearch)) {
-                $oSearch->iSearchRank++;
-            }
-            if (preg_match('#^[0-9 ]+$#', $sToken)) {
-                $oSearch->iSearchRank++;
-            }
-            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
-                if (empty($this->aName)
-                    && CONST_Search_NameOnlySearchFrequencyThreshold
-                ) {
-                    $oSearch->bRareName =
-                        $oSearchTerm->iSearchNameCount
-                          < CONST_Search_NameOnlySearchFrequencyThreshold;
-                } else {
-                    $oSearch->bRareName = false;
-                }
-                $oSearch->aName[$iWordID] = $iWordID;
-            } else {
-                if (!empty($aFullTokens)) {
-                    $oSearch->iSearchRank++;
-                }
-                $oSearch->aNameNonSearch[$iWordID] = $iWordID;
-            }
-            $oSearch->iNamePhrase = $iPhrase;
-            $aNewSearches[] = $oSearch;
+        if ($bSearchable) {
+            $this->aName[$iId] = $iId;
+        } else {
+            $this->aNameNonSearch[$iId] = $iId;
        }
+        $this->iNamePhrase = $iPhraseNumber;
+    }

-        return $aNewSearches;
+    /**
+     * Restrict the search to a country.
+     *
+     * @param string $sCountryCode  Code of the country to restrict the search to.
+     */
+    public function setCountry($sCountryCode)
+    {
+        $this->iNamePhrase = -1;
+        $this->sCountryCode = $sCountryCode;
+    }
+
+    /**
+     * Constrain the search to results with the given postcode.
+     *
+     * @param string $sPostcode  Postcode the result should have.
+     */
+    public function setPostcode($sPostcode)
+    {
+        $this->iNamePhrase = -1;
+        $this->sPostcode = $sPostcode;
+    }
+
+    /**
+     * Look for a postcode object and move the name terms to the address.
+     *
+     * @param integer $iId       Token Id for the postcode.
+     * @param string $sPostcode  Postcode to look for.
+     */
+    public function setPostcodeAsName($iId, $sPostcode)
+    {
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->aName = array($iId => $sPostcode);
+        $this->iNamePhrase = -1;
+        $this->bRareName = true;
+        $this->iOperator = Operator::POSTCODE;
+    }
+
+    /**
+     * Set the house number search constraint.
+     *
+     * @param string $sNumber  House number the result should have.
+     */
+    public function setHousenumber($sNumber)
+    {
+        $this->iNamePhrase = -1;
+        $this->sHouseNumber = $sNumber;
+    }
+
+    /**
+     * Look for a house number, moving the current name terms to the address.
+     *
+     * @param integer $iId  Token Id for the house number.
+     */
+    public function setHousenumberAsName($iId)
+    {
+        $this->aAddress = array_merge($this->aAddress, $this->aName);
+        $this->aName = array($iId => $iId);
+        $this->bNameNeedsAddress = true;
+        $this->bRareName = false;
+        $this->iNamePhrase = -1;
+    }
+
+    /**
+     * Turn this search into a POI search.
+     *
+     * A POI search looks up objects not (only) by name but also by their
+     * primary OSM key/value pair (called class and type in Nominatim).
+     *
+     * @param integer $iOperator Type of POI search
+     * @param string  $sClass    Class (or OSM tag key) of POI.
+     * @param string  $sType     Type (or OSM tag value) of POI.
+     *
+     * @return void
+     */
+    public function setPoiSearch($iOperator, $sClass, $sType)
+    {
+        $this->iNamePhrase = -1;
+        $this->sType = $sType;
+        $this->sClass = $sClass;
+        $this->iOperator = $iOperator;
+    }
+
+    /// Get the phrase index stored for the name; set*() methods reset it to -1.
+    public function getNamePhrase()
+    {
+        return $this->iNamePhrase;
+    }
+
+    /**
+     * Get the global search context.
+     * @return object  Object holding the global search constraints.
+     */
+    public function getContext()
+    {
+        return $this->oContext;
    }

    /////////// Query functions
@@ -408,12 +387,11 @@ class SearchDescription
     *
     * @return mixed[] An array with two fields: IDs contains the list of
     *                 matching place IDs and houseNumber the houseNumber
-     *                 if appicable or -1 if not.
+     *                 if applicable or -1 if not.
     */
    public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
    {
        $aResults = array();
-        $iHousenumber = -1;

        if ($this->sCountryCode
            && empty($this->aName)
@@ -444,27 +422,6 @@ class SearchDescription
                $iLimit
            );

-            // Now search for housenumber, if housenumber provided. Can be zero.
-            if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) {
-                // Downgrade the rank of the street results, they are missing
-                // the housenumber.
-                foreach ($aResults as $oRes) {
-                    if ($oRes->iAddressRank >= 26) {
-                        $oRes->iResultRank++;
-                    } else {
-                        $oRes->iResultRank += 2;
-                    }
-                }
-
-                $aHnResults = $this->queryHouseNumber($oDB, $aResults);
-
-                if (!empty($aHnResults)) {
-                    foreach ($aHnResults as $oRes) {
-                        $aResults[$oRes->iId] = $oRes;
-                    }
-                }
-            }
-
            // finally get POIs if requested
            if ($this->sClass && !empty($aResults)) {
                $aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit);
@@ -610,36 +567,6 @@ class SearchDescription
        $aTerms = array();
        $aOrder = array();

-        // Sort by existence of the requested house number but only if not
-        // too many results are expected for the street, i.e. if the result
-        // will be narrowed down by an address. Remeber that with ordering
-        // every single result has to be checked.
-        if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) {
-            $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
-            $aOrder[] = ' (';
-            $aOrder[0] .= 'EXISTS(';
-            $aOrder[0] .= '  SELECT place_id';
-            $aOrder[0] .= '  FROM placex';
-            $aOrder[0] .= '  WHERE parent_place_id = search_name.place_id';
-            $aOrder[0] .= "    AND housenumber ~* E'".$sHouseNumberRegex."'";
-            $aOrder[0] .= '  LIMIT 1';
-            $aOrder[0] .= ') ';
-            // also housenumbers from interpolation lines table are needed
-            if (preg_match('/[0-9]+/', $this->sHouseNumber)) {
-                $iHouseNumber = intval($this->sHouseNumber);
-                $aOrder[0] .= 'OR EXISTS(';
-                $aOrder[0] .= '  SELECT place_id ';
-                $aOrder[0] .= '  FROM location_property_osmline ';
-                $aOrder[0] .= '  WHERE parent_place_id = search_name.place_id';
-                $aOrder[0] .= '    AND startnumber is not NULL';
-                $aOrder[0] .= '    AND '.$iHouseNumber.'>=startnumber ';
-                $aOrder[0] .= '    AND '.$iHouseNumber.'<=endnumber ';
-                $aOrder[0] .= '  LIMIT 1';
-                $aOrder[0] .= ')';
-            }
-            $aOrder[0] .= ') DESC';
-        }
-
        if (!empty($this->aName)) {
            $aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName);
        }
@@ -670,7 +597,7 @@ class SearchDescription
            $aOrder[] = $this->oContext->distanceSQL('centroid');
        } elseif ($this->sPostcode) {
            if (empty($this->aAddress)) {
-                $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))";
+                $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.12))";
            } else {
                $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')";
            }
@@ -685,10 +612,6 @@ class SearchDescription
            $aTerms[] = 'centroid && '.$this->oContext->sqlViewboxSmall;
        }

-        if ($this->oContext->hasNearPoint()) {
-            $aOrder[] = $this->oContext->distanceSQL('centroid');
-        }
-
        if ($this->sHouseNumber) {
            $sImportanceSQL = '- abs(26 - address_rank) + 3';
        } else {
@@ -711,119 +634,128 @@ class SearchDescription
            $sExactMatchSQL = '0::int as exactmatch';
        }

-        if ($this->sHouseNumber || $this->sClass) {
-            $iLimit = 40;
+        if (empty($aTerms)) {
+            return array();
        }

-        $aResults = array();
+        if ($this->hasHousenumber()) {
+            $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');

-        if (!empty($aTerms)) {
-            $sSQL = 'SELECT place_id, address_rank,'.$sExactMatchSQL;
+            // Housenumbers on streets and places.
+            $sPlacexSql = 'SELECT array_agg(place_id) FROM placex';
+            $sPlacexSql .= ' WHERE parent_place_id = sin.place_id AND sin.address_rank < 30';
+            $sPlacexSql .= $this->oContext->excludeSQL(' AND place_id');
+            $sPlacexSql .= '       and housenumber ~* E'.$sHouseNumberRegex;
+
+            // Interpolations on streets and places.
+            $sInterpolSql = 'null';
+            $sTigerSql = 'null';
+            if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
+                $sIpolHnr = 'WHERE parent_place_id = sin.place_id ';
+                $sIpolHnr .= '  AND startnumber is not NULL AND sin.address_rank < 30';
+                $sIpolHnr .= '  AND '.$this->sHouseNumber.' between startnumber and endnumber';
+                $sIpolHnr .= '  AND ('.$this->sHouseNumber.' - startnumber) % step = 0';
+
+                $sInterpolSql = 'SELECT array_agg(place_id) FROM location_property_osmline '.$sIpolHnr;
+                if (CONST_Use_US_Tiger_Data) {
+                    $sTigerSql = 'SELECT array_agg(place_id) FROM location_property_tiger '.$sIpolHnr;
+                    $sTigerSql .= "      and sin.country_code = 'us'";
+                }
+            }
+
+            if ($this->sClass) {
+                $iLimit = 40;
+            }
+
+            $sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
+            $sSelfHnr .= '    AND housenumber ~* E'.$sHouseNumberRegex;
+
+            $aTerms[] = '(address_rank < 30 or exists('.$sSelfHnr.'))';
+
+
+            $sSQL = 'SELECT sin.*, ';
+            $sSQL .=        '('.$sPlacexSql.') as placex_hnr, ';
+            $sSQL .=        '('.$sInterpolSql.') as interpol_hnr, ';
+            $sSQL .=        '('.$sTigerSql.') as tiger_hnr ';
+            $sSQL .= ' FROM (';
+            $sSQL .= '    SELECT place_id, address_rank, country_code,'.$sExactMatchSQL.',';
+            $sSQL .= '            CASE WHEN importance = 0 OR importance IS NULL';
+            $sSQL .= '               THEN 0.75001-(search_rank::float/40) ELSE importance END as importance';
+            $sSQL .= '     FROM search_name';
+            $sSQL .= '     WHERE '.join(' and ', $aTerms);
+            $sSQL .= '     ORDER BY '.join(', ', $aOrder);
+            $sSQL .= '     LIMIT 40000';
+            $sSQL .= ') as sin';
+            $sSQL .= ' ORDER BY address_rank = 30 desc, placex_hnr, interpol_hnr, tiger_hnr,';
+            $sSQL .= '          importance';
+            $sSQL .= ' LIMIT '.$iLimit;
+        } else {
+            if ($this->sClass) {
+                $iLimit = 40;
+            }
+
+            $sSQL = 'SELECT place_id, address_rank, '.$sExactMatchSQL;
            $sSQL .= ' FROM search_name';
            $sSQL .= ' WHERE '.join(' and ', $aTerms);
            $sSQL .= ' ORDER BY '.join(', ', $aOrder);
            $sSQL .= ' LIMIT '.$iLimit;
-
-            Debug::printSQL($sSQL);
-
-            $aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');
-
-            foreach ($aDBResults as $aResult) {
-                $oResult = new Result($aResult['place_id']);
-                $oResult->iExactMatches = $aResult['exactmatch'];
-                $oResult->iAddressRank = $aResult['address_rank'];
-                $aResults[$aResult['place_id']] = $oResult;
-            }
        }

-        return $aResults;
-    }
-
-    private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
-    {
-        $aResults = array();
-        $sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX);
-
-        if (!$sPlaceIDs) {
-            return $aResults;
-        }
-
-        $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
-        $sSQL = 'SELECT place_id FROM placex ';
-        $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
-        $sSQL .= "  AND housenumber ~* E'".$sHouseNumberRegex."'";
-        $sSQL .= $this->oContext->excludeSQL(' AND place_id');
-
        Debug::printSQL($sSQL);

-        // XXX should inherit the exactMatches from its parent
-        foreach ($oDB->getCol($sSQL) as $iPlaceId) {
-            $aResults[$iPlaceId] = new Result($iPlaceId);
-        }
+        $aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.');

-        $bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber);
-        $iHousenumber = intval($this->sHouseNumber);
-        if ($bIsIntHouseNumber && empty($aResults)) {
-            // if nothing found, search in the interpolation line table
-            $sSQL = 'SELECT distinct place_id FROM location_property_osmline';
-            $sSQL .= ' WHERE startnumber is not NULL';
-            $sSQL .= '  AND parent_place_id in ('.$sPlaceIDs.') AND (';
-            if ($iHousenumber % 2 == 0) {
-                // If housenumber is even, look for housenumber in streets
-                // with interpolationtype even or all.
-                $sSQL .= "interpolationtype='even'";
-            } else {
-                // Else look for housenumber with interpolationtype odd or all.
-                $sSQL .= "interpolationtype='odd'";
+        $aResults = array();
+
+        foreach ($aDBResults as $aResult) {
+            $oResult = new Result($aResult['place_id']);
+            $oResult->iExactMatches = $aResult['exactmatch'];
+            $oResult->iAddressRank = $aResult['address_rank'];
+
+            $bNeedResult = true;
+            if ($this->hasHousenumber() && $aResult['address_rank'] < 30) {
+                if ($aResult['placex_hnr']) {
+                    foreach (explode(',', substr($aResult['placex_hnr'], 1, -1)) as $sPlaceID) {
+                        $iPlaceID = intval($sPlaceID);
+                        $oHnrResult = new Result($iPlaceID);
+                        $oHnrResult->iExactMatches = $aResult['exactmatch'];
+                        $oHnrResult->iAddressRank = 30;
+                        $aResults[$iPlaceID] = $oHnrResult;
+                        $bNeedResult = false;
+                    }
+                }
+                if ($aResult['interpol_hnr']) {
+                    foreach (explode(',', substr($aResult['interpol_hnr'], 1, -1)) as $sPlaceID) {
+                        $iPlaceID = intval($sPlaceID);
+                        $oHnrResult = new Result($iPlaceID, Result::TABLE_OSMLINE);
+                        $oHnrResult->iExactMatches = $aResult['exactmatch'];
+                        $oHnrResult->iAddressRank = 30;
+                        $oHnrResult->iHouseNumber = intval($this->sHouseNumber);
+                        $aResults[$iPlaceID] = $oHnrResult;
+                        $bNeedResult = false;
+                    }
+                }
+                if ($aResult['tiger_hnr']) {
+                    foreach (explode(',', substr($aResult['tiger_hnr'], 1, -1)) as $sPlaceID) {
+                        $iPlaceID = intval($sPlaceID);
+                        $oHnrResult = new Result($iPlaceID, Result::TABLE_TIGER);
+                        $oHnrResult->iExactMatches = $aResult['exactmatch'];
+                        $oHnrResult->iAddressRank = 30;
+                        $oHnrResult->iHouseNumber = intval($this->sHouseNumber);
+                        $aResults[$iPlaceID] = $oHnrResult;
+                        $bNeedResult = false;
+                    }
+                }
+
+                if ($aResult['address_rank'] < 26) {
+                    $oResult->iResultRank += 2;
+                } else {
+                    $oResult->iResultRank++;
+                }
            }
-            $sSQL .= " or interpolationtype='all') and ";
-            $sSQL .= $iHousenumber.'>=startnumber and ';
-            $sSQL .= $iHousenumber.'<=endnumber';
-            $sSQL .= $this->oContext->excludeSQL(' AND place_id');

-            Debug::printSQL($sSQL);
-
-            foreach ($oDB->getCol($sSQL) as $iPlaceId) {
-                $oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
-                $oResult->iHouseNumber = $iHousenumber;
-                $aResults[$iPlaceId] = $oResult;
-            }
-        }
-
-        // If nothing found try the aux fallback table
-        if (CONST_Use_Aux_Location_data && empty($aResults)) {
-            $sSQL = 'SELECT place_id FROM location_property_aux';
-            $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
-            $sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
-            $sSQL .= $this->oContext->excludeSQL(' AND place_id');
-
-            Debug::printSQL($sSQL);
-
-            foreach ($oDB->getCol($sSQL) as $iPlaceId) {
-                $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
-            }
-        }
-
-        // If nothing found then search in Tiger data (location_property_tiger)
-        if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
-            $sSQL = 'SELECT place_id FROM location_property_tiger';
-            $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and (';
-            if ($iHousenumber % 2 == 0) {
-                $sSQL .= "interpolationtype='even'";
-            } else {
-                $sSQL .= "interpolationtype='odd'";
-            }
-            $sSQL .= " or interpolationtype='all') and ";
-            $sSQL .= $iHousenumber.'>=startnumber and ';
-            $sSQL .= $iHousenumber.'<=endnumber';
-            $sSQL .= $this->oContext->excludeSQL(' AND place_id');
-
-            Debug::printSQL($sSQL);
-
-            foreach ($oDB->getCol($sSQL) as $iPlaceId) {
-                $oResult = new Result($iPlaceId, Result::TABLE_TIGER);
-                $oResult->iHouseNumber = $iHousenumber;
-                $aResults[$iPlaceId] = $oResult;
+            if ($bNeedResult) {
+                $aResults[$aResult['place_id']] = $oResult;
            }
        }

@@ -875,6 +807,7 @@ class SearchDescription
                $sSQL = 'SELECT geometry FROM placex';
                $sSQL .= " WHERE place_id in ($sPlaceIDs)";
                $sSQL .= "   AND rank_search < $iMaxRank + 5";
+                $sSQL .= '   AND ST_Area(Box2d(geometry)) < 20';
                $sSQL .= "   AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')";
                $sSQL .= ' ORDER BY rank_search ASC ';
                $sSQL .= ' LIMIT 1';
--- a/lib-php/SearchPosition.php
+++ b/lib-php/SearchPosition.php
@@ -0,0 +1,95 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim;
+
+/**
+ * Description of the position of a token within a query.
+ */
+class SearchPosition
+{
+    private $sPhraseType;
+
+    private $iPhrase;
+    private $iNumPhrases;
+
+    private $iToken;
+    private $iNumTokens;
+
+
+    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
+    {
+        $this->sPhraseType = $sPhraseType;
+        $this->iPhrase = $iPhrase;
+        $this->iNumPhrases = $iNumPhrases;
+    }
+
+    public function setTokenPosition($iToken, $iNumTokens)
+    {
+        $this->iToken = $iToken;
+        $this->iNumTokens = $iNumTokens;
+    }
+
+    /**
+     * Check if the phrase can be of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return True if the phrase is untyped or of the given type.
+     */
+    public function maybePhrase($sType)
+    {
+        return $this->sPhraseType == '' || $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Check if the phrase is exactly of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return True if the phrase is of the given type.
+     */
+    public function isPhrase($sType)
+    {
+        return $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Return true if the token is the very first in the query.
+     */
+    public function isFirstToken()
+    {
+        return $this->iPhrase == 0 && $this->iToken == 0;
+    }
+
+    /**
+     * Check if the token is the final one in the query.
+     */
+    public function isLastToken()
+    {
+        return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
+    }
+
+    /**
+     * Check if the current token is part of the first phrase in the query.
+     */
+    public function isFirstPhrase()
+    {
+        return $this->iPhrase == 0;
+    }
+
+    /**
+     * Get the phrase position in the query.
+     */
+    public function getPhrase()
+    {
+        return $this->iPhrase;
+    }
+}
--- a/lib-php/Shell.php
+++ b/lib-php/Shell.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -33,7 +41,9 @@ class Shell
    public function addEnvPair($sKey, $sVal)
    {
        if (isset($sKey) && $sKey && isset($sVal)) {
-            if (!isset($this->aEnv)) $this->aEnv = $_ENV;
+            if (!isset($this->aEnv)) {
+                $this->aEnv = $_ENV;
+            }
            $this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV);
        }
        return $this;
@@ -75,11 +85,8 @@ class Shell
        return $iStat;
    }

-
-
    private function escapeParam($sParam)
    {
-        if (preg_match('/^-*\w+$/', $sParam)) return $sParam;
-        return escapeshellarg($sParam);
+        return (preg_match('/^-*\w+$/', $sParam)) ? $sParam : escapeshellarg($sParam);
    }
 }
--- a/lib-php/SimpleWordList.php
+++ b/lib-php/SimpleWordList.php
@@ -0,0 +1,144 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim;
+
+/**
+ * A word list creator based on simple splitting by space.
+ *
+ * Creates possible permutations of split phrases by finding all
+ * combinations of splitting the phrase on space boundaries.
+ */
+class SimpleWordList
+{
+    const MAX_WORDSET_LEN = 20;
+    const MAX_WORDSETS = 100;
+
+    // The phrase as a list of simple terms (without spaces).
+    private $aWords;
+
+    /**
+     * Create a new word list
+     *
+     * @param string $sPhrase  Phrase to create the word list from. The phrase
+     *                         is expected to be normalised, so that there are
+     *                         no consecutive spaces.
+     */
+    public function __construct($sPhrase)
+    {
+        if (strlen($sPhrase) > 0) {
+            $this->aWords = explode(' ', $sPhrase);
+        } else {
+            $this->aWords = array();
+        }
+    }
+
+    /**
+     * Get all possible tokens that are present in this word list.
+     *
+     * @return array The list of string tokens in the word list.
+     */
+    public function getTokens()
+    {
+        $aTokens = array();
+        $iNumWords = count($this->aWords);
+
+        for ($i = 0; $i < $iNumWords; $i++) {
+            $sPhrase = $this->aWords[$i];
+            $aTokens[$sPhrase] = $sPhrase;
+
+            for ($j = $i + 1; $j < $iNumWords; $j++) {
+                $sPhrase .= ' '.$this->aWords[$j];
+                $aTokens[$sPhrase] = $sPhrase;
+            }
+        }
+
+        return $aTokens;
+    }
+
+    /**
+     * Compute all possible permutations of phrase splits that result in
+     * words which are in the token list.
+     */
+    public function getWordSets($oTokens)
+    {
+        $iNumWords = count($this->aWords);
+
+        if ($iNumWords == 0) {
+            return null;
+        }
+
+        // Caches the word set for the partial phrase up to word i.
+        $aSetCache = array_fill(0, $iNumWords, array());
+
+        // Initialise first element of cache. There can only be the word.
+        if ($oTokens->containsAny($this->aWords[0])) {
+            $aSetCache[0][] = array($this->aWords[0]);
+        }
+
+        // Now do the next elements using what we already have.
+        for ($i = 1; $i < $iNumWords; $i++) {
+            for ($j = $i; $j > 0; $j--) {
+                $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
+                if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
+                    $aPartial = array($sPartial);
+                    foreach ($aSetCache[$j - 1] as $aSet) {
+                        if (count($aSet) < SimpleWordList::MAX_WORDSET_LEN) {
+                            $aSetCache[$i][] = array_merge($aSet, $aPartial);
+                        }
+                    }
+                    if (count($aSetCache[$i]) > 2 * SimpleWordList::MAX_WORDSETS) {
+                        usort(
+                            $aSetCache[$i],
+                            array('\Nominatim\SimpleWordList', 'cmpByArraylen')
+                        );
+                        $aSetCache[$i] = array_slice(
+                            $aSetCache[$i],
+                            0,
+                            SimpleWordList::MAX_WORDSETS
+                        );
+                    }
+                }
+            }
+
+            // finally the current full phrase
+            $sPartial = $this->aWords[0].' '.$sPartial;
+            if ($oTokens->containsAny($sPartial)) {
+                $aSetCache[$i][] = array($sPartial);
+            }
+        }
+
+        $aWordSets = $aSetCache[$iNumWords - 1];
+        usort($aWordSets, array('\Nominatim\SimpleWordList', 'cmpByArraylen'));
+        return array_slice($aWordSets, 0, SimpleWordList::MAX_WORDSETS);
+    }
+
+    /**
+     * Comparison function for usort() which takes two arrays. The array with
+     * the fewest items sorts first. If both have the same number of items,
+     * then the one with the longest first element sorts first.
+     */
+    public static function cmpByArraylen($aA, $aB)
+    {
+        $iALen = count($aA);
+        $iBLen = count($aB);
+
+        if ($iALen == $iBLen) {
+            return strlen($aB[0]) <=> strlen($aA[0]);
+        }
+
+        return ($iALen < $iBLen) ? -1 : 1;
+    }
+
+    public function debugInfo()
+    {
+        return $this->aWords;
+    }
+}
--- a/lib-php/SpecialSearchOperator.php
+++ b/lib-php/SpecialSearchOperator.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

--- a/lib-php/Status.php
+++ b/lib-php/Status.php
@@ -1,7 +1,17 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

+require_once(CONST_TokenizerDir.'/tokenizer.php');
+
 use Exception;

 class Status
@@ -25,24 +35,8 @@ class Status
            throw new Exception('Database connection failed', 700);
        }

-        $sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
-        if ($sStandardWord === false) {
-            throw new Exception('Module failed', 701);
-        }
-
-        if ($sStandardWord != 'a') {
-            throw new Exception('Module call failed', 702);
-        }
-
-        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, ';
-        $sSQL .= "operator, search_name_count FROM word WHERE word_token IN (' a')";
-        $iWordID = $this->oDB->getOne($sSQL);
-        if ($iWordID === false) {
-            throw new Exception('Query failed', 703);
-        }
-        if (!$iWordID) {
-            throw new Exception('No value', 704);
-        }
+        $oTokenizer = new \Nominatim\Tokenizer($this->oDB);
+        $oTokenizer->checkStatus();
    }

    public function dataDate()
@@ -51,7 +45,7 @@ class Status
        $iDataDateEpoch = $this->oDB->getOne($sSQL);

        if ($iDataDateEpoch === false) {
-            throw Exception('Data date query failed '.$iDataDateEpoch->getMessage(), 705);
+            throw new Exception('Import date is not available', 705);
        }

        return $iDataDateEpoch;
--- a/lib-php/TokenCountry.php
+++ b/lib-php/TokenCountry.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim\Token;

@@ -8,9 +16,9 @@ namespace Nominatim\Token;
 class Country
 {
    /// Database word id, if available.
-    public $iId;
+    private $iId;
    /// Two-letter country code (lower-cased).
-    public $sCountryCode;
+    private $sCountryCode;

    public function __construct($iId, $sCountryCode)
    {
@@ -18,6 +26,46 @@ class Country
        $this->sCountryCode = $sCountryCode;
    }

+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Check if the token can be added to the given search when it appears
+     * at the given position. The check itself derives no new searches.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasCountry()
+               && $oPosition->maybePhrase('country')
+               && $oSearch->getContext()->isCountryApplicable($this->sCountryCode);
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6);
+        $oNewSearch->setCountry($this->sCountryCode);
+
+        return array($oNewSearch);
+    }
+
    public function debugInfo()
    {
        return array(
@@ -26,4 +74,9 @@ class Country
                'Info' => $this->sCountryCode
               );
    }
+
+    public function debugCode()
+    {
+        return 'C';
+    }
 }
--- a/lib-php/TokenHousenumber.php
+++ b/lib-php/TokenHousenumber.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim\Token;

@@ -8,9 +16,9 @@ namespace Nominatim\Token;
 class HouseNumber
 {
    /// Database word id, if available.
-    public $iId;
+    private $iId;
    /// Normalized house number.
-    public $sToken;
+    private $sToken;

    public function __construct($iId, $sToken)
    {
@@ -18,6 +26,80 @@ class HouseNumber
        $this->sToken = $sToken;
    }

+    public function getId()
+    {
+        return $this->iId;
+    }
+
+    /**
+     * Check if the token can be added to the given search when it appears
+     * at the given position. The check itself derives no new searches.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return True if the token is compatible with the search configuration
+     *         given the position.
+     */
+    public function isExtendable($oSearch, $oPosition)
+    {
+        return !$oSearch->hasHousenumber()
+               && !$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+               && $oPosition->maybePhrase('street');
+    }
+
+    /**
+     * Derive new searches by adding this token to an existing search.
+     *
+     * @param object  $oSearch      Partial search description derived so far.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
+     *
+     * @return SearchDescription[] List of derived search descriptions.
+     */
+    public function extendSearch($oSearch, $oPosition)
+    {
+        $aNewSearches = array();
+
+        // sanity check: if the housenumber is not mainly made
+        // up of numbers, add a penalty
+        $iSearchCost = 1;
+        if (preg_match('/\\d/', $this->sToken) === 0
+            || preg_match_all('/[^0-9 ]/', $this->sToken, $aMatches) > 3) {
+            $iSearchCost += strlen($this->sToken) - 1;
+        }
+        if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {
+            $iSearchCost++;
+        }
+        if (empty($this->iId)) {
+            $iSearchCost++;
+        }
+        // also must not appear in the middle of the address
+        if ($oSearch->hasAddress() || $oSearch->hasPostcode()) {
+            $iSearchCost++;
+        }
+
+        $oNewSearch = $oSearch->clone($iSearchCost);
+        $oNewSearch->setHousenumber($this->sToken);
+        $aNewSearches[] = $oNewSearch;
+
+        // Housenumbers may appear in the name when the place has its own
+        // address terms.
+        if ($this->iId !== null
+            && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName())
+            && !$oSearch->hasAddress()
+        ) {
+            $oNewSearch = $oSearch->clone($iSearchCost);
+            $oNewSearch->setHousenumberAsName($this->iId);
+
+            $aNewSearches[] = $oNewSearch;
+        }
+
+        return $aNewSearches;
+    }
+
+
    public function debugInfo()
    {
        return array(
@@ -26,4 +108,9 @@ class HouseNumber
                'Info' => array('nr' => $this->sToken)
               );
    }
+
+    public function debugCode()
+    {
+        return 'H';
+    }
 }
--- a/lib-php/TokenList.php
+++ b/lib-php/TokenList.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -7,6 +15,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php');
 require_once(CONST_LibDir.'/TokenPostcode.php');
 require_once(CONST_LibDir.'/TokenSpecialTerm.php');
 require_once(CONST_LibDir.'/TokenWord.php');
+require_once(CONST_LibDir.'/TokenPartial.php');
 require_once(CONST_LibDir.'/SpecialSearchOperator.php');

 /**
@@ -17,15 +26,6 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php');
 * tokens do not have a common base class. All tokens need to have a field
 * with the word id that points to an entry in the `word` database table
 * but otherwise the information saved about a token can be very different.
- *
- * There are two different kinds of token words: full words and partial terms.
- *
- * Full words start with a space. They represent a complete name of a place.
- * All special tokens are normally full words.
- *
- * Partial terms have no space at the beginning. They may represent a part of
- * a name of a place (e.g. in the name 'World Trade Center' a partial term
- * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
 */
 class TokenList
 {
@@ -64,7 +64,7 @@ class TokenList
     */
    public function containsAny($sWord)
    {
-        return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
+        return isset($this->aTokens[$sWord]);
    }

    /**
@@ -86,8 +86,8 @@ class TokenList

        foreach ($this->aTokens as $aTokenList) {
            foreach ($aTokenList as $oToken) {
-                if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
-                    $ids[$oToken->iId] = $oToken->iId;
+                if (is_a($oToken, '\Nominatim\Token\Word')) {
+                    $ids[$oToken->getId()] = $oToken->getId();
                }
            }
        }
@@ -95,88 +95,6 @@ class TokenList
        return $ids;
    }

-    /**
-     * Add token information from the word table in the database.
-     *
-     * @param object   $oDB           Nominatim::DB instance.
-     * @param string[] $aTokens       List of tokens to look up in the database.
-     * @param string[] $aCountryCodes List of country restrictions.
-     * @param string   $sNormQuery    Normalized query string.
-     * @param object   $oNormalizer   Normalizer function to use on tokens.
-     *
-     * @return void
-     */
-    public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
-    {
-        // Check which tokens we have, get the ID numbers
-        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
-        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
-        $sSQL .= ' FROM word WHERE word_token in (';
-        $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
-
-        Debug::printSQL($sSQL);
-
-        $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
-
-        foreach ($aDBWords as $aWord) {
-            $oToken = null;
-            $iId = (int) $aWord['word_id'];
-
-            if ($aWord['class']) {
-                // Special terms need to appear in their normalized form.
-                if ($aWord['word']) {
-                    $sNormWord = $aWord['word'];
-                    if ($oNormalizer != null) {
-                        $sNormWord = $oNormalizer->transliterate($aWord['word']);
-                    }
-                    if (strpos($sNormQuery, $sNormWord) === false) {
-                        continue;
-                    }
-                }
-
-                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
-                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
-                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
-                    if ($aWord['word']
-                        && pg_escape_string($aWord['word']) == $aWord['word']
-                    ) {
-                        $oToken = new Token\Postcode(
-                            $iId,
-                            $aWord['word'],
-                            $aWord['country_code']
-                        );
-                    }
-                } else {
-                    // near and in operator the same at the moment
-                    $oToken = new Token\SpecialTerm(
-                        $iId,
-                        $aWord['class'],
-                        $aWord['type'],
-                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
-                    );
-                }
-            } elseif ($aWord['country_code']) {
-                // Filter country tokens that do not match restricted countries.
-                if (!$aCountryCodes
-                    || in_array($aWord['country_code'], $aCountryCodes)
-                ) {
-                    $oToken = new Token\Country($iId, $aWord['country_code']);
-                }
-            } else {
-                $oToken = new Token\Word(
-                    $iId,
-                    $aWord['word_token'][0] != ' ',
-                    (int) $aWord['count'],
-                    substr_count($aWord['word_token'], ' ')
-                );
-            }
-
-            if ($oToken) {
-                $this->addToken($aWord['word_token'], $oToken);
-            }
-        }
-    }
-
    /**
     * Add a new token for the given word.
     *
@@ -199,9 +117,9 @@ class TokenList
        $aWordsIDs = array();
        foreach ($this->aTokens as $sToken => $aWords) {
            foreach ($aWords as $aToken) {
-                if ($aToken->iId !== null) {
-                    $aWordsIDs[$aToken->iId] =
-                        '#'.$sToken.'('.$aToken->iId.')#';
+                $iId = $aToken->getId();
+                if ($iId !== null) {
+                    $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#';
                }
            }
        }
--- a/Show More
+++ b/Show More