prepare release 4.1.1

update osm2pgsql to 1.7.1
drop illegal values for addr:interpolation on update
2026-02-14 10:27:57 +00:00 · 2022-11-19 16:15:47 +01:00 · 2022-11-19 15:54:27 +01:00 · 2022-11-19 15:53:29 +01:00 · 2022-11-19 15:52:19 +01:00 · 2022-11-19 15:51:09 +01:00
4060 changed files with 92854 additions and 33767 deletions
--- a/.github/ISSUE_TEMPLATE/config.yml
+++ b/.github/ISSUE_TEMPLATE/config.yml
@@ -0,0 +1,4 @@
+contact_links:
+  - name: Nominatim Discussions
+    url: https://github.com/osm-search/Nominatim/discussions
+    about: Ask questions, get support, share ideas and discuss with community members.
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,22 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+<!-- Before opening a new feature request, please search through the open issues to check that your request hasn't been reported already. -->
+
+**Is your feature request related to a problem? Please describe.**
+<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
+
+**Describe the solution you'd like**
+<!-- A clear and concise description of what you want to happen. -->
+
+**Describe alternatives you've considered**
+<!-- A clear and concise description of any alternative solutions or features you've considered. -->
+
+**Additional context**
+<!-- Add any other context or screenshots about the feature request here. -->
--- a/.github/ISSUE_TEMPLATE/report-issues-with-search-results.md
+++ b/.github/ISSUE_TEMPLATE/report-issues-with-search-results.md
@@ -0,0 +1,39 @@
+---
+name: Report issues with search results
+about: You have searched something with Nominatim and did not get the expected result.
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+<!-- Note: this template is for reporting problems with searching. If you have found an issue with the data, you need to report/fix the issue directly in OpenStreetMap. See https://www.openstreetmap.org/fixthemap for details. -->
+
+## What did you search for?
+
+<!-- Please try to provide a link to your search. You can go to https://nominatim.openstreetmap.org and repeat your search there. If you originally found the issue somewhere else, please tell us what software/website you were using. -->
+
+## What result did you get?
+
+## What result did you expect?
+
+**When the result is in the right place and just named wrongly:**
+
+<!-- Please tell us the display name you expected. -->
+
+**When the result is missing completely:**
+
+<!-- Make sure that the data you are looking for is in OpenStreetMap. Provide a link to the OpenStreetMap object or if you cannot get it, a link to the map on https://openstreetmap.org where you expect the result to be.
+
+To get the link to the OSM object, you can try the following:
+
+ * Go to [https://openstreetmap.org](https://openstreetmap.org).
+ * Move to the area of the map where you expect the result and then zoom in as much as possible.
+ * Click on the question mark on the right side of the map. You get a question cursor. Use it to click on the map where your object is located.
+ * Find the object of interest in the list that appears on the left side.
+ * Click on the object and report back the URL that the browser shows.
+-->
+
+## Further details
+
+<!-- Anything else we should know about the search. Particularities with addresses in the area etc. -->
--- a/.github/ISSUE_TEMPLATE/report-problems-with-the-software.md
+++ b/.github/ISSUE_TEMPLATE/report-problems-with-the-software.md
@@ -0,0 +1,36 @@
+---
+name: Report problems with the software
+about: You have your own installation of Nominatim and found a bug.
+title: ''
+labels: ''
+assignees: ''
+
+---
+
+<!-- Note: if you are installing Nominatim through a docker image, you should report issues with the installation process with the docker repository first. -->
+
+**Describe the bug**
+<!-- A clear and concise description of what the bug is. -->
+
+**To Reproduce**
+<!-- Please describe what you did to get to the issue. -->
+
+**Software Environment (please complete the following information):**
+- Nominatim version: 
+- Postgresql version: 
+- Postgis version:
+- OS: 
+
+**Hardware Configuration (please complete the following information):**
+- RAM: 
+- number of CPUs:
+- type and size of disks:
+- bare metal/AWS/other cloud service: 
+
+**Postgresql Configuration:**
+
+<!-- List any configuration items you changed in your postgresql configuration. -->
+
+**Additional context**
+
+<!-- Add any other context about the problem here. -->
--- a/.github/actions/build-nominatim/action.yml
+++ b/.github/actions/build-nominatim/action.yml
@@ -0,0 +1,46 @@
+name: 'Build Nominatim'
+
+inputs:
+    ubuntu:
+        description: 'Version of Ubuntu to install on'
+        required: false
+        default: '20'
+    cmake-args:
+        description: 'Additional options to hand to cmake'
+        required: false
+        default: ''
+
+runs:
+    using: "composite"
+
+    steps:
+        - name: Clean out the disk
+          run: |
+            sudo rm -rf /opt/hostedtoolcache/go /opt/hostedtoolcache/CodeQL /usr/lib/jvm /usr/local/share/chromium /usr/local/lib/android
+            df -h
+          shell: bash
+        - name: Install prerequisites
+          run: |
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev
+            if [ "x$UBUNTUVER" == "x18" ]; then
+                pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 datrie
+            else
+                sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
+            fi
+          shell: bash
+          env:
+            UBUNTUVER: ${{ inputs.ubuntu }}
+            CMAKE_ARGS: ${{ inputs.cmake-args }}
+
+        - name: Configure
+          run: mkdir build && cd build && cmake $CMAKE_ARGS ../Nominatim
+          shell: bash
+          env:
+            CMAKE_ARGS: ${{ inputs.cmake-args }}
+
+        - name: Build
+          run: |
+              make -j2 all
+              sudo make install
+          shell: bash
+          working-directory: build
--- a/.github/actions/setup-postgresql/action.yml
+++ b/.github/actions/setup-postgresql/action.yml
@@ -0,0 +1,47 @@
+name: 'Setup Postgresql and Postgis'
+
+inputs:
+    postgresql-version:
+        description: 'Version of PostgreSQL to install'
+        required: true
+    postgis-version:
+        description: 'Version of Postgis to install'
+        required: true
+
+runs:
+    using: "composite"
+
+    steps:
+        - name: Remove existing PostgreSQL
+          run: |
+              sudo apt-get purge -yq postgresql*
+              sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+              sudo apt-get update -qq
+
+          shell: bash
+
+        - name: Install PostgreSQL
+          run: |
+              sudo apt-get install -y -qq --no-install-suggests --no-install-recommends postgresql-client-${PGVER} postgresql-${PGVER}-postgis-${POSTGISVER} postgresql-${PGVER}-postgis-${POSTGISVER}-scripts postgresql-contrib-${PGVER} postgresql-${PGVER}
+          shell: bash
+          env:
+              PGVER: ${{ inputs.postgresql-version }}
+              POSTGISVER: ${{ inputs.postgis-version }}
+
+        - name: Adapt postgresql configuration
+          run: |
+              echo 'fsync = off' | sudo tee /etc/postgresql/${PGVER}/main/conf.d/local.conf
+              echo 'synchronous_commit = off' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
+              echo 'full_page_writes = off' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
+              echo 'shared_buffers = 1GB' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
+              echo 'port = 5432' | sudo tee -a /etc/postgresql/${PGVER}/main/conf.d/local.conf
+          shell: bash
+          env:
+              PGVER: ${{ inputs.postgresql-version }}
+
+        - name: Setup database
+          run: |
+              sudo systemctl restart postgresql
+              sudo -u postgres createuser -S www-data
+              sudo -u postgres createuser -s runner
+          shell: bash
--- a/.github/workflows/ci-tests.yml
+++ b/.github/workflows/ci-tests.yml
@@ -0,0 +1,312 @@
+name: CI Tests
+
+on: [ push, pull_request ]
+
+jobs:
+    create-archive:
+        runs-on: ubuntu-latest
+
+        steps:
+            - uses: actions/checkout@v3
+              with:
+                submodules: true
+
+            - uses: actions/cache@v3
+              with:
+                  path: |
+                     data/country_osm_grid.sql.gz
+                  key: nominatim-country-data-1
+
+            - name: Package tarball
+              run: |
+                  if [ ! -f data/country_osm_grid.sql.gz ]; then
+                      wget --no-verbose -O data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
+                  fi
+                  cd ..
+                  tar czf nominatim-src.tar.bz2 Nominatim
+                  mv nominatim-src.tar.bz2 Nominatim
+
+            - name: 'Upload Artifact'
+              uses: actions/upload-artifact@v3
+              with:
+                  name: full-source
+                  path: nominatim-src.tar.bz2
+                  retention-days: 1
+
+    tests:
+        needs: create-archive
+        strategy:
+            matrix:
+                ubuntu: [18, 20, 22]
+                include:
+                    - ubuntu: 18
+                      postgresql: 9.6
+                      postgis: 2.5
+                      pytest: pytest
+                      php: 7.2
+                    - ubuntu: 20
+                      postgresql: 13
+                      postgis: 3
+                      pytest: py.test-3
+                      php: 7.4
+                    - ubuntu: 22
+                      postgresql: 14
+                      postgis: 3
+                      pytest: py.test-3
+                      php: 8.1
+
+        runs-on: ubuntu-${{ matrix.ubuntu }}.04
+
+        steps:
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+
+            - name: Unpack Nominatim
+              run: tar xf nominatim-src.tar.bz2
+
+            - name: Setup PHP
+              uses: shivammathur/setup-php@v2
+              with:
+                  php-version: ${{ matrix.php }}
+                  tools: phpunit, phpcs, composer
+                  ini-values: opcache.jit=disable
+
+            - uses: actions/setup-python@v4
+              with:
+                python-version: 3.6
+              if: matrix.ubuntu == 18
+
+            - uses: ./Nominatim/.github/actions/setup-postgresql
+              with:
+                  postgresql-version: ${{ matrix.postgresql }}
+                  postgis-version: ${{ matrix.postgis }}
+
+            - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  ubuntu: ${{ matrix.ubuntu }}
+
+            - name: Install test prerequsites
+              run: sudo apt-get install -y -qq python3-pytest python3-behave
+              if: matrix.ubuntu == 20
+
+            - name: Install test prerequsites
+              run: pip3 install pylint pytest behave==1.2.6
+              if: ${{ (matrix.ubuntu == 18) || (matrix.ubuntu == 22) }}
+
+            - name: Install test prerequsites
+              run: sudo apt-get install -y -qq python3-pytest
+              if: matrix.ubuntu == 22
+
+            - name: Install latest pylint/mypy
+              run: pip3 install -U pylint mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil types-requests typing-extensions
+
+            - name: PHP linting
+              run: phpcs --report-width=120 .
+              working-directory: Nominatim
+
+            - name: Python linting
+              run: pylint nominatim
+              working-directory: Nominatim
+
+            - name: Python static typechecking
+              run: mypy --strict nominatim
+              working-directory: Nominatim
+
+
+            - name: PHP unit tests
+              run: phpunit ./
+              working-directory: Nominatim/test/php
+              if: ${{ (matrix.ubuntu == 20) || (matrix.ubuntu == 22) }}
+
+            - name: Python unit tests
+              run: $PYTEST test/python
+              working-directory: Nominatim
+              env:
+                PYTEST: ${{ matrix.pytest }}
+
+            - name: BDD tests
+              run: |
+                  behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
+              working-directory: Nominatim/test/bdd
+
+
+    legacy-test:
+        needs: create-archive
+        runs-on: ubuntu-20.04
+
+        steps:
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+
+            - name: Unpack Nominatim
+              run: tar xf nominatim-src.tar.bz2
+
+            - name: Setup PHP
+              uses: shivammathur/setup-php@v2
+              with:
+                  php-version: 7.4
+
+            - uses: ./Nominatim/.github/actions/setup-postgresql
+              with:
+                  postgresql-version: 13
+                  postgis-version: 3
+
+            - name: Install Postgresql server dev
+              run: sudo apt-get install postgresql-server-dev-13
+
+            - uses: ./Nominatim/.github/actions/build-nominatim
+              with:
+                  ubuntu: 20
+                  cmake-args: -DBUILD_MODULE=on
+
+            - name: Install test prerequsites
+              run: sudo apt-get install -y -qq python3-behave
+
+            - name: BDD tests (legacy tokenizer)
+              run: |
+                  behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build -DTOKENIZER=legacy --format=progress3
+              working-directory: Nominatim/test/bdd
+
+
+    install:
+        runs-on: ubuntu-latest
+        needs: create-archive
+
+        strategy:
+            matrix:
+                name: [Ubuntu-18, Ubuntu-20, Ubuntu-22]
+                include:
+                    - name: Ubuntu-18
+                      flavour: ubuntu
+                      image: "ubuntu:18.04"
+                      ubuntu: 18
+                      install_mode: install-nginx
+                    - name: Ubuntu-20
+                      flavour: ubuntu
+                      image: "ubuntu:20.04"
+                      ubuntu: 20
+                      install_mode: install-apache
+                    - name: Ubuntu-22
+                      flavour: ubuntu
+                      image: "ubuntu:22.04"
+                      ubuntu: 22
+                      install_mode: install-apache
+
+        container:
+            image: ${{ matrix.image }}
+            env:
+                LANG: en_US.UTF-8
+
+        defaults:
+            run:
+                shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0}
+
+        steps:
+            - name: Prepare container (Ubuntu)
+              run: |
+                  export APT_LISTCHANGES_FRONTEND=none
+                  export DEBIAN_FRONTEND=noninteractive
+                  apt-get update -qq
+                  apt-get install -y git sudo wget
+                  ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
+              shell: bash
+              if: matrix.flavour == 'ubuntu'
+
+            - name: Prepare container (CentOS)
+              run: |
+                  dnf update -y
+                  dnf install -y sudo glibc-langpack-en
+              shell: bash
+              if: matrix.flavour == 'centos'
+
+            - name: Setup import user
+              run: |
+                  useradd -m nominatim
+                  echo 'nominatim   ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominiatim
+                  echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh
+              shell: bash
+              env:
+                OS: ${{ matrix.name }}
+                INSTALL_MODE: ${{ matrix.install_mode }}
+
+            - uses: actions/download-artifact@v3
+              with:
+                  name: full-source
+                  path: /home/nominatim
+
+            - name: Install Nominatim
+              run: |
+                export USERNAME=nominatim
+                export USERHOME=/home/nominatim
+                export NOSYSTEMD=yes
+                export HAVE_SELINUX=no
+                tar xf nominatim-src.tar.bz2
+                . vagrant.sh
+              working-directory: /home/nominatim
+
+            - name: Prepare import environment
+              run: |
+                  mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
+                  rm -rf Nominatim
+                  mkdir data-env-reverse
+              working-directory: /home/nominatim
+
+            - name: Prepare import environment (CentOS)
+              run: |
+                  sudo ln -s /usr/local/bin/nominatim /usr/bin/nominatim
+                  echo NOMINATIM_DATABASE_WEBUSER="apache" > nominatim-project/.env
+                  cp nominatim-project/.env data-env-reverse/.env
+              working-directory: /home/nominatim
+              if: matrix.flavour == 'centos'
+
+            - name: Print version
+              run: nominatim --version
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Import
+              run: nominatim import --osm-file ../test.pbf
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Import special phrases
+              run: nominatim special-phrases --import-from-wiki
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Check full import
+              run: nominatim admin --check-database
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Warm up database
+              run: nominatim admin --warm
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Prepare update (Ubuntu)
+              run: apt-get install -y python3-pip
+              shell: bash
+              if: matrix.flavour == 'ubuntu'
+
+            - name: Run update
+              run: |
+                  pip3 install --user osmium
+                  nominatim replication --init
+                  NOMINATIM_REPLICATION_MAX_DIFF=1 nominatim replication --once
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Clean up database
+              run: nominatim refresh --postcodes --word-tokens
+              working-directory: /home/nominatim/nominatim-project
+
+            - name: Run reverse-only import
+              run : |
+                  echo 'NOMINATIM_DATABASE_DSN="pgsql:dbname=reverse"' >> .env
+                  nominatim import --osm-file ../test.pbf --reverse-only --no-updates
+              working-directory: /home/nominatim/data-env-reverse
+
+            - name: Check reverse-only import
+              run: nominatim admin --check-database
+              working-directory: /home/nominatim/data-env-reverse
+
+            - name: Clean up database (reverse-only import)
+              run: nominatim refresh --postcodes --word-tokens
+              working-directory: /home/nominatim/data-env-reverse  # project dir whose .env points at the 'reverse' database
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,9 @@
 *.log
 *.pyc

-build
-settings/local.php
+docs/develop/*.png

-data/wiki_import.sql
-data/wiki_specialphrases.sql
-data/osmosischange.osc
+build

 .vagrant
+data/country_osm_grid.sql.gz
--- a/.mypy.ini
+++ b/.mypy.ini
@@ -0,0 +1,13 @@
+[mypy]
+
+[mypy-icu.*]
+ignore_missing_imports = True
+
+[mypy-osmium.*]
+ignore_missing_imports = True
+
+[mypy-datrie.*]
+ignore_missing_imports = True
+
+[mypy-dotenv.*]
+ignore_missing_imports = True
--- a/.pylintrc
+++ b/.pylintrc
@@ -0,0 +1,18 @@
+[MASTER]
+
+extension-pkg-whitelist=osmium
+ignored-modules=icu,datrie
+
+[MESSAGES CONTROL]
+
+[TYPECHECK]
+
+# closing added here because it sometimes triggers a false positive with
+# 'with' statements.
+ignored-classes=NominatimArgs,closing
+# 'too-many-ancestors' is triggered already by deriving from UserDict
+# 'not-context-manager' disabled because it causes false positives once
+#   typed Python is enabled. See also https://github.com/PyCQA/pylint/issues/5273
+disable=too-few-public-methods,duplicate-code,too-many-ancestors,bad-option-value,no-self-use,not-context-manager
+
+good-names=i,x,y,fd,db,cc
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,34 +0,0 @@
---
-sudo: required
-dist: xenial
-language: python
-python:
-  - "3.6"
-addons:
-  postgresql: "9.6"
-git:
-  depth: 3
-env:
-  - TEST_SUITE=tests
-  - TEST_SUITE=monaco
-before_install:
-  - phpenv global 7.1
-install:
-  - vagrant/install-on-travis-ci.sh
-before_script:
-  - psql -U postgres -c "create extension postgis"
-script:
-  - cd $TRAVIS_BUILD_DIR/
-  - if [[ $TEST_SUITE == "tests" ]]; then phpcs --report-width=120 . ; fi
-  - cd $TRAVIS_BUILD_DIR/test/php
-  - if [[ $TEST_SUITE == "tests" ]]; then /usr/bin/phpunit ./ ; fi
-  - cd $TRAVIS_BUILD_DIR/test/bdd
-  - # behave --format=progress3 api
-  - if [[ $TEST_SUITE == "tests" ]]; then behave -DREMOVE_TEMPLATE=1 --format=progress3 db ; fi
-  - if [[ $TEST_SUITE == "tests" ]]; then behave --format=progress3 osm2pgsql ; fi
-  - cd $TRAVIS_BUILD_DIR/build
-  - if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
-  - if [[ $TEST_SUITE == "monaco" ]]; then /usr/bin/env php ./utils/setup.php --osm-file ../data/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | grep -v 'ETA (seconds)'; fi
-  - if [[ $TEST_SUITE == "monaco" ]]; then /usr/bin/env php ./utils/specialphrases.php --wiki-import | psql -d test_api_nominatim >/dev/null; fi
-notifications:
-  email: false
--- a/16
+++ b/16
@@ -1,15 +1,15 @@
 Nominatim was written by:

-  Brian Quinion
-  Sarah Hoffmann
-  Marc Tobias Metten
+* Brian Quinion
+* Sarah Hoffmann
+* Marc Tobias Metten

-  markigail
-  gemo1011
-  IrlJidel
-  Frederik Ramm
+* markigail
+* AntoJvlt
+* gemo1011
+* darkshredder

 and many more.

-For a full list of contributors see
+For a full list of contributors see the Git logs or visit
 https://github.com/openstreetmap/Nominatim/graphs/contributors
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -6,7 +6,7 @@
 #
 #-----------------------------------------------------------------------------

-cmake_minimum_required(VERSION 2.8 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.0 FATAL_ERROR)
 list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")


@@ -18,158 +18,270 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")

 project(nominatim)

-set(NOMINATIM_VERSION_MAJOR 3)
-set(NOMINATIM_VERSION_MINOR 4)
-set(NOMINATIM_VERSION_PATCH 0)
+set(NOMINATIM_VERSION_MAJOR 4)
+set(NOMINATIM_VERSION_MINOR 1)
+set(NOMINATIM_VERSION_PATCH 1)

 set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")

 add_definitions(-DNOMINATIM_VERSION="${NOMINATIM_VERSION}")

+# Setting GIT_HASH
+find_package(Git)
+if (GIT_FOUND)
+    execute_process(
+        COMMAND "${GIT_EXECUTABLE}" log -1 --format=%h
+        WORKING_DIRECTORY ${CMAKE_CURRENT_LIST_DIR}
+        OUTPUT_VARIABLE GIT_HASH
+        OUTPUT_STRIP_TRAILING_WHITESPACE
+        ERROR_QUIET
+        )
+endif()

 #-----------------------------------------------------------------------------
-#
-#  Find external dependencies
-#
+#  Configuration
 #-----------------------------------------------------------------------------

-set(BUILD_TESTS off CACHE BOOL "Build test suite" FORCE)
-set(WITH_LUA off CACHE BOOL "Build with lua support" FORCE)
-set(ONLY_DOCS off CACHE BOOL "Build documentation only")
+set(BUILD_IMPORTER on CACHE BOOL "Build everything for importing/updating the database")
+set(BUILD_API on CACHE BOOL "Build everything for the API server")
+set(BUILD_MODULE off CACHE BOOL "Build PostgreSQL module for legacy tokenizer")
+set(BUILD_TESTS on CACHE BOOL "Build test suite")
+set(BUILD_DOCS on CACHE BOOL "Build documentation")
+set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
+set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
+set(INSTALL_MUNIN_PLUGINS on CACHE BOOL "Install Munin plugins for supervising Nominatim")

-if (NOT ONLY_DOCS)
+#-----------------------------------------------------------------------------
+#  osm2pgsql (imports/updates only)
+#-----------------------------------------------------------------------------
+
+if (BUILD_IMPORTER AND BUILD_OSM2PGSQL)
    if (NOT EXISTS "${CMAKE_SOURCE_DIR}/osm2pgsql/CMakeLists.txt")
        message(FATAL_ERROR "The osm2pgsql directory is empty.\
        Did you forget to check out Nominatim recursively?\
        \nTry updating submodules with: git submodule update --init")
    endif()
+    set(BUILD_TESTS_SAVED "${BUILD_TESTS}")
+    set(BUILD_TESTS off)
+    set(WITH_LUA off CACHE BOOL "")
    add_subdirectory(osm2pgsql)
-
-    find_package(Threads REQUIRED)
-
-    unset(PostgreSQL_TYPE_INCLUDE_DIR CACHE)
-    set(PostgreSQL_TYPE_INCLUDE_DIR "/usr/include/")
-    find_package(PostgreSQL REQUIRED)
-    include_directories(${PostgreSQL_INCLUDE_DIRS})
-    link_directories(${PostgreSQL_LIBRARY_DIRS})
-
-    find_program(PYOSMIUM pyosmium-get-changes)
-    if (NOT EXISTS "${PYOSMIUM}")
-        set(PYOSMIUM_PATH "")
-            message(WARNING "pyosmium-get-changes not found (required for updates)")
-    else()
-        set(PYOSMIUM_PATH "${PYOSMIUM}")
-        message(STATUS "Using pyosmium-get-changes at ${PYOSMIUM_PATH}")
-    endif()
+    set(BUILD_TESTS ${BUILD_TESTS_SAVED})
+endif()


-    find_program(PG_CONFIG pg_config)
-    execute_process(COMMAND ${PG_CONFIG} --pgxs
-                    OUTPUT_VARIABLE PGXS
-                    OUTPUT_STRIP_TRAILING_WHITESPACE)
+#-----------------------------------------------------------------------------
+#  python (imports/updates only)
+#-----------------------------------------------------------------------------

-    if (NOT EXISTS "${PGXS}")
-        message(FATAL_ERROR "Postgresql server package not found.")
-    endif()
+if (BUILD_IMPORTER)
+    find_package(PythonInterp 3.6 REQUIRED)
+endif()

-    find_package(ZLIB REQUIRED)
+#-----------------------------------------------------------------------------
+# PHP
+#-----------------------------------------------------------------------------

-    find_package(BZip2 REQUIRED)
+# Set the PHP binary from the command line (takes precedence) or auto-detect it

-    find_package(LibXml2 REQUIRED)
-    include_directories(${LIBXML2_INCLUDE_DIR})
-
-    # Setting PHP binary variable as to command line (prevailing) or auto detect
+if (BUILD_API OR BUILD_IMPORTER)
    if (NOT PHP_BIN)
         find_program (PHP_BIN php)
    endif()
    # sanity check if PHP binary exists
    if (NOT EXISTS ${PHP_BIN})
        message(FATAL_ERROR "PHP binary not found. Install php or provide location with -DPHP_BIN=/path/php ")
+    else()
+        message (STATUS "Using PHP binary " ${PHP_BIN})
+    endif()
+    if (NOT PHPCGI_BIN)
+        find_program (PHPCGI_BIN php-cgi)
+    endif()
+    # sanity check if php-cgi binary exists
+    if (NOT EXISTS ${PHPCGI_BIN})
+        message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
+        set (PHPCGI_BIN "")
+    else()
+        message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
    endif()
-    message (STATUS "Using PHP binary " ${PHP_BIN})
 endif()

 #-----------------------------------------------------------------------------
-#
-# Setup settings and paths
-#
+# import scripts and utilities (importer only)
 #-----------------------------------------------------------------------------

-set(WEBSITESCRIPTS
-    website/deletable.php
-    website/details.php
-    website/hierarchy.php
-    website/lookup.php
-    website/polygons.php
-    website/reverse.php
-    website/search.php
-    website/status.php
-)
+if (BUILD_IMPORTER)
+   find_file(COUNTRY_GRID_FILE country_osm_grid.sql.gz
+             PATHS ${PROJECT_SOURCE_DIR}/data
+             NO_DEFAULT_PATH
+             DOC "Location of the country grid file."
+            )

-set(CUSTOMSCRIPTS
-    utils/country_languages.php
-    utils/importWikipedia.php
-    utils/export.php
-    utils/query.php
-    utils/setup.php
-    utils/specialphrases.php
-    utils/update.php
-    utils/warm.php
-   )
-
-foreach (script_source ${CUSTOMSCRIPTS})
-    configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
-                   ${PROJECT_BINARY_DIR}/${script_source})
-endforeach()
-
-foreach (script_source ${WEBSITESCRIPTS})
-    configure_file(${PROJECT_SOURCE_DIR}/cmake/website.tmpl
-                   ${PROJECT_BINARY_DIR}/${script_source})
-endforeach()
-
-configure_file(${PROJECT_SOURCE_DIR}/settings/defaults.php
-               ${PROJECT_BINARY_DIR}/settings/settings.php)
-
-set(WEBPATHS css images js)
-
-foreach (wp ${WEBPATHS})
-    execute_process(
-        COMMAND ln -sf ${PROJECT_SOURCE_DIR}/website/${wp} ${PROJECT_BINARY_DIR}/website/
-    )
-endforeach()
+   if (NOT COUNTRY_GRID_FILE)
+       message(FATAL_ERROR "\nYou need to download the country_osm_grid first:\n"
+                           "    wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
+   endif()

+   configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
+                  ${PROJECT_BINARY_DIR}/nominatim)
+endif()

 #-----------------------------------------------------------------------------
-#
 # Tests
-#
 #-----------------------------------------------------------------------------

-if (NOT ONLY_DOCS)
+if (BUILD_TESTS)
    include(CTest)

    set(TEST_BDD db osm2pgsql api)

-    foreach (test ${TEST_BDD})
-        add_test(NAME bdd_${test}
-                 COMMAND lettuce features/${test}
-                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests)
-        set_tests_properties(bdd_${test}
-            PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
-    endforeach()
+    find_program(PYTHON_BEHAVE behave)
+    find_program(PYLINT NAMES pylint3 pylint)
+    find_program(PYTEST NAMES pytest py.test-3 py.test)
+    find_program(PHPCS phpcs)
+    find_program(PHPUNIT phpunit)

-    add_test(NAME php
-             COMMAND phpunit ./
-             WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests-php)
+    if (PYTHON_BEHAVE)
+        message(STATUS "Using Python behave binary ${PYTHON_BEHAVE}")
+        foreach (test ${TEST_BDD})
+            add_test(NAME bdd_${test}
+                     COMMAND ${PYTHON_BEHAVE} ${test}
+                     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/bdd)
+            set_tests_properties(bdd_${test}
+                PROPERTIES ENVIRONMENT "NOMINATIM_DIR=${PROJECT_BINARY_DIR}")
+        endforeach()
+    else()
+        message(WARNING "behave not found. BDD tests disabled." )
+    endif()
+
+    if (PHPUNIT)
+        message(STATUS "Using phpunit binary ${PHPUNIT}")
+        add_test(NAME php
+                 COMMAND ${PHPUNIT} ./
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/test/php)
+    else()
+        message(WARNING "phpunit not found. PHP unit tests disabled." )
+    endif()
+
+    if (PHPCS)
+        message(STATUS "Using phpcs binary ${PHPCS}")
+        add_test(NAME phpcs
+                 COMMAND ${PHPCS} --report-width=120 --colors lib-php
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "phpcs not found. PHP linting tests disabled." )
+    endif()
+
+    if (PYLINT)
+        message(STATUS "Using pylint binary ${PYLINT}")
+        add_test(NAME pylint
+                 COMMAND ${PYLINT} nominatim
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "pylint not found. Python linting tests disabled.")
+    endif()
+
+    if (PYTEST)
+        message(STATUS "Using pytest binary ${PYTEST}")
+        add_test(NAME pytest
+                 COMMAND ${PYTEST} test/python
+                 WORKING_DIRECTORY ${PROJECT_SOURCE_DIR})
+    else()
+        message(WARNING "pytest not found. Python tests disabled." )
+    endif()
 endif()

+#-----------------------------------------------------------------------------
+# Postgres module
 #-----------------------------------------------------------------------------

-if (NOT ONLY_DOCS)
+if (BUILD_MODULE)
    add_subdirectory(module)
-    add_subdirectory(nominatim)
 endif()
-add_subdirectory(docs)

 #-----------------------------------------------------------------------------
+# Documentation
+#-----------------------------------------------------------------------------
+
+if (BUILD_DOCS)
+   add_subdirectory(docs)
+endif()
+
+#-----------------------------------------------------------------------------
+# Manual page
+#-----------------------------------------------------------------------------
+
+if (BUILD_MANPAGE)
+   add_subdirectory(man)
+endif()
+
+#-----------------------------------------------------------------------------
+# Installation
+#-----------------------------------------------------------------------------
+
+
+include(GNUInstallDirs)
+set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
+set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
+set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
+set(NOMINATIM_MUNINDIR ${CMAKE_INSTALL_FULL_DATADIR}/munin/plugins)
+
+if (BUILD_IMPORTER)
+    configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
+    install(PROGRAMS ${PROJECT_BINARY_DIR}/installed.bin
+            DESTINATION ${CMAKE_INSTALL_BINDIR}
+            RENAME nominatim)
+
+    install(DIRECTORY nominatim
+            DESTINATION ${NOMINATIM_LIBDIR}/lib-python
+            FILES_MATCHING PATTERN "*.py"
+            PATTERN __pycache__ EXCLUDE)
+    install(DIRECTORY lib-sql DESTINATION ${NOMINATIM_LIBDIR})
+
+    install(FILES ${COUNTRY_GRID_FILE}
+                  data/words.sql
+            DESTINATION ${NOMINATIM_DATADIR})
+endif()
+
+if (BUILD_OSM2PGSQL)
+    if (${CMAKE_VERSION} VERSION_LESS 3.13)
+        # Installation of subdirectory targets was only introduced in 3.13.
+        # So just copy the osm2pgsql file for older versions.
+        install(PROGRAMS ${PROJECT_BINARY_DIR}/osm2pgsql/osm2pgsql
+                DESTINATION ${NOMINATIM_LIBDIR})
+    else()
+        install(TARGETS osm2pgsql RUNTIME DESTINATION ${NOMINATIM_LIBDIR})
+    endif()
+endif()
+
+if (BUILD_MODULE)
+    install(PROGRAMS ${PROJECT_BINARY_DIR}/module/nominatim.so
+            DESTINATION ${NOMINATIM_LIBDIR}/module)
+endif()
+
+if (BUILD_API)
+    install(DIRECTORY lib-php DESTINATION ${NOMINATIM_LIBDIR})
+endif()
+
+install(FILES settings/env.defaults
+              settings/address-levels.json
+              settings/phrase-settings.json
+              settings/import-admin.style
+              settings/import-street.style
+              settings/import-address.style
+              settings/import-full.style
+              settings/import-extratags.style
+              settings/icu_tokenizer.yaml
+              settings/country_settings.yaml
+        DESTINATION ${NOMINATIM_CONFIGDIR})
+
+install(DIRECTORY settings/icu-rules
+        DESTINATION ${NOMINATIM_CONFIGDIR})
+install(DIRECTORY settings/country-names
+        DESTINATION ${NOMINATIM_CONFIGDIR})
+
+if (INSTALL_MUNIN_PLUGINS)
+    install(FILES munin/nominatim_importlag
+                  munin/nominatim_query_speed
+                  munin/nominatim_requests
+            DESTINATION ${NOMINATIM_MUNINDIR})
+endif()
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -7,38 +7,6 @@ Please always open a separate issue for each problem. In particular, do
 not add your bugs to closed issues. They may looks similar to you but
 often are completely different from the maintainer's point of view.

-### When Reporting Bad Search Results...
-
-Please make sure to add the following information:
-
- * the URL of the query that produces the bad result
- * the result you are getting
- * the expected result, preferably a link to the OSM object you want to find,
-   otherwise an address that is as precise as possible
- 
- To get the link to the OSM object, you can try the following:
- 
- * go to https://openstreetmap.org
- * zoom to the area of the map where you expect the result and
-   zoom in as much as possible
- * click on the question mark on the right side of the map,
-   then with the queston cursor on the map where your object is located
- * find the object of interest in the list that appears on the left side
- * click on the object and report the URL back that the browser shows
-
-### When Reporting Problems with your Installation...
-
-Please add the following information to your issue:
-
- * hardware configuration: RAM size, CPUs, kind and size of disks
- * Operating system (also mention if you are running on a cloud service)
- * Postgres and Postgis version
- * list of settings you changed in your Postgres configuration
- * Nominatim version (release version or,
-   if you run from the git repo, the output of `git rev-parse HEAD`)
- * (if applicable) exact command line of the command that was causing the issue
-
-
 ## Workflow for Pull Requests

 We love to get pull requests from you. We operate the "Fork & Pull" model
@@ -68,7 +36,7 @@ Nominatim historically hasn't followed a particular coding style but we
 are in process of consolidating the style. The following rules apply:

 * Python code uses the official Python style
- * indention
+ * indentation
   * SQL use 2 spaces
   * all other file types use 4 spaces
   * [BSD style](https://en.wikipedia.org/wiki/Indent_style#Allman_style) for braces
@@ -81,22 +49,18 @@ are in process of consolidating the style. The following rules apply:
 * for PHP variables use CamelCase with a prefixing letter indicating the type
   (i - integer, f - float, a - array, s - string, o - object)

-The coding style is enforced with PHPCS and can be tested with:
+The coding style is enforced with PHPCS and pylint. It can be tested with:

 ```
-  phpcs --report-width=120 --colors .
+phpcs --report-width=120 --colors .
+pylint3 --extension-pkg-whitelist=osmium nominatim
 ```

 ## Testing

-Before submitting a pull request make sure that the following tests pass:
+Before submitting a pull request make sure that the tests pass:

 ```
-  cd test/bdd
-  behave -DBUILDDIR=<builddir> db osm2pgsql
-```
-
-```
-  cd test/php
-  phpunit ./
+  cd build
+  make test
 ```
--- a/213
+++ b/213
@@ -1,3 +1,216 @@
+4.1.1
+
+ * fix crash on update when addr:interpolation receives an illegal value
+ * fix minimum number of retrieved results to be at least 10
+ * fix search for combinations of special term + name (e.g. Hotel Bellevue)
+ * do not return interpolations without a parent street on reverse search
+ * improve invalidation of linked places on updates
+ * fix address parsing for interpolation lines
+ * make sure socket timeouts are respected during replication
+   (working around a bug in some versions of pyosmium)
+ * update bundled osm2pgsql to 1.7.1
+ * typing fixes to work with latest type annotations from typeshed
+ * smaller improvements to documentation (thanks to @mausch)
+
+4.1.0
+
+ * switch to ICU tokenizer as default
+ * add housenumber normalization and support optional spaces during search
+ * add postcode format checking and support optional spaces during search
+ * add function for cleaning housenumbers in word table
+ * add updates/deletion of country names imported from OSM
+ * linked places no longer overwrite names from a place permanently
+ * move default country name configuration into yaml file (thanks @tareqpi)
+ * more compact layout for interpolation and TIGER tables
+ * introduce mutations to ICU tokenizer (used for German umlauts)
+ * support reinitializing a full project directory with refresh --website
+ * fix various issues with linked places on updates
+ * add support for external sanitizers and token analyzers
+ * add CLI commands for forced indexing
+ * add CLI command for version report
+ * add offline import mode
+ * change geocodejson to return a feature class in the 'type' field
+ * add ISO3166-2 to address output (thanks @I70l0teN4ik)
+ * improve parsing and matching of addr: tags
+ * support relations as street members of associatedStreet
+ * better ranking for address results from TIGER data
+ * adapt rank classification to changed tag usage in OSM
+ * update bundled osm2pgsql to 1.6.0
+ * add typing information to Python code
+ * improve unit test coverage
+ * reorganise and speed up code for BDD tests, drop support for scenes
+ * move PHP unit tests to PHPUnit 9.5
+ * extensive typo fixes in documentation (thanks @woodpeck,@StephanGeorg,
+   @amandasaurus, @nslxndr, @stefkiourk, @Luflosi, @kianmeng)
+ * drop official support for installation on CentOS
+ * add installation instructions for Ubuntu 22.04
+ * add support for PHP8
+ * add setup instructions for updates and systemd
+ * drop support for PostgreSQL 9.5
+
+4.0.1
+
+ * fix initialisation error in replication script
+ * ICU tokenizer: avoid any special characters in word tokens
+ * better error message when API php script does not exist
+ * fix quoting of house numbers in SQL queries
+ * small fixes and improvements in search query parsing
+ * add documentation for moving the database to a different machine
+
+4.0.0
+
+ * refactor name token computation and introduce ICU tokenizer
+   * name processing now happens in the indexer outside the DB
+   * reorganizes abbreviation handling and moves it to the indexing phases
+   * adds preprocessing of names
+ * add country-specific ranking for Spain, Slovakia
+ * partially switch to using SP-GIST indexes
+ * better updating of dependent addresses for name changes in streets
+ * remove unused/broken tables for external housenumbers
+ * move external postcodes to CSV format and no longer save them in tables
+   (adds support for postcodes for arbitrary countries)
+ * remove postcode helper entries from placex (thanks @AntoJvlt)
+ * change required format for TIGER data to CSV
+ * move configuration of default languages from wiki into config file
+ * expect customized configuration files in project directory by default
+ * disable search API for reverse-only import (thanks @darkshredder)
+ * port most of maintenance/import code to Python and remove PHP utils
+ * add catch-up mode for replication
+ * add updating of special phrases (thanks @AntoJvlt)
+ * add support for special phrases in CSV files (thanks @AntoJvlt)
+ * switch to case-independent matching between place and boundary names
+ * remove disabling of reverse query parsing
+ * minor tweaks to search algorithm to avoid more false positives
+ * major overhaul of the administrator and developer documentation
+ * add security disclosure policy
+ * add testing of installation scripts via CI
+ * drop support for Python < 3.6 and Postgresql < 9.5
+
+3.7.2
+
+ * fix database check for reverse-only imports
+ * do not error out in status API result when import date is missing
+ * add array_key_last function for PHP < 7.3 (thanks to @woodpeck)
+ * fix more url when server name is unknown (thanks to @mogita)
+ * commit changes to replication log table
+
+3.7.1
+
+ * fix smaller issues with special phrases import (thanks @AntoJvlt)
+ * add index to speed up continued indexing during import
+ * fix index on location_property_tiger(parent_place_id) (thanks @changpingc)
+ * make sure Python code is backward-compatible with Python 3.5
+ * various documentation fixes
+
+3.7.0
+
+ * switch to dotenv for configuration file
+ * introduce 'make install' (reorganising most of the code)
+ * introduce nominatim tool as replacement for various php scripts
+ * introduce project directories and allow multiple installations from same build
+ * clean up BDD tests: drop nose, reorganise step code
+ * simplify test database for API BDD tests and autoinstall database
+ * port most of the code for command-line tools to Python
+   (thanks to @darkshredder and @AntoJvlt)
+ * add tests for all tooling
+ * replace pyosmium-get-changes with custom internal implementation using
+   pyosmium
+ * improve search for queries with housenumber and partial terms
+ * add database versioning
+ * use jinja2 for preprocessing SQL files
+ * introduce automatic migrations
+ * reverse fix preference of interpolations over housenumbers
+ * parallelize indexing of postcodes
+ * add non-key indexes to speed up housenumber + street searches
+ * switch housenumber field in placex to save transliterated names
+
+3.6.0
+
+ * add full support for searching by and displaying of addr:* tags
+ * improve address output for large-area objects
+ * better use of country names from OSM data for search and display
+ * better debug output for reverse call
+ * add support for addr:place links without a place equivalent in OSM
+ * improve finding postcodes with normalisation artefacts
+ * batch object to index for rank 30, avoiding a wrap-around of transaction
+   IDs in PostgreSQL
+ * introduce dynamic address rank computation for administrative boundaries
+   depending on linked objects and their place in the admin level hierarchy
+ * add country-specific address ranking for Indonesia, Russia, Belgium and
+   the Netherlands (thanks @hendrikmoree)
+ * make sure wikidata/wikipedia tags are imported for all styles
+ * make POIs searchable by name and housenumber (thanks @joy-yyd)
+ * reverse geocoding now ignores places without an address rank (rivers etc.)
+ * installation of a webserver is no longer mandatory, for development
+   use the php internal webserver via 'make serve'
+ * reduce the influence of place nodes in addresses
+ * drop support for the unspecific is_in tag
+ * various minor tweaks to supplied styles
+ * move HTML web frontend into its own project
+ * move scripts for processing external data sources into separate directories
+ * introduce separate configuration for website (thanks @krahulreddy)
+ * update documentation, in particular, clean up development docs
+ * update osm2pgsql to 1.4.0
+
+3.5.2
+
+ * ensure that wikipedia tags are imported for all styles
+ * reinstate verbosity for indexing during updates
+ * make house number reappear in display name on named POIs
+ * introduce batch processing in indexer to avoid transaction ID overrun
+ * increase splitting for large geometries to improve indexing speed
+ * remove deprecated get_magic_quotes_gpc() function
+ * make sure that all postcodes have an entry in word and are thus searchable
+ * remove use of ST_Covers in conjunction with ST_Intersects,
+   causes bad query planning and slow updates in Postgis3
+ * update osm2pgsql
+
+3.5.1
+
+ * disable jit and parallel processing in PostgreSQL for osm2pgsql
+ * update libosmium to 2.15.6 (fixes an issue with processing hanging
+   on large multipolygons)
+
+3.5.0
+
+ * structured select on HTML search page
+ * new PHP Nominatim\Shell class to wrap shell escaping
+ * remove polygon parameter from all API calls
+ * improve handling of postcode areas
+ * reorganise place linking algorithm, now using wikidata tag as well
+ * remove linkees from search_name and larger_area tables
+ * introduce country-specific address ranks
+ * reorganise rank address computation
+ * cleanup of partition function
+ * improve parenting for large POIs
+ * add support for Postgresql 12 and Postgis 3
+ * add earlier cleanup when --drop is given, to reduce memory usage
+ * remove use of place_id in URLs
+ * replace C nominatim indexer with a simpler Python implementation
+ * split up the huge sql/functions.sql file
+ * move osm2pgsql tests to osm2pgsql
+ * add new extratags style which imports all tags from OSM
+ * add new script for checking the import after completion
+ * update osm2pgsql, reducing memory usage
+ * use new wikipedia importance and add processing of wikidata tags
+ * add search form for details page
+ * use ExtraDataPath for country_grid table
+ * remove short_name from list of names to be displayed
+ * split up CMakeFile, so that all parts can be built separately
+ * update installation instructions for CentOS and Ubuntu
+ * add script for importing/updating multiple country extracts
+ * various documentation improvements
+
+3.4.2
+
+ * fix security bug in /details endpoint where user input was not
+   properly sanitized
+
+3.4.1
+
+ * update osm2pgsql to fix hangs during updates and lost address numbers
+   during updates
+
 3.4.0

 * increase required version for PostgreSQL(9.3), PostGIS(2.2) and PHP(7.0)
--- a/README.md
+++ b/README.md
@@ -1,4 +1,5 @@
-[![Build Status](https://travis-ci.org/openstreetmap/Nominatim.svg?branch=master)](https://travis-ci.org/openstreetmap/Nominatim)
+[![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
+[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim)

 Nominatim
 =========
@@ -20,11 +21,10 @@ Installation
 ============

 The latest stable release can be downloaded from https://nominatim.org.
-There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation).
+There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/).

-Detailed installation instructions for the development version can be
-found at [nominatim.org](https://nominatim.org/release-docs/develop/admin/Installation)
-as well.
+[Detailed installation instructions for current master](https://nominatim.org/release-docs/develop/admin/Installation)
+can be found at nominatim.org as well.

 A quick summary of the necessary steps:

@@ -34,12 +34,15 @@ A quick summary of the necessary steps:
        cd build
        cmake ..
        make
+        sudo make install

-2. Get OSM data and import:
+2. Create a project directory, get OSM data and import:

-        ./build/utils/setup.php --osm-file <your planet file> --all
+        mkdir nominatim-project
+        cd nominatim-project
+        nominatim import --osm-file <your planet file>

-3. Point your webserver to the ./build/website directory.
+3. Point your webserver to the nominatim-project/website directory.


 License
@@ -51,13 +54,14 @@ The source code is available under a GPLv2 license.
 Contributing
 ============

-Contributions are welcome. For details see [contribution guide](CONTRIBUTING.md).
-
-Both bug reports and pull requests are welcome.
+Contributions, bug reports and pull requests are welcome.
+For details see [contribution guide](CONTRIBUTING.md).


-Mailing list
-============
+Questions and help
+==================

-For questions you can join the geocoding mailing list, see
-https://lists.openstreetmap.org/listinfo/geocoding
+For questions, community help and discussions you can use the
+[GitHub discussions forum](https://github.com/osm-search/Nominatim/discussions)
+or join the
+[geocoding mailing list](https://lists.openstreetmap.org/listinfo/geocoding).
--- a/SECURITY.md
+++ b/SECURITY.md
@@ -0,0 +1,39 @@
+# Security Policy
+
+## Supported Versions
+
+All Nominatim releases receive security updates for two years.
+
+The following table lists the end of support for all currently supported
+versions.
+
+| Version | End of support for security updates |
+| ------- | ----------------------------------- |
+| 4.0.x   | 2023-11-02                          |
+| 3.7.x   | 2023-04-05                          |
+| 3.6.x   | 2022-12-12                          |
+| 3.5.x   | 2022-06-05                          |
+
+## Reporting a Vulnerability
+
+If you believe you have found an issue in Nominatim that has implications on
+security, please send a description of the issue to **security@nominatim.org**.
+You will receive an acknowledgement of your mail within 3 work days where we
+also notify you of the next steps.
+
+## How we Disclose Security Issues
+
+**The following section only applies to security issues found in released
+versions. Issues that concern the master development branch only will be
+fixed immediately on the branch with the corresponding PR containing the
+description of the nature and severity of the issue.**
+
+Patches for identified security issues are applied to all affected versions and
+new minor versions are released. At the same time we release a statement at
+the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the
+incident. Announcements will also be published at the
+[geocoding mailing list](https://lists.openstreetmap.org/listinfo/geocoding).
+
+## List of Previous Incidents
+
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
--- a/VAGRANT.md
+++ b/VAGRANT.md
@@ -42,9 +42,9 @@ is.

      ```
      # inside the virtual machine:
-      cd build
-      wget --no-verbose --output-document=/tmp/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
-      ./utils/setup.php --osm-file /tmp/monaco.osm.pbf --osm2pgsql-cache 1000 --all 2>&1 | tee monaco.$$.log
+      cd nominatim-project
+      wget --no-verbose --output-document=monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf
+      nominatim import --osm-file monaco.osm.pbf 2>&1 | tee monaco.$$.log
      ```

    To repeat an import you'd need to delete the database first
@@ -56,7 +56,7 @@ is.
 ## Development

 Vagrant maps the virtual machine's port 8089 to your host machine. Thus you can
-see Nominatim in action on [locahost:8089](http://localhost:8089/nominatim/).
+see Nominatim in action on [localhost:8089](http://localhost:8089/nominatim/).

 You edit code on your host machine in any editor you like. There is no need to
 restart any software: just refresh your browser window.
@@ -141,7 +141,7 @@ No. Long running Nominatim installations will differ once new import features (o
 bug fixes) get added since those usually only get applied to new/changed data.

 Also this document skips the optional Wikipedia data import which affects ranking
-of search results. See [Nominatim installation](http://nominatim.org/release-docs/latest/Installation) for details.
+of search results. See [Nominatim installation](https://nominatim.org/release-docs/latest/admin/Installation) for details.

 ##### Why Ubuntu? Can I test CentOS/Fedora/CoreOS/FreeBSD?

@@ -160,9 +160,9 @@ You can configure/download other Vagrant boxes from [https://app.vagrantup.com/b

 Let's say you have a Postgres database named `nominatim_it` on server `your-server.com` and port `5432`. The Postgres username is `postgres`. You can edit `settings/local.php` and point Nominatim to it.

-    pgsql://postgres@your-server.com:5432/nominatim_it
+    pgsql:host=your-server.com;port=5432;user=postgres;dbname=nominatim_it
    
-No data import necessary or restarting necessary.
+No data import or restarting necessary.

 If the Postgres installation is behind a firewall, you can try

--- a/88
+++ b/88
@@ -4,18 +4,65 @@
 Vagrant.configure("2") do |config|
  # Apache webserver
  config.vm.network "forwarded_port", guest: 80, host: 8089
+  config.vm.network "forwarded_port", guest: 8088, host: 8088

  # If true, then any SSH connections made will enable agent forwarding.
  config.ssh.forward_agent = true

+  # Never sync the current directory to /vagrant.
+  config.vm.synced_folder ".", "/vagrant", disabled: true
+
  checkout = "yes"
  if ENV['CHECKOUT'] != 'y' then
-      config.vm.synced_folder ".", "/home/vagrant/Nominatim"
-      checkout = "no"
+    checkout = "no"
+  end
+
+  config.vm.provider "virtualbox" do |vb, override|
+    vb.gui = false
+    vb.memory = 2048
+    vb.customize ["setextradata", :id, "VBoxInternal2/SharedFoldersEnableSymlinksCreate//vagrant","0"]
+    if ENV['CHECKOUT'] != 'y' then
+      override.vm.synced_folder ".", "/home/vagrant/Nominatim"
+    end
+  end
+
+  config.vm.provider "libvirt" do |lv, override|
+    lv.memory = 2048
+    lv.nested = true
+    if ENV['CHECKOUT'] != 'y' then
+      override.vm.synced_folder ".", "/home/vagrant/Nominatim", type: 'nfs'
+    end
  end

  config.vm.define "ubuntu", primary: true do |sub|
-      sub.vm.box = "bento/ubuntu-18.04"
+      sub.vm.box = "generic/ubuntu2004"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-20.sh"
+        s.privileged = false
+        s.args = [checkout]
+      end
+  end
+
+  config.vm.define "ubuntu-apache" do |sub|
+      sub.vm.box = "generic/ubuntu2004"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-20.sh"
+        s.privileged = false
+        s.args = [checkout, "install-apache"]
+      end
+  end
+
+  config.vm.define "ubuntu-nginx" do |sub|
+      sub.vm.box = "generic/ubuntu2004"
+      sub.vm.provision :shell do |s|
+        s.path = "vagrant/Install-on-Ubuntu-20.sh"
+        s.privileged = false
+        s.args = [checkout, "install-nginx"]
+      end
+  end
+
+  config.vm.define "ubuntu18" do |sub|
+      sub.vm.box = "generic/ubuntu1804"
      sub.vm.provision :shell do |s|
        s.path = "vagrant/Install-on-Ubuntu-18.sh"
        s.privileged = false
@@ -23,48 +70,41 @@ Vagrant.configure("2") do |config|
      end
  end

-  config.vm.define "ubuntu18nginx" do |sub|
-      sub.vm.box = "bento/ubuntu-18.04"
+  config.vm.define "ubuntu18-apache" do |sub|
+      sub.vm.box = "generic/ubuntu1804"
      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Ubuntu-18-nginx.sh"
+        s.path = "vagrant/Install-on-Ubuntu-18.sh"
        s.privileged = false
-        s.args = [checkout]
+        s.args = [checkout, "install-apache"]
      end
  end

-  config.vm.define "ubuntu16" do |sub|
-      sub.vm.box = "bento/ubuntu-16.04"
+  config.vm.define "ubuntu18-nginx" do |sub|
+      sub.vm.box = "generic/ubuntu1804"
      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Ubuntu-16.sh"
+        s.path = "vagrant/Install-on-Ubuntu-18.sh"
        s.privileged = false
-        s.args = [checkout]
+        s.args = [checkout, "install-nginx"]
      end
  end

-  config.vm.define "travis" do |sub|
-      sub.vm.box = "bento/ubuntu-14.04"
+  config.vm.define "centos7" do |sub|
+      sub.vm.box = "centos/7"
      sub.vm.provision :shell do |s|
-        s.path = "vagrant/install-on-travis-ci.sh"
+        s.path = "vagrant/Install-on-Centos-7.sh"
        s.privileged = false
        s.args = [checkout]
      end
  end

  config.vm.define "centos" do |sub|
-      sub.vm.box = "centos/7"
+      sub.vm.box = "generic/centos8"
      sub.vm.provision :shell do |s|
-        s.path = "vagrant/Install-on-Centos-7.sh"
+        s.path = "vagrant/Install-on-Centos-8.sh"
        s.privileged = false
-        s.args = "yes"
+        s.args = [checkout]
      end
-      sub.vm.synced_folder ".", "/home/vagrant/Nominatim", disabled: true
-      sub.vm.synced_folder ".", "/vagrant", disabled: true
  end

-  config.vm.provider "virtualbox" do |vb|
-    vb.gui = false
-    vb.memory = 2048
-    vb.customize ["setextradata", :id, "VBoxInternal2/SharedFoldersEnableSymlinksCreate//vagrant","0"]
-  end

 end
--- a/cmake/script.tmpl
+++ b/cmake/script.tmpl
@@ -1,4 +0,0 @@
-#!@PHP_BIN@ -Cq
-<?php
-require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
-require_once(CONST_BasePath.'/@script_source@');
--- a/cmake/tool-installed.tmpl
+++ b/cmake/tool-installed.tmpl
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+import sys
+import os
+
+sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
+
+os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
+
+from nominatim import cli
+from nominatim import version
+
+version.GIT_COMMIT_HASH = '@GIT_HASH@'
+
+exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
+                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
+                   phplib_dir='@NOMINATIM_LIBDIR@/lib-php',
+                   sqllib_dir='@NOMINATIM_LIBDIR@/lib-sql',
+                   data_dir='@NOMINATIM_DATADIR@',
+                   config_dir='@NOMINATIM_CONFIGDIR@',
+                   phpcgi_path='@PHPCGI_BIN@'))
--- a/cmake/tool.tmpl
+++ b/cmake/tool.tmpl
@@ -0,0 +1,20 @@
+#!/usr/bin/env python3
+import sys
+import os
+
+sys.path.insert(1, '@CMAKE_SOURCE_DIR@')
+
+os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
+
+from nominatim import cli
+from nominatim import version
+
+version.GIT_COMMIT_HASH = '@GIT_HASH@'
+
+exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
+                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
+                   phplib_dir='@CMAKE_SOURCE_DIR@/lib-php',
+                   sqllib_dir='@CMAKE_SOURCE_DIR@/lib-sql',
+                   data_dir='@CMAKE_SOURCE_DIR@/data',
+                   config_dir='@CMAKE_SOURCE_DIR@/settings',
+                   phpcgi_path='@PHPCGI_BIN@'))
--- a/cmake/website.tmpl
+++ b/cmake/website.tmpl
@@ -1,3 +0,0 @@
-<?php
-require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
-require_once(CONST_BasePath.'/@script_source@');
--- a/data-sources/country-grid/README.md
+++ b/data-sources/country-grid/README.md
@@ -1,77 +0,0 @@
-# Fallback Country Boundaries
-
-Each place is assigned a `country_code` and partition. Partitions derive from `country_code`.
-
-Nominatim imports two pre-generated files
-
-   * `data/country_name.sql` (country code, name, default language, partition)
-   * `data/country_osm_grid.sql` (country code, geometry)
-
-before creating places in the database. This helps with fast lookups and missing data (e.g. if the data the user wants to import doesn't contain any country places).
-
-The number of countries in the world can change (South Sudan created 2011, Germany reunification), so can their boundaries. This document explain how the pre-generated files can be updated.
-
-
-
-## Country code
-
-Each place is assigned a two letter country_code based on its location, e.g. `gb` for Great Britain. Or `NULL` if no suitable country is found (usually it's in open water then).
-
-In `sql/functions.sql: get_country_code(geometry)` the place's center is checked against
-
-   1. country places already imported from the user's data file. Places are imported by rank low-to-high. Lowest rank 2 is countries so most places should be matched. Still the data file might be incomplete.
-   2. if unmatched: OSM grid boundaries
-   3. if still unmatched: OSM grid boundaries, but allow a small distance
-
-
-
-## Partitions
-
-Each place is assigned partition, which is a number 0..250. 0 is fallback/other.
-
-During place indexing (`sql/functions.sql: placex_insert()`) a place is assigned the partition based on its country code (`sql/functions.sql: get_partition(country_code)`). It checks in the `country_name` table.
-
-Most countries have their own partition, some share a partition. Thus partition counts vary greatly.
-
-Several database tables are split by partition to allow queries to run against less indices and improve caching.
-
-   * `location_area_large_<partition>`
-   * `search_name_<partition>`
-   * `location_road_<partition>`
-
-
-
-
-
-## Data files
-
-### data/country_name.sql
-
-Export from existing database table plus manual changes. `country_default_language_code` most taken from [https://wiki.openstreetmap.org/wiki/Nominatim/Country_Codes](), see `utils/country_languages.php`.
-
-
-
-### data/country_osm_grid.sql
-
-`country_grid.sql` merges territories by country. Then uses `function.sql: quad_split_geometry` to split each country into multiple [Quadtree](https://en.wikipedia.org/wiki/Quadtree) polygons for faster point-in-polygon lookups.
-
-To visualize one country as geojson feature collection, e.g. for loading into [geojson.io](http://geojson.io/):
-
-```
-- http://www.postgresonline.com/journal/archives/267-Creating-GeoJSON-Feature-Collections-with-JSON-and-PostGIS-functions.html
-
-SELECT row_to_json(fc)
-FROM (
-  SELECT 'FeatureCollection' As type, array_to_json(array_agg(f)) As features
-  FROM (
-    SELECT 'Feature' As type,
-    ST_AsGeoJSON(lg.geometry)::json As geometry,
-    row_to_json((country_code, area)) As properties
-    FROM country_osm_grid As lg where country_code='mx'
-  ) As f
-) As fc;
-```
-
-`cat /tmp/query.sql | psql -At nominatim > /tmp/mexico.quad.geojson`
-
-![mexico](mexico.quad.png)
--- a/data-sources/country-grid/country_grid.sql
+++ b/data-sources/country-grid/country_grid.sql
@@ -1,33 +0,0 @@
-- Script to build a calculated country grid (country_osm_grid) from the country_name and placex tables
-DROP TABLE IF EXISTS tmp_country_osm_grid;
-CREATE TABLE tmp_country_osm_grid as select country_name.country_code,st_union(placex.geometry) as geometry from country_name,
-  placex
-  where (lower(placex.country_code) = country_name.country_code)
-    and placex.rank_search < 16 and st_area(placex.geometry) > 0 
-  group by country_name.country_code;
-ALTER TABLE tmp_country_osm_grid add column area double precision;
-UPDATE tmp_country_osm_grid set area = st_area(geometry::geography);
-
-- compare old and new: lists countries that are missing from the new data or whose rounded log(area) changed
-select country_code, round, round(log(area)) from (select distinct country_code,round(log(area)) from country_osm_grid order by country_code) as x 
-  left outer join tmp_country_osm_grid using (country_code) where area is null or round(log(area)) != round;
-
-DROP TABLE IF EXISTS new_country_osm_grid;
-CREATE TABLE new_country_osm_grid as select country_code,area,quad_split_geometry(geometry,0.5,20) as geometry from tmp_country_osm_grid;
-CREATE INDEX new_idx_country_osm_grid_geometry ON new_country_osm_grid USING GIST (geometry);
-
-- Sometimes there are problems calculating area due to invalid data - optionally recalc (NaN areas are counted as 0)
-UPDATE new_country_osm_grid set area = sum from (select country_code,sum(case when st_area(geometry::geography) = 'NaN' THEN 0 ELSE st_area(geometry::geography) END) 
- from new_country_osm_grid group by country_code) as x where x.country_code = new_country_osm_grid.country_code;
-
-- compare old and new again, this time against the quad-split table
-select country_code, x.round, y.round from (select distinct country_code,round(log(area)) from country_osm_grid order by country_code) as x
-  left outer join (select distinct country_code,round(log(area)) from new_country_osm_grid order by country_code) as y
-    using (country_code) where x.round != y.round;
-
-- Flip the new table in: drop the old table and rename table and index inside one transaction
-BEGIN;
-DROP TABLE IF EXISTS country_osm_grid;
-ALTER TABLE new_country_osm_grid rename to country_osm_grid;
-ALTER INDEX new_idx_country_osm_grid_geometry RENAME TO idx_country_osm_grid_geometry;
-COMMIT;
--- a/data-sources/country-grid/mexico.quad.png
+++ b/data-sources/country-grid/mexico.quad.png
--- a/data-sources/gb-postcodes/README.md
+++ b/data-sources/gb-postcodes/README.md
@@ -1,56 +0,0 @@
-# GB Postcodes
-
-
-The server [importing instructions](https://www.nominatim.org/release-docs/latest/admin/Import-and-Update/) allow you to optionally download [`gb_postcode_data.sql.gz`](https://www.nominatim.org/data/gb_postcode_data.sql.gz). This document explains how the file got created.
-
-## GB vs UK
-
-GB (Great Britain) is more correct as the Ordnance Survey dataset doesn't contain postcodes from Northern Ireland.
-
-## Importing separately after the initial import
-
-If you forgot to download the file, or have a new version, you can import it separately:
-
-1. Import the downloaded `gb_postcode_data.sql.gz` file.
-
-2. Run the SQL query `SELECT count(getorcreate_postcode_id(postcode)) FROM gb_postcode;`. This will update the search index.
-
-3. Run `utils/setup.php --calculate-postcodes` from the build directory. This will copy data from the `gb_postcode` table to the `location_postcodes` table.
-
-
-
-## Converting Code-Point Open data
-
-1. Download from [Code-Point® Open](https://www.ordnancesurvey.co.uk/business-and-government/products/code-point-open.html). It requires an email address to which a download link will be sent.
-
-2. `unzip codepo_gb.zip`
-
-    Unpacked you'll see a directory of CSV files.
-
-        $ more codepo_gb/Data/CSV/n.csv
-        "N1 0AA",10,530626,183961,"E92000001","E19000003","E18000007","","E09000019","E05000368"
-        "N1 0AB",10,530559,183978,"E92000001","E19000003","E18000007","","E09000019","E05000368"
-
-    The coordinates are "Eastings" and "Northings" (in that column order) in [OSGB 1936](http://epsg.io/1314) projection. They can be projected to WGS84 like this:
-
-        SELECT ST_AsText(ST_Transform(ST_SetSRID('POINT(530626 183961)'::geometry,27700), 4326));
-        POINT(-0.117872733220225 51.5394424719303)
-
-    [-0.117872733220225 51.5394424719303 on OSM map](https://www.openstreetmap.org/?mlon=-0.117872733220225&mlat=51.5394424719303&zoom=16)
-
-
-
-3. Create database, import CSV files, add geometry column, dump into file
-
-        DBNAME=create_gb_postcode_file
-        createdb $DBNAME
-        echo 'CREATE EXTENSION postgis' | psql $DBNAME
-
-        cat data/gb_postcode_table.sql | psql $DBNAME      
-        cat codepo_gb/Data/CSV/*.csv | ./data-sources/gb-postcodes/convert_codepoint.php | psql $DBNAME
-        cat codepo_gb/Doc/licence.txt | iconv -f iso-8859-1 -t utf-8 | dos2unix | sed 's/^/-- /g' > gb_postcode_data.sql
-        pg_dump -a -t gb_postcode $DBNAME | grep -v '^--' >> gb_postcode_data.sql
-      
-        gzip -9 -f gb_postcode_data.sql
-        ls -lah gb_postcode_data.*
-        # dropdb $DBNAME
--- a/data-sources/gb-postcodes/convert_codepoint.php
+++ b/data-sources/gb-postcodes/convert_codepoint.php
@@ -1,37 +0,0 @@
-#!/usr/bin/env php
-<?php
-// Reads Code-Point Open CSV rows from STDIN and prints SQL that refills the gb_postcode table.
-echo <<< EOT
-
-ALTER TABLE gb_postcode ADD COLUMN easting bigint;
-ALTER TABLE gb_postcode ADD COLUMN northing bigint;
-
-TRUNCATE gb_postcode;
-
-COPY gb_postcode (id, postcode, easting, northing) FROM stdin;
-
-EOT;
-
-$iCounter = 0;
-while (($sLine = fgets(STDIN)) !== false) {
-    $aColumns = str_getcsv($sLine);
-    // skip blank or truncated rows: they would emit a malformed COPY record
-    if (count($aColumns) < 4) { continue; }
-    // insert space before the third last position (https://stackoverflow.com/a/9144834)
-    $postcode = $aColumns[0];
-    $postcode = preg_replace('/\s*(...)$/', ' $1', $postcode);
-
-    echo join("\t", array($iCounter, $postcode, $aColumns[2], $aColumns[3]))."\n";
-
-    $iCounter = $iCounter + 1;
-}
-
-echo <<< EOT
-\.
-
-UPDATE gb_postcode SET geometry=ST_Transform(ST_SetSRID(CONCAT('POINT(', easting, ' ', northing, ')')::geometry, 27700), 4326);
-
-ALTER TABLE gb_postcode DROP COLUMN easting;
-ALTER TABLE gb_postcode DROP COLUMN northing;
-
-EOT;
--- a/data-sources/us-tiger/README.md
+++ b/data-sources/us-tiger/README.md
@@ -1,26 +0,0 @@
-# US TIGER address data
-
-Convert [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)/Line dataset of the US Census Bureau to SQL files which can be imported by Nominatim. The created tables in the Nominatim database are separate from OpenStreetMap tables and get queried at search time separately.
-
-The dataset gets updated once per year. Downloading tends to be slow (it can take a full day) and converting the files can take hours as well.
-
-Replace '2019' with the current year throughout.
-
-  1. Install the GDAL library and python bindings and the unzip tool
-
-        # Ubuntu:
-        sudo apt-get install python3-gdal unzip
-
-  2. Get the TIGER 2019 data. You will need the EDGES files
-     (3,233 zip files, 11GB total).
-
-         wget -r ftp://ftp2.census.gov/geo/tiger/TIGER2019/EDGES/
-
-  3. Convert the data into SQL statements. Adjust the file paths in the scripts as needed
-
-        cd data-sources/us-tiger
-        ./convert.sh <input-path> <output-path>
-
-  4. Maybe: package the created files
-  
-        tar -czf tiger2019-nominatim-preprocessed.tar.gz tiger
--- a/data-sources/us-tiger/convert.sh
+++ b/data-sources/us-tiger/convert.sh
@@ -1,48 +0,0 @@
-#!/bin/bash
-# Convert every TIGER *_<countyid>_edges.zip in <input-path> into <output-path>/<countyid>.sql
-INPATH=$1
-OUTPATH=$2
-
-if [[ ! -d "$INPATH" ]]; then
-    echo "input path does not exist"
-    exit 1
-fi
-
-if [[ ! -d "$OUTPATH" ]]; then
-    echo "output path does not exist"
-    exit 1
-fi
-
-INREGEX='_([0-9]{5})_edges.zip'
-WORKPATH="$OUTPATH/tmp-workdir/"
-mkdir -p "$WORKPATH"
-
-# All path expansions below are quoted so that directories containing whitespace keep working.
-
-INFILES=("$INPATH"/*.zip)
-echo "Found ${#INFILES[*]} files."
-
-for F in "${INFILES[@]}"; do
-    # echo $F
-
-    if [[ "$F" =~ $INREGEX ]]; then
-        COUNTYID=${BASH_REMATCH[1]}
-        SHAPEFILE="$WORKPATH/$(basename "$F" '.zip').shp"
-        SQLFILE="$OUTPATH/$COUNTYID.sql"
-
-        unzip -o -q -d "$WORKPATH" "$F"
-        if [[ ! -e "$SHAPEFILE" ]]; then
-            echo "Unzip failed. $SHAPEFILE not found."
-            exit 1
-        fi
-
-        ./tiger_address_convert.py "$SHAPEFILE" "$SQLFILE"
-
-        rm "$WORKPATH"/*
-    fi
-done
-
-OUTFILES=("$OUTPATH"/*.sql)
-echo "Wrote ${#OUTFILES[*]} files."
-
-rmdir "$WORKPATH"
--- a/data-sources/us-tiger/tiger_address_convert.py
+++ b/data-sources/us-tiger/tiger_address_convert.py
@@ -1,620 +0,0 @@
-#!/usr/bin/python3
-# Tiger road data to OSM conversion script
-# Creates Karlsruhe-style address ways beside the main way
-# based on the Massachusetts GIS script by christopher schmidt
-
-#BUGS:
-# On very tight curves, a loop may be generated in the address way.
-# It would be nice if the ends of the address ways were not pulled back from dead ends
-
-
-# Features carrying one of these MTFCC codes are dropped by parse_shp_for_geom_and_tags()
-# H1100 Connector
-# H3010 Stream/River
-# H3013 Braided Stream
-# H3020 Canal, Ditch or Aqueduct
-# L4130 Point-to-Point Line
-# L4140 Property/Parcel Line (Including PLSS)
-# P0001 Nonvisible Linear Legal/Statistical Boundary
-# P0002 Perennial Shoreline
-# P0003 Intermittent Shoreline
-# P0004 Other non-visible bounding Edge (e.g., Census water boundary, boundary of an areal feature)
-ignoremtfcc = [ "H1100", "H3010", "H3013", "H3020", "L4130", "L4140", "P0001", "P0002", "P0003", "P0004" ]
-
-# Distance between the generated address ways and the main way, in feet (read by addressways()).
-address_distance = 30
-
-# Distance by which the ends of the address ways are pulled back from the ends of the main way, in feet (read by addressways())
-address_pullback = 45
-
-import sys, os.path, json
-try:
-    from osgeo import ogr
-    from osgeo import osr
-except:
-    import ogr
-    import osr
-
-# https://www.census.gov/geo/reference/codes/cou.html 
-# tiger_county_fips.json was generated from the following:
-# wget https://www2.census.gov/geo/docs/reference/codes/files/national_county.txt
-# cat national_county.txt | perl -F, -naE'($F[0] ne 'AS') && $F[3] =~ s/ ((city|City|County|District|Borough|City and Borough|Municipio|Municipality|Parish|Island|Census Area)(?:, |\Z))+//; say qq(  "$F[1]$F[2]": "$F[3], $F[0]",)'
-json_fh = open(os.path.dirname(sys.argv[0]) + "/tiger_county_fips.json")
-county_fips_data = json.load(json_fh)
-
-def parse_shp_for_geom_and_tags( filename ):
-    #ogr.RegisterAll()
-
-    dr = ogr.GetDriverByName("ESRI Shapefile")
-    poDS = dr.Open( filename )
-
-    if poDS == None:
-        raise "Open failed."
-
-    poLayer = poDS.GetLayer( 0 )
-
-    fieldNameList = []
-    layerDefinition = poLayer.GetLayerDefn()
-    for i in range(layerDefinition.GetFieldCount()):
-        fieldNameList.append(layerDefinition.GetFieldDefn(i).GetName())
-    # sys.stderr.write(",".join(fieldNameList))
-
-    poLayer.ResetReading()
-
-    ret = []
-
-    poFeature = poLayer.GetNextFeature()
-    while poFeature:
-        tags = {}
-        
-        # WAY ID
-        tags["tiger:way_id"] = int( poFeature.GetField("TLID") )
-        
-        # FEATURE IDENTIFICATION
-        mtfcc = poFeature.GetField("MTFCC");
-        if mtfcc != None:
-
-            if mtfcc == "L4010":        #Pipeline
-                tags["man_made"] = "pipeline"
-            if mtfcc == "L4020":        #Powerline
-                tags["power"] = "line"
-            if mtfcc == "L4031":        #Aerial Tramway/Ski Lift
-                tags["aerialway"] = "cable_car"
-            if mtfcc == "L4110":        #Fence Line
-                tags["barrier"] = "fence"
-            if mtfcc == "L4125":        #Cliff/Escarpment
-                tags["natural"] = "cliff"
-            if mtfcc == "L4165":        #Ferry Crossing
-                tags["route"] = "ferry"
-            if mtfcc == "R1011":        #Railroad Feature (Main, Spur, or Yard)
-                tags["railway"] = "rail"
-                ttyp = poFeature.GetField("TTYP")
-                if ttyp != None:
-                    if ttyp == "S":
-                        tags["service"] = "spur"
-                    if ttyp == "Y":
-                        tags["service"] = "yard"
-                    tags["tiger:ttyp"] = ttyp
-            if mtfcc == "R1051":        #Carline, Streetcar Track, Monorail, Other Mass Transit Rail)
-                tags["railway"] = "light_rail"
-            if mtfcc == "R1052":        #Cog Rail Line, Incline Rail Line, Tram
-                tags["railway"] = "incline"
-            if mtfcc == "S1100":
-                tags["highway"] = "primary"
-            if mtfcc == "S1200":
-                tags["highway"] = "secondary"
-            if mtfcc == "S1400":
-                tags["highway"] = "residential"
-            if mtfcc == "S1500":
-                tags["highway"] = "track"
-            if mtfcc == "S1630":        #Ramp
-                tags["highway"] = "motorway_link"
-            if mtfcc == "S1640":        #Service Drive usually along a limited access highway
-                tags["highway"] = "service"
-            if mtfcc == "S1710":        #Walkway/Pedestrian Trail
-                tags["highway"] = "path"
-            if mtfcc == "S1720":
-                tags["highway"] = "steps"
-            if mtfcc == "S1730":        #Alley
-                tags["highway"] = "service"
-                tags["service"] = "alley"
-            if mtfcc == "S1740":        #Private Road for service vehicles (logging, oil, fields, ranches, etc.)
-                tags["highway"] = "service"
-                tags["access"] = "private"
-            if mtfcc == "S1750":        #Private Driveway
-                tags["highway"] = "service"
-                tags["access"] = "private"
-                tags["service"] = "driveway"
-            if mtfcc == "S1780":        #Parking Lot Road
-                tags["highway"] = "service"
-                tags["service"] = "parking_aisle"
-            if mtfcc == "S1820":        #Bike Path or Trail
-                tags["highway"] = "cycleway"
-            if mtfcc == "S1830":        #Bridle Path
-                tags["highway"] = "bridleway"
-            tags["tiger:mtfcc"] = mtfcc
-
-        # FEATURE NAME
-        if poFeature.GetField("FULLNAME"):
-            #capitalizes the first letter of each word
-            name = poFeature.GetField( "FULLNAME" )
-            tags["name"] = name
-
-            #Attempt to guess highway grade
-            if name[0:2] == "I-":
-                tags["highway"] = "motorway"
-            if name[0:3] == "US ":
-                tags["highway"] = "primary"
-            if name[0:3] == "US-":
-                tags["highway"] = "primary"
-            if name[0:3] == "Hwy":
-                if tags["highway"] != "primary":
-                    tags["highway"] = "secondary"
-
-        # TIGER 2017 no longer contains this field
-        if 'DIVROAD' in fieldNameList:
-            divroad = poFeature.GetField("DIVROAD")
-            if divroad != None:
-                if divroad == "Y" and "highway" in tags and tags["highway"] == "residential":
-                    tags["highway"] = "tertiary"
-                tags["tiger:separated"] = divroad
-
-        statefp = poFeature.GetField("STATEFP")
-        countyfp = poFeature.GetField("COUNTYFP")
-        if (statefp != None) and (countyfp != None):
-            county_name = county_fips_data.get(statefp + '' + countyfp)
-            if county_name:
-                tags["tiger:county"] = county_name
-
-        # tlid = poFeature.GetField("TLID")
-        # if tlid != None:
-        #     tags["tiger:tlid"] = tlid
-
-        lfromadd = poFeature.GetField("LFROMADD")
-        if lfromadd != None:
-            tags["tiger:lfromadd"] = lfromadd
-
-        rfromadd = poFeature.GetField("RFROMADD")
-        if rfromadd != None:
-            tags["tiger:rfromadd"] = rfromadd
-
-        ltoadd = poFeature.GetField("LTOADD")
-        if ltoadd != None:
-            tags["tiger:ltoadd"] = ltoadd
-
-        rtoadd = poFeature.GetField("RTOADD")
-        if rtoadd != None:
-            tags["tiger:rtoadd"] = rtoadd
-
-        zipl = poFeature.GetField("ZIPL")
-        if zipl != None:
-            tags["tiger:zip_left"] = zipl
-
-        zipr = poFeature.GetField("ZIPR")
-        if zipr != None:
-            tags["tiger:zip_right"] = zipr
-
-        if mtfcc not in ignoremtfcc:
-            # COPY DOWN THE GEOMETRY
-            geom = []
-            
-            rawgeom = poFeature.GetGeometryRef()
-            for i in range( rawgeom.GetPointCount() ):
-                geom.append( (rawgeom.GetX(i), rawgeom.GetY(i)) )
-    
-            ret.append( (geom, tags) )
-        poFeature = poLayer.GetNextFeature()
-        
-    return ret
-
-
-# ====================================
-# TODO: read the .prj file that ships with the data instead of hard-coding the WKT here.
-# Change projcs_wkt to match your data's .prj file.
-# ====================================
-projcs_wkt = \
-"""GEOGCS["GCS_North_American_1983",
-        DATUM["D_North_American_1983",
-        SPHEROID["GRS_1980",6378137,298.257222101]],
-        PRIMEM["Greenwich",0],
-        UNIT["Degree",0.017453292519943295]]"""
-
-from_proj = osr.SpatialReference()
-from_proj.ImportFromWkt( projcs_wkt )
-
-# output to WGS84
-to_proj = osr.SpatialReference()
-to_proj.SetWellKnownGeogCS( "EPSG:4326" )
-
-tr = osr.CoordinateTransformation( from_proj, to_proj )  # module-level transformer used by unproject()
-
-import math
-def length(segment, nodelist):
-    '''Returns the length (in feet) of a segment'''
-    first = True
-    distance = 0
-    lat_feet = 364613  #The approximate number of feet in one degree of latitude
-    for point in segment:
-        pointid, (lat, lon) = nodelist[ round_point( point ) ]
-        if first:
-            first = False
-        else:
-            #The approximate number of feet in one degree of longitute
-            lrad = math.radians(lat)
-            lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
-            distance += math.sqrt(((lat - previous[0])*lat_feet)**2 + ((lon - previous[1])*lon_feet)**2)
-        previous = (lat, lon)
-    return distance
-
-def addressways(waylist, nodelist, first_id):  # returns a list of "SELECT tiger_line_import(...)" SQL strings
-    id = first_id  # running id stored with every generated offset point
-    lat_feet = 364613  #The approximate number of feet in one degree of latitude
-    distance = float(address_distance)  # offset of the address ways from the main way, in feet
-    ret = []
-    # waykey arrives as a tuple of (tag, value) pairs - presumably as built by compile_waylist(); verify against caller
-    for waykey, segments in waylist.items():
-        waykey = dict(waykey)
-        rsegments = []
-        lsegments = []
-        for segment in segments:
-            lsegment = []
-            rsegment = []
-            lastpoint = None
-
-            # Don't pull back the ends of very short ways too much
-            seglength = length(segment, nodelist)
-            if seglength < float(address_pullback) * 3.0:
-                pullback = seglength / 3.0
-            else:
-                pullback = float(address_pullback)
-            if "tiger:lfromadd" in waykey:
-                lfromadd = waykey["tiger:lfromadd"]
-            else:
-                lfromadd = None
-            if "tiger:ltoadd" in waykey:
-                ltoadd = waykey["tiger:ltoadd"]
-            else:
-                ltoadd = None
-            if "tiger:rfromadd" in waykey:
-                rfromadd = waykey["tiger:rfromadd"]
-            else:
-                rfromadd = None
-            if "tiger:rtoadd" in waykey:
-                rtoadd = waykey["tiger:rtoadd"]
-            else:
-                rtoadd = None
-            if rfromadd != None and rtoadd != None:  # a side is only processed when both ends of its range are present
-                right = True
-            else:
-                right = False
-            if lfromadd != None and ltoadd != None:
-                left = True
-            else:
-                left = False
-            if left or right:
-                first = True
-                firstpointid, firstpoint = nodelist[ round_point( segment[0] ) ]
-
-                finalpointid, finalpoint = nodelist[ round_point( segment[len(segment) - 1] ) ]
-                for point in segment:
-                    pointid, (lat, lon) = nodelist[ round_point( point ) ]
-
-                    #The approximate number of feet in one degree of longitude
-                    lrad = math.radians(lat)
-                    lon_feet = 365527.822 * math.cos(lrad) - 306.75853 * math.cos(3 * lrad) + 0.3937 * math.cos(5 * lrad)
-
-#Calculate the points of the offset ways
-                    if lastpoint != None:
-                        #Skip points too close to start
-                        if math.sqrt((lat * lat_feet - firstpoint[0] * lat_feet)**2 + (lon * lon_feet - firstpoint[1] * lon_feet)**2) < pullback:
-                            #Preserve very short ways (but will be rendered backwards)
-                            if pointid != finalpointid:
-                                continue
-                        #Skip points too close to end
-                        if math.sqrt((lat * lat_feet - finalpoint[0] * lat_feet)**2 + (lon * lon_feet - finalpoint[1] * lon_feet)**2) < pullback:
-                            #Preserve very short ways (but will be rendered backwards)
-                            if (pointid != firstpointid) and (pointid != finalpointid):
-                                continue
-                        # X/Y: leg from the last kept point to this point, in feet
-                        X = (lon - lastpoint[1]) * lon_feet
-                        Y = (lat - lastpoint[0]) * lat_feet
-                        if Y != 0:
-                            theta = math.pi/2 - math.atan( X / Y)
-                            Xp = math.sin(theta) * distance
-                            Yp = math.cos(theta) * distance
-                        else:
-                            Xp = 0
-                            if X > 0:
-                                Yp = -distance
-                            else:
-                                Yp = distance
-
-                        if Y > 0:
-                            Xp = -Xp
-                        else:
-                            Yp = -Yp
-
-                        if first:
-                            first = False
-                            dX =  - (Yp * (pullback / distance)) / lon_feet #Pull back the first point
-                            dY = (Xp * (pullback / distance)) / lat_feet
-                            if left:
-                                lpoint = (lastpoint[0] + (Yp / lat_feet) - dY, lastpoint[1] + (Xp / lon_feet) - dX)
-                                lsegment.append( (id, lpoint) )
-                                id += 1
-                            if right:
-                                rpoint = (lastpoint[0] - (Yp / lat_feet) - dY, lastpoint[1] - (Xp / lon_feet) - dX)
-                                rsegment.append( (id, rpoint) )
-                                id += 1
-
-                        else:
-                            #round the curves
-                            if delta[1] != 0:  # delta is the (Yp, Xp) pair stored for the previous leg
-                                theta = abs(math.atan(delta[0] / delta[1]))
-                            else:
-                                theta = math.pi / 2
-                            if Xp != 0:
-                                theta = theta - abs(math.atan(Yp / Xp))
-                            else: theta = theta - math.pi / 2
-                            r = 1 + abs(math.tan(theta/2))
-                            if left:
-                                lpoint = (lastpoint[0] + (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] + (Xp + delta[1]) * r / (lon_feet * 2))
-                                lsegment.append( (id, lpoint) )
-                                id += 1
-                            if right:
-                                rpoint = (lastpoint[0] - (Yp + delta[0]) * r / (lat_feet * 2), lastpoint[1] - (Xp + delta[1]) * r / (lon_feet * 2))
-
-                                rsegment.append( (id, rpoint) )
-                                id += 1
-
-                        delta = (Yp, Xp)
-
-                    lastpoint = (lat, lon)
-
-
-#Add in the last node. NOTE(review): Yp, Xp and delta are only assigned inside the loop above - confirm every segment yields at least one leg
-                dX =  - (Yp * (pullback / distance)) / lon_feet
-                dY = (Xp * (pullback / distance)) / lat_feet
-                if left:
-                    lpoint = (lastpoint[0] + (Yp + delta[0]) / (lat_feet * 2) + dY, lastpoint[1] + (Xp + delta[1]) / (lon_feet * 2) + dX )
-                    lsegment.append( (id, lpoint) )
-                    id += 1
-                if right:
-                    rpoint = (lastpoint[0] - Yp / lat_feet + dY, lastpoint[1] - Xp / lon_feet + dX)
-                    rsegment.append( (id, rpoint) )
-                    id += 1
-
-#Generate the tags for ways and nodes
-                zipr = ''
-                zipl = ''
-                name = ''
-                county = ''
-                if "tiger:zip_right" in waykey:
-                    zipr = waykey["tiger:zip_right"]
-                if "tiger:zip_left" in waykey:
-                    zipl = waykey["tiger:zip_left"]
-                if "name" in waykey:
-                    name = waykey["name"]
-                if "tiger:county" in waykey:
-                    county = waykey["tiger:county"]
-                if "tiger:separated" in waykey: # No longer set in Tiger-2017
-                    separated = waykey["tiger:separated"]
-                else:
-                    separated = "N"
-
-#Write the nodes of the offset ways
-                if right:
-                    rlinestring = [];
-                    for i, point in rsegment:
-                        rlinestring.append( "%f %f" % (point[1], point[0]) )  # written in swapped order: point[1] first, then point[0]
-                if left:
-                    llinestring = [];
-                    for i, point in lsegment:
-                        llinestring.append( "%f %f" % (point[1], point[0]) )
-                if right:
-                    rsegments.append( rsegment )
-                if left:
-                    lsegments.append( lsegment )
-                rtofromint = right        #Do the addresses convert to integers?
-                ltofromint = left        #Do the addresses convert to integers?
-                if right:
-                    try: rfromint = int(rfromadd)
-                    except:
-                        print("Non integer address: %s" % rfromadd)
-                        rtofromint = False
-                    try: rtoint = int(rtoadd)
-                    except:
-                        print("Non integer address: %s" % rtoadd)
-                        rtofromint = False
-                if left:
-                    try: lfromint = int(lfromadd)
-                    except:
-                        print("Non integer address: %s" % lfromadd)
-                        ltofromint = False
-                    try: ltoint = int(ltoadd)
-                    except:
-                        print("Non integer address: %s" % ltoadd)
-                        ltofromint = False
-                if right:
-                    id += 1  # skips one id; nothing is emitted with it here
-
-                    interpolationtype = "all";  # default, narrowed to "even" or "odd" below
-                    if rtofromint:
-                        if (rfromint % 2) == 0 and (rtoint % 2) == 0:
-                            if separated == "Y":        #Doesn't matter if there is another side
-                                interpolationtype = "even";
-                            elif ltofromint and (lfromint % 2) == 1 and (ltoint % 2) == 1:
-                                interpolationtype = "even";
-                        elif (rfromint % 2) == 1 and (rtoint % 2) == 1:
-                            if separated == "Y":        #Doesn't matter if there is another side
-                                interpolationtype = "odd";
-                            elif ltofromint and (lfromint % 2) == 0 and (ltoint % 2) == 0:
-                                interpolationtype = "odd";
-
-                    ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
-                                ( ",".join(rlinestring), sql_quote(rfromadd), sql_quote(rtoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipr) ) )
-
-                if left:
-                    id += 1  # skips one id; nothing is emitted with it here
-
-                    interpolationtype = "all";  # default, narrowed to "even" or "odd" below
-                    if ltofromint:
-                        if (lfromint % 2) == 0 and (ltoint % 2) == 0:
-                            if separated == "Y":
-                                interpolationtype = "even";
-                            elif rtofromint and (rfromint % 2) == 1 and (rtoint % 2) == 1:
-                                interpolationtype = "even";
-                        elif (lfromint % 2) == 1 and (ltoint % 2) == 1:
-                            if separated == "Y":
-                                interpolationtype = "odd";
-                            elif rtofromint and (rfromint %2 ) == 0 and (rtoint % 2) == 0:
-                                interpolationtype = "odd";
-
-                    ret.append( "SELECT tiger_line_import(ST_GeomFromText('LINESTRING(%s)',4326), %s, %s, %s, %s, %s, %s);" %
-                                ( ",".join(llinestring), sql_quote(lfromadd), sql_quote(ltoadd), sql_quote(interpolationtype), sql_quote(name), sql_quote(county), sql_quote(zipl) ) )
-
-    return ret
-
-def sql_quote( string ):
-    return "'" + string.replace("'", "''") + "'"
-
-def unproject( point ):
-    pt = tr.TransformPoint( point[0], point[1] )
-    return (pt[1], pt[0])
-
-def round_point( point, accuracy=8 ):
-    return tuple( [ round(x,accuracy) for x in point ] )
-
-def compile_nodelist( parsed_gisdata, first_id=1 ):
-    nodelist = {}
-    
-    i = first_id
-    for geom, tags in parsed_gisdata:
-        if len( geom )==0:
-            continue
-        
-        for point in geom:
-            r_point = round_point( point )
-            if r_point not in nodelist:
-                nodelist[ r_point ] = (i, unproject( point ))
-                i += 1
-            
-    return (i, nodelist)
-
-def adjacent( left, right ):
-    left_left = round_point(left[0])
-    left_right = round_point(left[-1])
-    right_left = round_point(right[0])
-    right_right = round_point(right[-1])
-    
-    return ( left_left == right_left or
-             left_left == right_right or
-             left_right == right_left or
-             left_right == right_right )
-             
-def glom( left, right ):
-    left = list( left )
-    right = list( right )
-    
-    left_left = round_point(left[0])
-    left_right = round_point(left[-1])
-    right_left = round_point(right[0])
-    right_right = round_point(right[-1])
-    
-    if left_left == right_left:
-        left.reverse()
-        return left[0:-1] + right
-        
-    if left_left == right_right:
-        return right[0:-1] + left
-        
-    if left_right == right_left:
-        return left[0:-1] + right
-        
-    if left_right == right_right:
-        right.reverse()
-        return left[0:-1] + right
-        
-    raise 'segments are not adjacent'
-
-def glom_once( segments ):
-    if len(segments)==0:
-        return segments
-    
-    unsorted = list( segments )
-    x = unsorted.pop(0)
-    
-    while len( unsorted ) > 0:
-        n = len( unsorted )
-        
-        for i in range(0, n):
-            y = unsorted[i]
-            if adjacent( x, y ):
-                y = unsorted.pop(i)
-                x = glom( x, y )
-                break
-                
-        # Sorted and unsorted lists have no adjacent segments
-        if len( unsorted ) == n:
-            break
-            
-    return x, unsorted
-    
-def glom_all( segments ):
-    unsorted = segments
-    chunks = []
-    
-    while unsorted != []:
-        chunk, unsorted = glom_once( unsorted )
-        chunks.append( chunk )
-        
-    return chunks
-        
-                
-
-def compile_waylist( parsed_gisdata ):
-    waylist = {}
-    
-    #Group by tiger:way_id
-    for geom, tags in parsed_gisdata:
-        way_key = tags.copy()
-        way_key = ( way_key['tiger:way_id'], tuple( [(k,v) for k,v in way_key.items()] ) )
-        
-        if way_key not in waylist:
-            waylist[way_key] = []
-            
-        waylist[way_key].append( geom )
-    
-    ret = {}
-    for (way_id, way_key), segments in waylist.items():
-        ret[way_key] = glom_all( segments )
-    return ret
-            
-
-def shape_to_sql( shp_filename, sql_filename ):
-    
-    print("parsing shpfile %s" % shp_filename)
-    parsed_features = parse_shp_for_geom_and_tags( shp_filename )
-    
-    print("compiling nodelist")
-    i, nodelist = compile_nodelist( parsed_features )
-    
-    print("compiling waylist")
-    waylist = compile_waylist( parsed_features )
-
-    print("preparing address ways")
-    sql_lines = addressways(waylist, nodelist, i)
-
-    print("writing %s" % sql_filename)
-    fp = open( sql_filename, "w" )
-    fp.write( "\n".join( sql_lines ) )
-    fp.close()
-    
-if __name__ == '__main__':
-    import sys, os.path
-    if len(sys.argv) < 3:
-        print("%s input.shp output.sql" % sys.argv[0])
-        sys.exit()
-    shp_filename = sys.argv[1]
-    sql_filename = sys.argv[2]
-    shape_to_sql(shp_filename, sql_filename)
--- a/data-sources/us-tiger/tiger_county_fips.json
+++ b/data-sources/us-tiger/tiger_county_fips.json
--- a/data-sources/wikipedia-wikidata/README.md
+++ b/data-sources/wikipedia-wikidata/README.md
@@ -1,58 +0,0 @@
-## Add Wikipedia and Wikidata to Nominatim
-
-OSM contributors frequently tag items with links to Wikipedia and Wikidata. Nominatim can use the page ranking of Wikipedia pages to help indicate the relative importance of osm features. This is done by calculating an importance score between 0 and 1 based on the number of inlinks to an article for a location. If two places have the same name and one is more important than the other, the wikipedia score often points to the correct place. 
-
-These scripts extract and prepare both Wikipedia page rank and Wikidata links for use in Nominatim.  
-
-#### Create a new postgres DB for Processing
-
-Due to the size of initial and intermediate tables, processing can be done in an external database:
-```
-CREATE DATABASE wikiprocessingdb;
-```
---
-Wikipedia
---  
-
-Processing these data requires a large amount of disk space (~1TB) and considerable time (>24 hours).
-
-#### Import & Process Wikipedia tables
-
-This step downloads and converts [Wikipedia](https://dumps.wikimedia.org/) page data SQL dumps to postgreSQL files which can be imported and processed with pagelink information from Wikipedia language sites to calculate importance scores.
-
- The script will processes data from whatever set of Wikipedia languages are specified in the initial languages array
-
- Note that processing the top 40 Wikipedia languages can take over a day, and will add nearly 1TB to the processing database. The final output tables will be approximately 11GB and 2GB in size
-
-To download, convert, and import the data, then process summary statistics and compute importance scores, run:
-```
-./wikipedia_import.sh
-```
---
-Wikidata
---
-
-This script downloads and processes Wikidata to enrich the previously created Wikipedia tables for use in Nominatim.
-
-#### Import & Process Wikidata
-
-This step downloads and converts [Wikidata](https://dumps.wikimedia.org/wikidatawiki/) page data SQL dumps to postgreSQL files which can be processed and imported into Nominatim database. Also utilizes Wikidata Query Service API to discover and include place types.
-
- Script presumes that the user has already processed Wikipedia tables as specified above
-
- Script requires wikidata_place_types.txt and wikidata_place_type_levles.csv
-
- script requires the [jq json parser](https://stedolan.github.io/jq/)
-
- Script processes data from whatever set of Wikipedia languages are specified in the initial languages array
-
- Script queries Wikidata Query Service API and imports all instances of place types listed in wikidata_place_types.txt
-
- Script updates wikipedia_articles table with extracted wikidata 
-
-By including Wikidata in the wikipedia_articles table, new connections can be made on the fly from the Nominatim placex table to wikipedia_article importance scores. 
-
-To download, convert, and import the data, then process required items, run:
-``` 
-./wikidata_import.sh
-```
--- a/data-sources/wikipedia-wikidata/import_wikidata.sh
+++ b/data-sources/wikipedia-wikidata/import_wikidata.sh
@@ -1,95 +0,0 @@
-#!/bin/bash
-
-psqlcmd() {
-     psql wikiprocessingdb
-}
-
-mysql2pgsqlcmd() {
-     ./mysql2pgsql.perl /dev/stdin /dev/stdout
-}
-
-
-# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
-
-language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
-
-
-# get a few wikidata dump tables
-
-wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-geo_tags.sql.gz
-wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-page.sql.gz
-wget https://dumps.wikimedia.org/wikidatawiki/latest/wikidatawiki-latest-wb_items_per_site.sql.gz
-
-
-# import wikidata tables
-
-gzip -dc wikidatawiki-latest-geo_tags.sql.gz | mysql2pgsqlcmd | psqlcmd
-gzip -dc wikidatawiki-latest-page.sql.gz | mysql2pgsqlcmd | psqlcmd
-gzip -dc wikidatawiki-latest-wb_items_per_site.sql.gz | mysql2pgsqlcmd | psqlcmd
-
-
-# get wikidata places from wikidata query API
-
-while read F  ; do
-    wget "https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT ?item WHERE{?item wdt:P31*/wdt:P279*wd:$F;}" -O $F.json
-    jq -r '.results | .[] | .[] | [.item.value] | @csv' $F.json >> $F.txt
-    awk -v qid=$F '{print $0 ","qid}' $F.txt | sed -e 's!"http://www.wikidata.org/entity/!!' | sed 's/"//g' >> $F.csv
-    cat $F.csv >> wikidata_place_dump.csv
-    rm $F.json $F.txt $F.csv
-done < wikidata_place_types.txt
-
-
-# import wikidata places
-
-echo "CREATE TABLE wikidata_place_dump (item text, instance_of text);"  | psqlcmd
-echo "COPY wikidata_place_dump (item, instance_of) FROM  '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_dump.csv' DELIMITER ',' CSV;"  | psqlcmd
-
-echo "CREATE TABLE wikidata_place_type_levels (place_type text, level integer);" | psqlcmd
-echo "COPY wikidata_place_type_levels (place_type, level) FROM '/srv/nominatim/Nominatim/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv' DELIMITER ',' CSV HEADER;" | psqlcmd
-
-
-# create derived tables
-
-echo "CREATE TABLE geo_earth_primary AS SELECT gt_page_id, gt_lat, gt_lon FROM geo_tags WHERE gt_globe = 'earth' AND gt_primary = 1 AND NOT( gt_lat < -90 OR gt_lat > 90 OR gt_lon < -180 OR gt_lon > 180 OR gt_lat=0 OR gt_lon=0) ;" | psqlcmd
-echo "CREATE TABLE geo_earth_wikidata AS SELECT DISTINCT geo_earth_primary.gt_page_id, geo_earth_primary.gt_lat, geo_earth_primary.gt_lon, page.page_title, page.page_namespace FROM geo_earth_primary LEFT OUTER JOIN page ON (geo_earth_primary.gt_page_id = page.page_id) ORDER BY geo_earth_primary.gt_page_id;" | psqlcmd
-
-echo "ALTER TABLE wikidata_place_dump ADD COLUMN ont_level integer, ADD COLUMN lat numeric(11,8), ADD COLUMN lon numeric(11,8);" | psqlcmd
-echo "UPDATE wikidata_place_dump SET ont_level = wikidata_place_type_levels.level FROM wikidata_place_type_levels WHERE wikidata_place_dump.instance_of = wikidata_place_type_levels.place_type;" | psqlcmd
-
-echo "CREATE TABLE wikidata_places AS SELECT DISTINCT ON (item) item, instance_of, MAX(ont_level) AS ont_level, lat, lon FROM wikidata_place_dump GROUP BY item, instance_of, ont_level, lat, lon ORDER BY item;" | psqlcmd
-echo "UPDATE wikidata_places SET lat = geo_earth_wikidata.gt_lat, lon = geo_earth_wikidata.gt_lon FROM geo_earth_wikidata WHERE wikidata_places.item = geo_earth_wikidata.page_title" | psqlcmd
-
-
-# process language pages 
-
-echo "CREATE TABLE wikidata_pages (item text, instance_of text, lat numeric(11,8), lon numeric(11,8), ips_site_page text, language text );" | psqlcmd
-
-for i in "${language[@]}"
-do
-   echo "CREATE TABLE wikidata_${i}_pages as select wikidata_places.item, wikidata_places.instance_of, wikidata_places.lat, wikidata_places.lon, wb_items_per_site.ips_site_page FROM wikidata_places LEFT JOIN wb_items_per_site ON (CAST (( LTRIM(wikidata_places.item, 'Q')) AS INTEGER) = wb_items_per_site.ips_item_id) WHERE ips_site_id = '${i}wiki' AND LEFT(wikidata_places.item,1) = 'Q' order by wikidata_places.item;" | psqlcmd
-   echo "ALTER TABLE wikidata_${i}_pages ADD COLUMN language text;" | psqlcmd
-   echo "UPDATE wikidata_${i}_pages SET language = '${i}';" | psqlcmd
-   echo "INSERT INTO wikidata_pages SELECT item, instance_of, lat, lon, ips_site_page, language FROM wikidata_${i}_pages;" | psqlcmd
-done
-
-echo "ALTER TABLE wikidata_pages ADD COLUMN wp_page_title text;" | psqlcmd
-echo "UPDATE wikidata_pages SET wp_page_title = REPLACE(ips_site_page, ' ', '_');" | psqlcmd
-echo "ALTER TABLE wikidata_pages DROP COLUMN ips_site_page;" | psqlcmd
-
-
-# add wikidata to wikipedia_article table
-
-echo "UPDATE wikipedia_article SET lat = wikidata_pages.lat, lon = wikidata_pages.lon, wd_page_title = wikidata_pages.item, instance_of = wikidata_pages.instance_of FROM wikidata_pages WHERE wikipedia_article.language = wikidata_pages.language AND wikipedia_article.title  = wikidata_pages.wp_page_title;" | psqlcmd
-echo "CREATE TABLE wikipedia_article_slim AS SELECT * FROM wikipedia_article WHERE wikidata_id IS NOT NULL;" | psqlcmd
-echo "ALTER TABLE wikipedia_article RENAME TO wikipedia_article_full;" | psqlcmd
-echo "ALTER TABLE wikipedia_article_slim RENAME TO wikipedia_article;" | psqlcmd
-
-
-# clean up intermediate tables
-
-echo "DROP TABLE wikidata_place_dump;" | psqlcmd
-echo "DROP TABLE geo_earth_primary;" | psqlcmd
-for i in "${language[@]}"
-do
-    echo "DROP TABLE wikidata_${i}_pages;" | psqlcmd
-done
--- a/data-sources/wikipedia-wikidata/import_wikipedia.sh
+++ b/data-sources/wikipedia-wikidata/import_wikipedia.sh
@@ -1,77 +0,0 @@
-#!/bin/bash
-
-psqlcmd() {
-     psql wikiprocessingdb
-}
-
-mysql2pgsqlcmd() {
-     ./mysql2pgsql.perl /dev/stdin /dev/stdout
-}
-
-
-# list the languages to process (refer to List of Wikipedias here: https://en.wikipedia.org/wiki/List_of_Wikipedias)
-
-language=( "ar" "bg" "ca" "cs" "da" "de" "en" "es" "eo" "eu" "fa" "fr" "ko" "hi" "hr" "id" "it" "he" "lt" "hu" "ms" "nl" "ja" "no" "pl" "pt" "kk" "ro" "ru" "sk" "sl" "sr" "fi" "sv" "tr" "uk" "vi" "vo" "war" "zh" )
-
-
-# create wikipedia calculation tables
-
-echo "CREATE TABLE linkcounts (language text, title text, count integer, sumcount integer, lat double precision, lon double precision);"  | psqlcmd
-echo "CREATE TABLE wikipedia_article (language text NOT NULL, title text NOT NULL, langcount integer, othercount integer, totalcount integer, lat double precision, lon double precision, importance double precision, title_en text, osm_type character(1), osm_id bigint );" | psqlcmd
-echo "CREATE TABLE wikipedia_redirect (language text, from_title text, to_title text );"  | psqlcmd
-
-
-# download individual wikipedia language tables
-
-for i in "${language[@]}"
-do
-    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-page.sql.gz
-    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-pagelinks.sql.gz
-    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-langlinks.sql.gz
-    wget https://dumps.wikimedia.org/${i}wiki/latest/${i}wiki-latest-redirect.sql.gz
-done
-
-
-# import individual wikipedia language tables
-
-for i in "${language[@]}"
-do
-    gzip -dc ${i}wiki-latest-pagelinks.sql.gz | sed "s/\`pagelinks\`/\`${i}pagelinks\`/g" | mysql2pgsqlcmd | psqlcmd
-    gzip -dc ${i}wiki-latest-page.sql.gz | sed "s/\`page\`/\`${i}page\`/g" | mysql2pgsqlcmd | psqlcmd
-    gzip -dc ${i}wiki-latest-langlinks.sql.gz | sed "s/\`langlinks\`/\`${i}langlinks\`/g" | mysql2pgsqlcmd | psqlcmd
-    gzip -dc ${i}wiki-latest-redirect.sql.gz | sed "s/\`redirect\`/\`${i}redirect\`/g" | mysql2pgsqlcmd | psqlcmd
-done
-
-
-# process language tables and associated pagelink counts
-
-for i in "${language[@]}"
-do
-    echo "create table ${i}pagelinkcount as select pl_title as title,count(*) as count from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
-    echo "insert into linkcounts select '${i}',pl_title,count(*) from ${i}pagelinks where pl_namespace = 0 group by pl_title;" | psqlcmd
-    echo "insert into wikipedia_redirect select '${i}',page_title,rd_title from ${i}redirect join ${i}page on (rd_from = page_id) where page_namespace = 0 and rd_namespace = 0;" | psqlcmd
-    echo "alter table ${i}pagelinkcount add column othercount integer;" | psqlcmd
-    echo "update ${i}pagelinkcount set othercount = 0;" | psqlcmd
-    for j in "${language[@]}"
-    do
-        echo "update ${i}pagelinkcount set othercount = ${i}pagelinkcount.othercount + x.count from (select page_title as title,count from ${i}langlinks join ${i}page on (ll_from = page_id) join ${j}pagelinkcount on (ll_lang = '${j}' and ll_title = title)) as x where x.title = ${i}pagelinkcount.title;" | psqlcmd
-    done
-    echo "insert into wikipedia_article select '${i}', title, count, othercount, count+othercount from ${i}pagelinkcount;" | psqlcmd
-done
-
-
-# calculate importance score for each wikipedia page
-
-echo "update wikipedia_article set importance = log(totalcount)/log((select max(totalcount) from wikipedia_article))" | psqlcmd
-
-
-# clean up intermediate tables to conserve space
-
-for i in "${language[@]}"
-do
-    echo "DROP TABLE ${i}pagelinks;" | psqlcmd
-    echo "DROP TABLE ${i}page;" | psqlcmd
-    echo "DROP TABLE ${i}langlinks;" | psqlcmd
-    echo "DROP TABLE ${i}redirect;" | psqlcmd
-    echo "DROP TABLE ${i}pagelinkcount;" | psqlcmd
-done
--- a/data-sources/wikipedia-wikidata/mysql2pgsql.perl
+++ b/data-sources/wikipedia-wikidata/mysql2pgsql.perl
@@ -1,951 +0,0 @@
-#!/usr/bin/perl -w
-# mysql2pgsql
-# MySQL to PostgreSQL dump file converter
-#
-# For usage: perl mysql2pgsql.perl --help
-#
-# ddl statments are changed but none or only minimal real data
-# formatting are done.
-# data consistency is up to the DBA.
-#
-# (c) 2004-2007 Jose M Duarte and Joseph Speigle ... gborg
-#
-# (c) 2000-2004 Maxim Rudensky  <fonin@omnistaronline.com>
-# (c) 2000 Valentine Danilchuk  <valdan@ziet.zhitomir.ua>
-# All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-# 1. Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-# 2. Redistributions in binary form must reproduce the above copyright
-#    notice, this list of conditions and the following disclaimer in the
-#    documentation and/or other materials provided with the distribution.
-# 3. All advertising materials mentioning features or use of this software
-#    must display the following acknowledgement:
-# This product includes software developed by the Max Rudensky
-# and its contributors.
-# 4. Neither the name of the author nor the names of its contributors
-#    may be used to endorse or promote products derived from this software
-#    without specific prior written permission.
-# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
-# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-# ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
-# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
-# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
-# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
-# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
-# SUCH DAMAGE.
-
-use Getopt::Long;
-
-use POSIX;
-
-use strict;
-use warnings;
-
-
-# main sections
-# -------------
-# 1 variable declarations
-# 2 subroutines
-# 3 get commandline options and specify help statement
-# 4 loop through file and process
-# 5. print_plpgsql function prototype
-
-#################################################################
-#  1.  variable declarations
-#################################################################
-# command line options
-my( $ENC_IN, $ENC_OUT, $PRESERVE_CASE, $HELP, $DEBUG, $SCHEMA, $LOWERCASE, $CHAR2VARCHAR, $NODROP, $SEP_FILE, $opt_debug, $opt_help, $opt_schema, $opt_preserve_case, $opt_char2varchar, $opt_nodrop, $opt_sepfile, $opt_enc_in, $opt_enc_out );
-# variables for constructing pre-create-table entities
-my $pre_create_sql='';    # comments, 'enum' constraints preceding create table statement
-my $auto_increment_seq= '';    # so we can easily substitute it if we need a default value
-my $create_sql='';    # all the datatypes in the create table section
-my $post_create_sql='';   # create indexes, foreign keys, table comments
-my $function_create_sql = '';  # for the set (function,trigger) and CURRENT_TIMESTAMP ( function,trigger )
-#  constraints
-my ($type, $column_valuesStr, @column_values, $value );
-my %constraints=(); #  holds values constraints used to emulate mysql datatypes (e.g. year, set)
-# datatype conversion variables
-my ( $index,$seq);
-my ( $column_name, $col, $quoted_column);
-my ( @year_holder, $year, $constraint_table_name);
-my $table="";   # table_name for create sql statements
-my $table_no_quotes="";   # table_name for create sql statements
-my $sl = '^\s+\w+\s+';  # matches the column name
-my $tables_first_timestamp_column= 1;  #  decision to print warnings about default_timestamp not being in postgres
-my $mysql_numeric_datatypes = "TINYINT|SMALLINT|MEDIUMINT|INT|INTEGER|BIGINT|REAL|DOUBLE|FLOAT|DECIMAL|NUMERIC";
-my $mysql_datetime_datatypes = "|DATE|TIME|TIMESTAMP|DATETIME|YEAR";
-my $mysql_text_datatypes = "CHAR|VARCHAR|BINARY|VARBINARY|TINYBLOB|BLOB|MEDIUMBLOB|LONGBLOB|TINYTEXT|TEXT|MEDIUMTEXT|LONGTEXT|ENUM|SET";
-my $mysql_datatypesStr =  $mysql_numeric_datatypes . "|". $mysql_datetime_datatypes . "|". $mysql_text_datatypes ;
-# handling INSERT INTO statements
-my $rowRe = qr{
-    \(                  # opening parens
-        (               #  (start capture)
-            (?:         #  (start group)
-            '           # string start
-                [^'\\]*     # up to string-end or backslash (escape)
-                (?:     #  (start group)
-                \\.     # gobble escaped character
-                [^'\\]*     # up to string-end of backslash
-                )*      #  (end group, repeat zero or more)
-            '           # string end
-            |           #  (OR)
-            .*?         # everything else (not strings)
-            )*          #  (end group, repeat zero or more)
-        )               #  (end capture)
-    \)                  # closing parent
-}x;
-
-my ($insert_table, $valueString);
-#
-########################################################
-# 2.  subroutines
-#
-# get_identifier
-# print_post_create_sql()
-# quote_and_lc()
-# make_plpgsql($table,$column_name) -- at end of file
-########################################################
-
-# returns an identifier with the given suffix doing controlled
-# truncation if necessary
-sub get_identifier($$$) {
-    my ($table, $col, $suffix) = @_;
-    my $name = '';
-    $table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
-    # in the case of multiple columns
-    my @cols = split(/,/,$col);
-    $col =~ s/,//g;
-    # in case all columns together too long we have to truncate them
-    if (length($col) > 55) {
-        my $totaltocut = length($col)-55;
-        my $tocut = ceil($totaltocut / @cols);
-        @cols = map {substr($_,0,abs(length($_)-$tocut))} @cols;
-        $col="";
-        foreach (@cols){
-            $col.=$_;
-        }
-    }
-
-    my $max_table_length = 63 - length("_${col}_$suffix");
-
-    if (length($table) > $max_table_length) {
-        $table = substr($table, length($table) - $max_table_length, $max_table_length);
-    }
-    return quote_and_lc("${table}_${col}_${suffix}");
-}
-
-
-#
-#
-# called when we encounter next CREATE TABLE statement
-# also called at EOF to print out for last table
-# prints comments, indexes, foreign key constraints (the latter 2 possibly to a separate file)
-sub print_post_create_sql() {
-    my ( @create_idx_comments_constraints_commandsArr, $stmts, $table_field_combination);
-    my %stmts;
-    # loop to check for duplicates in $post_create_sql
-    # Needed because of duplicate key declarations ( PRIMARY KEY and KEY), auto_increment columns
-
-    @create_idx_comments_constraints_commandsArr = split(';\n?', $post_create_sql);
-    if ($SEP_FILE) {
-        open(SEP_FILE, ">>:encoding($ENC_OUT)", $SEP_FILE) or die "Unable to open $SEP_FILE for output: $!\n";
-    }
-
-    foreach (@create_idx_comments_constraints_commandsArr) {
-        if (m/CREATE INDEX "*(\S+)"*\s/i) {  #  CREATE INDEX korean_english_wordsize_idx ON korean_english USING btree  (wordsize);
-            $table_field_combination =  $1;
-            # if this particular table_field_combination was already used do not print the statement:
-            if ($SEP_FILE) {
-                print SEP_FILE "$_;\n" if !defined($stmts{$table_field_combination});
-            } else {
-                print OUT "$_;\n" if !defined($stmts{$table_field_combination});
-            }
-            $stmts{$table_field_combination} = 1;
-        }
-        elsif (m/COMMENT/i) {  # COMMENT ON object IS 'text'; but comment may be part of table name so use 'elsif'
-            print OUT "$_;\n"
-        } else {  # foreign key constraint  or comments (those preceded by -- )
-            if ($SEP_FILE) {
-                print SEP_FILE "$_;\n";
-            } else {
-                print OUT "$_;\n"
-            }
-        }
-    }
-
-    if ($SEP_FILE) {
-        close SEP_FILE;
-    }
-    $post_create_sql='';
-    # empty %constraints for next " create table" statement
-}
-
-# quotes a string or a multicolumn string (comma separated)
-# and optionally lowercase (if LOWERCASE is set)
-# lowercase .... if user wants default postgres behavior
-# quotes .... to preserve keywords and to preserve case when case-sensitive tables are to be used
-sub quote_and_lc($)
-{
-    my $col = shift;
-    if ($LOWERCASE) {
-        $col = lc($col);
-    }
-    if ($col =~ m/,/) {
-        my @cols = split(/,\s?/, $col);
-        @cols = map {"\"$_\""} @cols;
-        return join(', ', @cols);
-    } else {
-        return "\"$col\"";
-    }
-}
-
-########################################################
-# 3.  get commandline options and maybe print help
-########################################################
-
-GetOptions("help", "debug"=> \$opt_debug, "schema=s" => \$SCHEMA, "preserve_case" => \$opt_preserve_case, "char2varchar" => \$opt_char2varchar, "nodrop" => \$opt_nodrop, "sepfile=s" => \$opt_sepfile, "enc_in=s" => \$opt_enc_in, "enc_out=s" => \$opt_enc_out );
-
-$HELP = $opt_help || 0;
-$DEBUG = $opt_debug || 0;
-$PRESERVE_CASE = $opt_preserve_case || 0;
-if ($PRESERVE_CASE == 1) { $LOWERCASE = 0; }
-else { $LOWERCASE = 1; }
-$CHAR2VARCHAR = $opt_char2varchar || 0;
-$NODROP = $opt_nodrop || 0;
-$SEP_FILE = $opt_sepfile || 0;
-$ENC_IN = $opt_enc_in || 'utf8';
-$ENC_OUT = $opt_enc_out || 'utf8';
-
-if (($HELP) || ! defined($ARGV[0]) || ! defined($ARGV[1])) {
-    print "\n\nUsage: perl $0 {--help --debug --preserve_case --char2varchar --nodrop --schema --sepfile --enc_in --enc_out } mysql.sql pg.sql\n";
-    print "\t* OPTIONS WITHOUT ARGS\n";
-    print "\t--help:  prints this message \n";
-    print "\t--debug: output the commented-out mysql line above the postgres line in pg.sql \n";
-    print "\t--preserve_case: prevents automatic case-lowering of column and table names\n";
-    print "\t\tIf you want to preserve case, you must set this flag. For example,\n";
-    print "\t\tIf your client application quotes table and column-names and they have cases in them, set this flag\n";
-    print "\t--char2varchar: converts all char fields to varchar\n";
-    print "\t--nodrop: strips out DROP TABLE statements\n";
-    print "\t\totherise harmless warnings are printed by psql when the dropped table does not exist\n";
-    print "\n\t* OPTIONS WITH ARGS\n";
-    print "\t--schema: outputs a line into the postgres sql file setting search_path \n";
-    print "\t--sepfile: output foreign key constraints and indexes to a separate file so that it can be\n";
-    print "\t\timported after large data set is inserted from another dump file\n";
-    print "\t--enc_in: encoding of mysql in file (default utf8) \n";
-    print "\t--enc_out: encoding of postgres out file (default utf8) \n";
-    print "\n\t* REQUIRED ARGUMENTS\n";
-    if (defined ($ARGV[0])) {
-        print "\tmysql.sql ($ARGV[0])\n";
-    } else {
-        print "\tmysql.sql (undefined)\n";
-    }
-    if (defined ($ARGV[1])) {
-        print "\tpg.sql ($ARGV[1])\n";
-    } else {
-        print "\tpg.sql (undefined)\n";
-    }
-    print "\n";
-    exit 1;
-}
-########################################################
-# 4.  process through mysql_dump.sql file
-# in a big loop
-########################################################
-
-# open in and out files
-open(IN,"<:encoding($ENC_IN)", $ARGV[0]) || die "can't open mysql dump file $ARGV[0]";
-open(OUT,">:encoding($ENC_OUT)", $ARGV[1]) || die "can't open pg dump file $ARGV[1]";
-
-# output header
-print OUT "--\n";
-print OUT "-- Generated from mysql2pgsql.perl\n";
-print OUT "-- http://gborg.postgresql.org/project/mysql2psql/\n";
-print OUT "-- (c) 2001 - 2007 Jose M. Duarte, Joseph Speigle\n";
-print OUT "--\n";
-print OUT "\n";
-print OUT "-- warnings are printed for drop tables if they do not exist\n";
-print OUT "-- please see http://archives.postgresql.org/pgsql-novice/2004-10/msg00158.php\n\n";
-print OUT "-- ##############################################################\n";
-
-if ($SCHEMA ) {
-    print OUT "set search_path='" . $SCHEMA . "'\\g\n" ;
-}
-
-# loop through mysql file  on a per-line basis
-while(<IN>) {
-
-##############     flow     #########################
-# (the lines are directed to different string variables at different times)
-#
-# handle drop table , unlock, connect statements
-# if ( start of create table)   {
-#   print out post_create table (indexes, foreign key constraints, comments from previous table)
-#   add drop table statement if !$NODROP to pre_create_sql
-#   next;
-# }
-# else if ( inside create table) {
-#   add comments in this portion to create_sql
-#   if ( end of create table) {
-#      delete mysql-unique CREATE TABLE commands
-#      print pre_create_sql
-#      print the constraint tables for set and year datatypes
-#      print create_sql
-#      print function_create_sql (this is for the enum columns only)
-#      next;
-#   }
-#   do substitutions
-#    -- NUMERIC DATATYPES
-#    -- CHARACTER DATATYPES
-#    -- DATE AND TIME DATATYPES
-#    -- KEY AND UNIQUE CREATIONS
-#    and append them to create_sql
-# } else {
-#   print inserts on-the-spot (this script only changes default timestamp of 0000-00-00)
-# }
-# LOOP until EOF
-#
-########################################################
-
-
-if (!/^\s*insert into/i) { # not inside create table so don't worry about data corruption
-    s/`//g;  #  '`pgsql uses no backticks to denote table name (CREATE TABLE `sd`) or around field
-            # and table names like  mysql
-            # doh!  we hope all dashes and special chars are caught by the regular expressions :)
-}
-if (/^\s*USE\s*([^;]*);/) {
-    print OUT "\\c ". $1;
-    next;
-}
-if (/^(UN)?LOCK TABLES/i  || /drop\s+table/i ) {
-
-    # skip
-    # DROP TABLE is added when we see the CREATE TABLE
-    next;
-}
-if (/(create\s+table\s+)([-_\w]+)\s/i) { #  example: CREATE TABLE `english_english`
-    print_post_create_sql();   # for last table
-    $tables_first_timestamp_column= 1;  #  decision to print warnings about default_timestamp not being in postgres
-    $create_sql = '';
-    $table_no_quotes = $2 ;
-    $table=quote_and_lc($2);
-    if ( !$NODROP )  {  # always print drop table if user doesn't explicitly say not to
-        #  to drop a table that is referenced by a view or a foreign-key constraint of another table,
-        #  CASCADE must be specified. (CASCADE will remove a dependent view entirely, but in the
-        #  in the foreign-key case it will only remove the foreign-key constraint, not the other table entirely.)
-        #  (source: 8.1.3 docs, section "drop table")
-        warn "table $table will be dropped CASCADE\n";
-        $pre_create_sql .= "DROP TABLE $table CASCADE;\n";    # custom dumps may be missing the 'dump' commands
-    }
-
-    s/(create\s+table\s+)([-_\w]+)\s/$1 $table /i;
-    if ($DEBUG) {
-        $create_sql .=  '-- ' . $_;
-    }
-    $create_sql .= $_;
-    next;
-}
-if ($create_sql ne "") {         # we are inside create table statement so lets process datatypes
-    # print out comments or empty lines in context
-    if ($DEBUG) {
-        $create_sql .=  '-- ' . $_;
-    }
-    if (/^#/ || /^$/ || /^\s*--/) {
-        s/^#/--/;   #  Two hyphens (--) is the SQL-92 standard indicator for comments
-        $create_sql.=$_;
-        next;
-    }
-
-    if (/\).*;/i) {    # end of create table squence
-
-        s/INSERT METHOD[=\s+][^;\s]+//i;
-        s/PASSWORD=[^;\s]+//i;
-        s/ROW_FORMAT=(?:DEFAULT|DYNAMIC|FIXED|COMPRESSED|REDUNDANT|COMPACT)+//i;
-        s/KEY_BLOCK_SIZE=8//i;
-        s/DELAY KEY WRITE=[^;\s]+//i;
-        s/INDEX DIRECTORY[=\s+][^;\s]+//i;
-        s/DATA DIRECTORY=[^;\s]+//i;
-        s/CONNECTION=[^;\s]+//i;
-        s/CHECKSUM=[^;\s]+//i;
-        s/Type=[^;\s]+//i; # ISAM ,   # older versions
-        s/COLLATE=[^;\s]+//i;         # table's collate
-        s/COLLATE\s+[^;\s]+//i;         # table's collate
-        # possible AUTO_INCREMENT starting index, it is used in mysql 5.0.26, not sure since which version
-        if (/AUTO_INCREMENT=(\d+)/i) {
-        # should take < ----  ) ENGINE=MyISAM AUTO_INCREMENT=16 DEFAULT CHARSET=latin1;
-        # and should ouput --->  CREATE SEQUENCE "rhm_host_info_id_seq" START WITH 16;
-        my $start_value = $1;
-        print $auto_increment_seq . "--\n";
-        # print $pre_create_sql . "--\n";
-        $pre_create_sql =~ s/(CREATE SEQUENCE $auto_increment_seq )/$1 START WITH $start_value /;
-    }
-        s/AUTO_INCREMENT=\d+//i;
-        s/PACK_KEYS=\d//i;            # mysql 5.0.22
-        s/DEFAULT CHARSET=[^;\s]+//i; #  my mysql version is 4.1.11
-        s/ENGINE\s*=\s*[^;\s]+//i;   #  my mysql version is 4.1.11
-        s/ROW_FORMAT=[^;\s]+//i;   #  my mysql version is 5.0.22
-        s/KEY_BLOCK_SIZE=8//i; 
-        s/MIN_ROWS=[^;\s]+//i;
-        s/MAX_ROWS=[^;\s]+//i;
-        s/AVG_ROW_LENGTH=[^;\s]+//i;
-        if (/COMMENT='([^']*)'/) {  # ) ENGINE=MyISAM DEFAULT CHARSET=utf8 COMMENT='must be country zones';
-            $post_create_sql.="COMMENT ON TABLE $table IS '$1'\;"; # COMMENT ON table_name IS 'text';
-            s/COMMENT='[^']*'//i;
-        }
-        $create_sql =~ s/,$//g;    # strip last , inside create table
-        # make sure we end in a comma, as KEY statments are turned
-        # into post_create_sql indices
-        # they often are the last line so leaving a 'hanging comma'
-        my @array = split("\n", $create_sql);
-        for (my $a = $#array; $a >= 0; $a--) {  #loop backwards
-            if ($a == $#array  && $array[$a] =~ m/,\s*$/) {    # for last line
-                $array[$a] =~ s/,\s*$//;
-                next;
-            }
-            if ($array[$a] !~ m/create table/i) {  # i.e. if there was more than one column in table
-                if ($a != $#array  && $array[$a] !~ m/,\s*$/  ) {  # for second to last
-                    $array[$a] =~ s/$/,/;
-                    last;
-                }
-                elsif ($a != $#array  && $array[$a] =~ m/,\s*$/ ) {  # for second to last
-                    last;
-                }
-            }
-        }
-        $create_sql = join("\n", @array) . "\n";
-        $create_sql .=  $_;
-
-        # put comments out first
-        print OUT $pre_create_sql;
-
-        # create separate table to reference and to hold mysql's possible set data-type
-        # values.  do that table's creation before create table
-        # definition
-        foreach $column_name (keys %constraints) {
-            $type=$constraints{$column_name}{'type'};
-            $column_valuesStr = $constraints{$column_name}{'values'};
-            $constraint_table_name = get_identifier(${table},${column_name} ,"constraint_table");
-            if ($type eq 'set') {
-                print OUT qq~DROP TABLE $constraint_table_name  CASCADE\\g\n~ ;
-                print OUT qq~create table $constraint_table_name  ( set_values varchar UNIQUE)\\g\n~ ;
-                $function_create_sql .= make_plpgsql($table,$column_name);
-            } elsif ($type eq 'year')  {
-                print OUT qq~DROP TABLE $constraint_table_name  CASCADE\\g\n~ ;
-                print OUT qq~create table $constraint_table_name  ( year_values varchar UNIQUE)\\g\n~ ;
-            }
-            @column_values = split /,/, $column_valuesStr;
-            foreach $value (@column_values) {
-                print OUT qq~insert into $constraint_table_name   values (  $value  )\\g\n~; # ad ' for ints and varchars
-            }
-        }
-
-        $create_sql =~ s/double double/double precision/g;
-
-        # print create table and reset create table vars
-        # when moving from each "create table" to "insert" part of dump
-        print OUT $create_sql;
-        print OUT $function_create_sql;
-        $pre_create_sql="";
-        $auto_increment_seq="";
-        $create_sql="";
-        $function_create_sql='';
-        %constraints=();
-        # the post_create_sql for this table is output at the beginning of the next table def
-        # in case we want to make indexes after doing inserting
-        next;
-    }
-    if (/^\s*(\w+)\s+.*COMMENT\s*'([^']*)'/) {  #`zone_country_id` int(11) COMMENT 'column comment here',
-        $quoted_column=quote_and_lc($1);
-        $post_create_sql.="COMMENT ON COLUMN $table"."."." $quoted_column IS '$2'\;"; # COMMENT ON table_name.column_name IS 'text';
-        s/COMMENT\s*'[^']*'//i;
-    }
-
-
-    # NUMERIC DATATYPES
-    #
-    # auto_increment -> sequences
-    # UNSIGNED conversions
-    # TINYINT
-    # SMALLINT
-    # MEDIUMINT
-    # INT, INTEGER
-    # BIGINT
-    #
-    # DOUBLE [PRECISION], REAL
-    # DECIMAL(M,D), NUMERIC(M,D)
-    # FLOAT(p)
-    # FLOAT
-
-    s/(\w*int)\(\d+\)/$1/g;  # hack of the (n) stuff for e.g. mediumint(2) int(3)
-
-    if (/^(\s*)(\w+)\s*.*numeric.*auto_increment/i) {         # int,auto_increment -> serial
-        $seq = get_identifier($table, $2, 'seq');
-        $quoted_column=quote_and_lc($2);
-        # Smash datatype to int8 and autogenerate the sequence.
-        s/^(\s*)(\w+)\s*.*NUMERIC(.*)auto_increment([^,]*)/$1 $quoted_column serial8 $4/ig;
-        $create_sql.=$_;
-        next;
-    }
-    if (/^\s*(\w+)\s+.*int.*auto_increment/i) {  #  example: data_id mediumint(8) unsigned NOT NULL auto_increment,
-        $seq = get_identifier($table, $1, 'seq');
-        $quoted_column=quote_and_lc($1);
-        s/(\s*)(\w+)\s+.*int.*auto_increment([^,]*)/$1 $quoted_column serial8 $3/ig;
-        $create_sql.=$_;
-        next;
-    }
-
-
-
-
-    # convert UNSIGNED to CHECK constraints
-    if (m/^(\s*)(\w+)\s+((float|double precision|double|real|decimal|numeric))(.*)unsigned/i) {
-        $quoted_column = quote_and_lc($2);
-        s/^(\s*)(\w+)\s+((float|double precision|double|real|decimal|numeric))(.*)unsigned/$1 $quoted_column $3 $4 CHECK ($quoted_column >= 0)/i;
-    }
-    # example:  `wordsize` tinyint(3) unsigned default NULL,
-    if (m/^(\s+)(\w+)\s+(\w+)\s+unsigned/i) {
-        $quoted_column=quote_and_lc($2);
-        s/^(\s+)(\w+)\s+(\w+)\s+unsigned/$1 $quoted_column $3 CHECK ($quoted_column >= 0)/i;
-    }
-    if (m/^(\s*)(\w+)\s+(bigint.*)unsigned/) {
-        $quoted_column=quote_and_lc($2);
-        #  see http://archives.postgresql.org/pgsql-general/2005-07/msg01178.php
-        #  and see http://www.postgresql.org/docs/8.2/interactive/datatype-numeric.html
-        # see  http://dev.mysql.com/doc/refman/5.1/en/numeric-types.html  max size == 20 digits
-        s/^(\s*)(\w+)\s+bigint(.*)unsigned/$1 $quoted_column NUMERIC (20,0) CHECK ($quoted_column >= 0)/i;
-
-    }
-
-    # int type conversion
-    # TINYINT    (signed) -128 to 127 (unsigned) 0   255
-    #  SMALLINT A small integer. The signed range is -32768 to 32767. The unsigned range is 0 to 65535.
-    #  MEDIUMINT  A medium-sized integer. The signed range is -8388608 to 8388607. The unsigned range is 0 to 16777215.
-    #  INT A normal-size integer. The signed range is -2147483648 to 2147483647. The unsigned range is 0 to 4294967295.
-    # BIGINT The signed range is -9223372036854775808 to 9223372036854775807. The unsigned range is 0 to 18446744073709551615
-    # for postgres see http://www.postgresql.org/docs/8.2/static/datatype-numeric.html#DATATYPE-INT
-    s/^(\s+"*\w+"*\s+)tinyint/$1 smallint/i;
-    s/^(\s+"*\w+"*\s+)mediumint/$1 integer/i;
-
-    # the floating point types
-    #   double -> double precision
-    #   double(n,m) -> double precision
-    #   float - no need for conversion
-    #   float(n) - no need for conversion
-    #   float(n,m) -> double precision
-
-    s/(^\s*\w+\s+)double(\(\d+,\d+\))?/$1float/i;
-    s/float(\(\d+,\d+\))/float/i;
-
-    #
-    # CHARACTER TYPES
-    #
-    # set
-    # enum
-    # binary(M), VARBINARy(M), tinyblob, tinytext,
-    # bit
-    # char(M), varchar(M)
-    # blob -> text
-    # mediumblob
-    # longblob, longtext
-    # text -> text
-    # mediumtext
-    # longtext
-    #  mysql docs: A BLOB is a binary large object that can hold a variable amount of data.
-
-    # set
-    # For example, a column specified as SET('one', 'two') NOT NULL can have any of these values:
-    # ''
-    # 'one'
-    # 'two'
-    # 'one,two'
-    if (/(\w*)\s+set\(((?:['"]\w+['"]\s*,*)+(?:['"]\w+['"])*)\)(.*)$/i) { # example:  `au_auth` set('r','w','d') NOT NULL default '',
-        $column_name = $1;
-        $constraints{$column_name}{'values'} = $2;  # 'abc','def', ...
-        $constraints{$column_name}{'type'} = "set";  # 'abc','def', ...
-        $_ =  qq~ $column_name varchar , ~;
-        $column_name = quote_and_lc($1);
-        $create_sql.=$_;
-        next;
-
-    }
-    if (/(\S*)\s+enum\(((?:['"][^'"]+['"]\s*,)+['"][^'"]+['"])\)(.*)$/i) { # enum handling
-        #  example:  `test` enum('?','+','-') NOT NULL default '?'
-        # $2  is the values of the enum 'abc','def', ...
-        $quoted_column=quote_and_lc($1);
-        #  "test" NOT NULL default '?' CONSTRAINT test_test_constraint CHECK ("test" IN ('?','+','-'))
-        $_ = qq~ $quoted_column varchar CHECK ($quoted_column IN ( $2 ))$3\n~;  # just assume varchar?
-        $create_sql.=$_;
-        next;
-    }
-    # Take care of "binary" option for char and varchar
-    # (pre-4.1.2, it indicated a byte array; from 4.1.2, indicates
-    # a binary collation)
-    s/(?:var)?char(?:\(\d+\))? (?:byte|binary)/text/i;
-    if (m/(?:var)?binary\s*\(\d+\)/i) {   #  c varBINARY(3) in Mysql
-        warn "WARNING in table '$table' '$_':  binary type is converted to bytea (unsized) for Postgres\n";
-    }
-    s/(?:var)?binary(?:\(\d+\))?/text/i;   #  c varBINARY(3) in Mysql
-    s/bit(?:\(\d+\))?/bytea/i;   #  bit datatype -> bytea
-
-    # large datatypes
-    s/\w*blob/bytea/gi;
-    s/tinytext/text/gi;
-    s/mediumtext/text/gi;
-    s/longtext/text/gi;
-
-    # char -> varchar -- if specified as a command line option
-    # PostgreSQL would otherwise pad with spaces as opposed
-    # to MySQL! Your user interface may depend on this!
-    if ($CHAR2VARCHAR) {
-        s/(^\s+\S+\s+)char/${1}varchar/gi;
-    }
-
-    # nuke column's collate and character set
-    s/(\S+)\s+character\s+set\s+\w+/$1/gi;
-    s/(\S+)\s+collate\s+\w+/$1/gi;
-
-    #
-    # DATE AND TIME TYPES
-    #
-    # date  time
-    # year
-    # datetime
-    # timestamp
-
-    # date  time
-    # these are the same types in postgres, just do the replacement of 0000-00-00 date
-
-    if (m/default '(\d+)-(\d+)-(\d+)([^']*)'/i) { # we grab the year, month and day
-        # NOTE: times of 00:00:00 are possible and are okay
-        my $time = '';
-        my $year=$1;
-        my $month= $2;
-        my $day = $3;
-        if ($4) {
-            $time = $4;
-        }
-        if ($year eq "0000") { $year = '1970'; }
-        if ($month eq "00") { $month = '01'; }
-        if ($day eq "00") { $day = '01'; }
-        s/default '[^']+'/default '$year-$month-$day$time'/i; # finally we replace with $datetime
-    }
-
-    # convert mysql's year datatype to a constraint
-    if (/(\w*)\s+year\(4\)(.*)$/i) { # can be integer OR string 1901-2155
-        $constraint_table_name = get_identifier($table,$1 ,"constraint_table");
-        $column_name=quote_and_lc($1);
-        @year_holder = ();
-        $year='';
-        for (1901 .. 2155) {
-                $year = "'$_'";
-            unless ($year =~ /2155/) { $year .= ','; }
-             push( @year_holder, $year);
-        }
-        $constraints{$column_name}{'values'} = join('','',@year_holder);   # '1901','1902', ...
-        $constraints{$column_name}{'type'} = "year";
-        $_ =  qq~ $column_name varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
-        $create_sql.=$_;
-        next;
-    } elsif (/(\w*)\s+year\(2\)(.*)$/i) { # same for a 2-integer string
-        $constraint_table_name = get_identifier($table,$1 ,"constraint_table");
-        $column_name=quote_and_lc($1);
-        @year_holder = ();
-        $year='';
-        for (1970 .. 2069) {
-            $year = "'$_'";
-            if ($year =~ /2069/) { next; }
-            push( @year_holder, $year);
-        }
-        push( @year_holder, '0000');
-        $constraints{$column_name}{'values'} = join(',',@year_holder);   # '1971','1972', ...
-        $constraints{$column_name}{'type'} = "year";  # 'abc','def', ...
-        $_ =  qq~ $1 varchar CONSTRAINT ${table}_${column_name}_constraint REFERENCES $constraint_table_name ("year_values") $2\n~;
-        $create_sql.=$_;
-        next;
-    }
-
-    # datetime
-    # Default on a dump from MySQL 5.0.22 is in the same form as datetime so let it flow down
-    # to the timestamp section and deal with it there
-    s/(${sl})datetime /$1timestamp without time zone /i;
-
-    # change not null datetime field to null valid ones
-    # (to support remapping of "zero time" to null
-    # s/($sl)datetime not null/$1timestamp without time zone/i;
-
-
-    # timestamps
-    #
-    # nuke datetime representation (not supported in PostgreSQL)
-    # change default time of 0000-00-00 to 1970-01-01
-
-    # we may possibly need to create a trigger to provide
-    # equal functionality with ON UPDATE CURRENT TIMESTAMP
-
-
-    if (m/${sl}timestamp/i) {
-        if ( m/ON UPDATE CURRENT_TIMESTAMP/i )  {  # the ... default CURRENT_TIMESTAMP  only applies for blank inserts, not updates
-            s/ON UPDATE CURRENT_TIMESTAMP//i ;
-            m/^\s*(\w+)\s+timestamp/i ;
-            # automatic trigger creation
-            $table_no_quotes =~ s/"//g;
-$function_create_sql .= " CREATE OR REPLACE FUNCTION update_". $table_no_quotes . "() RETURNS trigger AS '
-BEGIN
-    NEW.$1 := CURRENT_TIMESTAMP;
-    RETURN NEW;
-END;
-' LANGUAGE 'plpgsql';
-
-- before INSERT is handled by 'default CURRENT_TIMESTAMP'
-CREATE TRIGGER add_current_date_to_".$table_no_quotes." BEFORE UPDATE ON ". $table . " FOR EACH ROW EXECUTE PROCEDURE
-update_".$table_no_quotes."();\n";
-
-        }
-        if ($tables_first_timestamp_column && m/DEFAULT NULL/i) {
-            # DEFAULT NULL is the same as DEFAULT CURRENT_TIMESTAMP for the first TIMESTAMP  column. (MYSQL manual)
-            s/($sl)(timestamp\s+)default null/$1 $2 DEFAULT CURRENT_TIMESTAMP/i;
-        }
-        $tables_first_timestamp_column= 0;
-        if (m/${sl}timestamp\s*\(\d+\)/i) {   # fix for timestamps with width spec not handled (ID: 1628)
-            warn "WARNING for in table '$table' '$_': your default timestamp width is being ignored for table $table \n";
-            s/($sl)timestamp(?:\(\d+\))/$1datetime/i;
-        }
-    } # end timestamp section
-
-    # KEY AND UNIQUE CREATIONS
-    #
-    # unique
-    if ( /^\s+unique\s+\(([^(]+)\)/i ) { #  example    UNIQUE `name` (`name`), same as UNIQUE KEY
-        #  POSTGRESQL:  treat same as mysql unique
-        $quoted_column = quote_and_lc($1);
-        s/\s+unique\s+\(([^(]+)\)/ unique ($quoted_column) /i;
-            $create_sql.=$_;
-        next;
-        } elsif ( /^\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/i ) { #  example    UNIQUE KEY `name` (`name`)
-            #  MYSQL: unique  key: allows null=YES, allows duplicates=NO (*)
-            #  ... new ... UNIQUE KEY `unique_fullname` (`fullname`)  in my mysql v. Ver 14.12 Distrib 5.1.7-beta
-            #  POSTGRESQL:  treat same as mysql unique
-        # just quote columns
-        $quoted_column = quote_and_lc($2);
-            s/\s+unique\s+key\s*(\w+)\s*\(([^(]+)\)/ unique ($quoted_column) /i;
-            $create_sql.=$_;
-        # the index corresponding to the 'key' is automatically created
-            next;
-    }
-    # keys
-    if ( /^\s+fulltext key\s+/i) { # example:  FULLTEXT KEY `commenttext` (`commenttext`)
-    # that is key as a word in the first check for a match
-        # the tsvector datatype is made for these types of things
-        # example mysql file:
-        #  what is tsvector datatype?
-        #  http://www.sai.msu.su/~megera/postgres/gist/tsearch/V2/docs/tsearch-V2-intro.html
-        warn "dba must do fulltext key transformation for $table\n";
-        next;
-    }
-    if ( /^(\s+)constraint (\S+) foreign key \((\S+)\) references (\S+) \((\S+)\)(.*)/i ) {
-        $quoted_column =quote_and_lc($3);
-        $col=quote_and_lc($5);
-        $post_create_sql .= "ALTER TABLE $table ADD FOREIGN KEY ($quoted_column) REFERENCES " . quote_and_lc($4) . " ($col);\n";
-        next;
-    }
-    if ( /^\s*primary key\s*\(([^)]+)\)([,\s]+)/i ) { #  example    PRIMARY KEY (`name`)
-        # MYSQL: primary key: allows null=NO , allows duplicates=NO
-        #  POSTGRESQL: When an index is declared unique, multiple table rows with equal indexed values will not be
-        #       allowed. Null values are not considered equal.
-        #  POSTGRESQL quote's source: 8.1.3 docs section 11.5 "unique indexes"
-        #  so, in postgres, we need to add a NOT NULL to the UNIQUE constraint
-        # and, primary key (mysql) == primary key (postgres) so that we *really* don't need change anything
-        $quoted_column = quote_and_lc($1);
-        s/(\s*)primary key\s+\(([^)]+)\)([,\s]+)/$1 primary key ($quoted_column)$3/i;
-        # indexes are automatically created for unique columns
-        $create_sql.=$_;
-        next;
-    } elsif (m/^\s+key\s[-_\s\w]+\((.+)\)/i    ) {     # example:   KEY `idx_mod_english_def_word` (`word`),
-        # regular key: allows null=YES, allows duplicates=YES
-        # MYSQL:   KEY is normally a synonym for INDEX.  http://dev.mysql.com/doc/refman/5.1/en/create-table.html
-        #
-        #  * MySQL: ALTER TABLE {$table} ADD KEY $column ($column)
-        #  * PostgreSQL: CREATE INDEX {$table}_$column_idx ON {$table}($column) // Please note the _idx "extension"
-        #    PRIMARY KEY (`postid`),
-        #    KEY `ownerid` (`ownerid`)
-        # create an index for everything which has a key listed for it.
-        my $col = $1;
-        # TODO we don't have a translation for the substring syntax in text columns in MySQL (e.g. "KEY my_idx (mytextcol(20))")
-        # for now just getting rid of the brackets and numbers (the substring specifier):
-        $col=~s/\(\d+\)//g;
-        $quoted_column = quote_and_lc($col);
-        if ($col =~ m/,/) {
-            $col =  s/,/_/;
-        }
-        $index = get_identifier($table, $col, 'idx');
-        $post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column)\;";
-        # just create index do not add to create table statement
-        next;
-    }
-
-    # handle 'key' declared at end of column
-    if (/\w+.*primary key/i) {   # mysql: key is normally just a synonym for index
-    # just leave as is ( postgres has primary key type)
-
-
-    } elsif (/(\w+\s+(?:$mysql_datatypesStr)\s+.*)key/i) {   # mysql: key is normally just a synonym for index
-    # I can't find a reference for 'key' in a postgres command without using the word 'primary key'
-        s/$1key/$1/i ;
-        $index = get_identifier($table, $1, 'idx');
-        $quoted_column =quote_and_lc($1);
-        $post_create_sql.="CREATE INDEX $index ON $table USING btree ($quoted_column) \;";
-        $create_sql.=$_;
-    }
-
-
-
-    # do we really need this anymore?
-    # remap colums with names of existing system attribute
-    if (/"oid"/i) {
-        s/"oid"/"_oid"/g;
-        print STDERR "WARNING: table $table uses column \"oid\" which is renamed to \"_oid\"\nYou should fix application manually! Press return to continue.";
-        my $wait=<STDIN>;
-    }
-
-    s/oid/_oid/i if (/key/i && /oid/i); # fix oid in key
-
-    # FINAL QUOTING OF ALL COLUMNS
-    # quote column names which were not already quoted
-    # perhaps they were not quoted because they were not explicitly handled
-    if (!/^\s*"(\w+)"(\s+)/i) {
-        /^(\s*)(\w+)(\s+)(.*)$/i ;
-        $quoted_column= quote_and_lc($2);
-        s/^(\s*)(\w+)(\s+)(.*)$/$1 $quoted_column $3 $4 /;
-    }
-    $create_sql.=$_;
-    #  END of if ($create_sql ne "") i.e. were inside create table statement so processed datatypes
-}
-# add "not in create table" comments or empty lines to pre_create_sql
-elsif (/^#/ || /^$/ || /^\s*--/) {
-    s/^#/--/;   #  Two hyphens (--) is the SQL-92 standard indicator for comments
-    $pre_create_sql .=  $_ ;  # printed above create table statement
-    next;
-}
-elsif (/^\s*insert into/i) { # not inside create table and doing insert
-    # fix mysql's zero/null value for timestamps
-    s/'0000-00-00/'1970-01-01/gi;
-    # commented out to fix bug "Field contents interpreted as a timestamp", what was the point of this line anyway?
-    #s/([12]\d\d\d)([01]\d)([0-3]\d)([0-2]\d)([0-6]\d)([0-6]\d)/'$1-$2-$3 $4:$5:$6'/;
-
-    #---- fix data in inserted data: (from MS world)
-    s!\x96!-!g;    # --
-    s!\x93!"!g;    # ``
-    s!\x94!"!g;    # ''
-    s!\x85!... !g;    # \ldots
-    s!\x92!`!g;
-
-    print OUT $pre_create_sql;    # print comments preceding the insert section
-    $pre_create_sql="";
-    $auto_increment_seq = "";
-
-    s/'((?:[^'\\]++|\\.)*+)'(?=[),])/E'$1'/g;
-    # for the E'' see http://www.postgresql.org/docs/8.2/interactive/release-8-1.html
-    s!\\\\!\\\\\\\\!g;      # replace \\ with ]\\\\
-
-    # split 'extended' INSERT INTO statements to something PostgreSQL can  understand
-    ( $insert_table,  $valueString) = $_ =~ m/^INSERT\s+INTO\s+['`"]*(.*?)['`"]*\s+VALUES\s*(.*)/i;
-    $insert_table = quote_and_lc($insert_table);
-
-    s/^INSERT INTO.*?\);//i;  # hose the statement which is to be replaced whether a run-on or not
-    # guarantee table names are quoted
-    print OUT qq(INSERT INTO $insert_table VALUES $valueString \n);
-
-} else {
-    print OUT $_ ;  #  example: /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
-}
-#  keep looping and get next line of IN file
-
-} # END while(<IN>)
-
-print_post_create_sql();   # in case there is extra from the last table
-
-#################################################################
-#  5.  print_plgsql function prototype
-#      emulate the set datatype with the following plpgsql function
-#      looks ugly so putting at end of file
-#################################################################
-#
-sub make_plpgsql {
-my ($table,$column_name) = ($_[0],$_[1]);
-$table=~s/\"//g; # make sure that $table doesn't have quotes so we don't end up with redundant quoting
-my $constraint_table = get_identifier($table,$column_name ,"constraint_table");
-return "
-- this function is called by the insert/update trigger
-- it checks if the INSERT/UPDATE for the 'set' column
-- contains members which comprise a valid mysql set
-- this TRIGGER function therefore acts like a constraint
--  provided limited functionality for mysql's set datatype
-- just verifies and matches for string representations of the set at this point
-- though the set datatype uses bit comparisons, the only supported arguments to our
-- set datatype are VARCHAR arguments
-- to add a member to the set add it to the ".$table."_".$column_name." table
-CREATE OR REPLACE FUNCTION check_".$table."_".$column_name."_set(  ) RETURNS TRIGGER AS \$\$\n
-DECLARE
----
-arg_str VARCHAR ;
-argx VARCHAR := '';
-nobreak INT := 1;
-rec_count INT := 0;
-psn INT := 0;
-str_in VARCHAR := NEW.$column_name;
----
-BEGIN
----
-IF str_in IS NULL THEN RETURN NEW ; END IF;
-arg_str := REGEXP_REPLACE(str_in, '\\',\\'', ',');  -- str_in is CONSTANT
-arg_str := REGEXP_REPLACE(arg_str, '^\\'', '');
-arg_str := REGEXP_REPLACE(arg_str, '\\'\$', '');
-- RAISE NOTICE 'arg_str %',arg_str;
-psn := POSITION(',' in arg_str);
-IF psn > 0 THEN
-    psn := psn - 1; -- minus-1 from comma position
-    -- RAISE NOTICE 'psn %',psn;
-    argx := SUBSTRING(arg_str FROM 1 FOR psn);  -- get one set member
-    psn := psn + 2; -- go to first starting letter
-    arg_str := SUBSTRING(arg_str FROM psn);   -- hack it off
-ELSE
-    psn := 0; -- minus-1 from comma position
-    argx := arg_str;
-END IF;
-- RAISE NOTICE 'argx %',argx;
-- RAISE NOTICE 'new arg_str: %',arg_str;
-WHILE nobreak LOOP
-    EXECUTE 'SELECT count(*) FROM $constraint_table WHERE set_values = ' || quote_literal(argx) INTO rec_count;
-    IF rec_count = 0 THEN RAISE EXCEPTION 'one of the set values was not found';
-    END IF;
-    IF psn > 0 THEN
-        psn := psn - 1; -- minus-1 from comma position
-        -- RAISE NOTICE 'psn %',psn;
-        argx := SUBSTRING(arg_str FROM 1 FOR psn);  -- get one set member
-        psn := psn + 2; -- go to first starting letter
-        arg_str := SUBSTRING(arg_str FROM psn);   -- hack it off
-        psn := POSITION(',' in arg_str);
-    ELSE nobreak = 0;
-    END IF;
-    -- RAISE NOTICE 'next argx % and next arg_str %', argx, arg_str;
-END LOOP;
-RETURN NEW;
----
-END;
-\$\$ LANGUAGE 'plpgsql' VOLATILE;
-
-drop trigger set_test ON $table;
-- make a trigger for each set field
-- make trigger and hard-code in column names
-- see http://archives.postgresql.org/pgsql-interfaces/2005-02/msg00020.php
-CREATE   TRIGGER    set_test
-BEFORE   INSERT OR   UPDATE  ON $table   FOR  EACH  ROW
-EXECUTE  PROCEDURE  check_".$table."_".$column_name."_set();\n";
-} #  end sub make_plpgsql();
-
--- a/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv
+++ b/data-sources/wikipedia-wikidata/wikidata_place_type_levels.csv
@@ -1,199 +0,0 @@
-place_type,level
-Q9842,4
-Q9430,3
-Q928830,4
-Q9259,1
-Q91028,5
-Q8514,2
-Q8502,2
-Q83405,3
-Q82794,2
-Q820477,1
-Q811979,1
-Q8072,2
-Q79007,2
-Q786014,3
-Q75848,2
-Q75520,2
-Q728937,4
-Q7275,2
-Q719456,3
-Q7075,3
-Q697295,4
-Q6852233,2
-Q682943,3
-Q665487,5
-Q655686,3
-Q643589,5
-Q641226,2
-Q631305,2
-Q6256,2
-Q6023295,2
-Q5773747,5
-Q56061,1
-Q55659167,4
-Q55488,4
-Q55465477,3
-Q54050,2
-Q532,3
-Q53060,2
-Q52177058,4
-Q515716,5
-Q5153984,4
-Q515,3
-Q5144960,5
-Q5119,4
-Q5119,4
-Q5107,2
-Q5084,4
-Q5031071,4
-Q5003624,2
-Q4989906,1
-Q4976993,3
-Q486972,1
-Q486972,2
-Q483110,3
-Q4830453,4
-Q47521,3
-Q473972,1
-Q46831,2
-Q46614560,5
-Q44782,3
-Q44613,4
-Q44539,4
-Q44494,2
-Q44377,2
-Q4421,2
-Q43501,2
-Q4286337,3
-Q42523,3
-Q41176,2
-Q40357,3
-Q4022,4
-Q40080,2
-Q39816,2
-Q39715,3
-Q39614,1
-Q3957,3
-Q3947,4
-Q3914,3
-Q38723,2
-Q38720,3
-Q3623867,5
-Q35666,2
-Q355304,3
-Q35509,2
-Q35112127,3
-Q34985575,4
-Q34876,5
-Q34763,2
-Q34627,4
-Q3455524,3
-Q34442,4
-Q33837,2
-Q33506,3
-Q32815,4
-Q3257686,2
-Q3240715,2
-Q3191695,5
-Q3153117,2
-Q30198,2
-Q30139652,3
-Q294422,3
-Q2870166,3
-Q27686,3
-Q274153,3
-Q271669,1
-Q2659904,2
-Q24529780,2
-Q24354,3
-Q2354973,4
-Q23442,2
-Q23413,3
-Q23397,3
-Q2327515,4
-Q2311958,5
-Q22927291,6
-Q22698,1
-Q2175765,4
-Q205495,4
-Q204832,3
-Q2042028,2
-Q202216,6
-Q1970725,3
-Q194203,5
-Q194195,2
-Q190429,2
-Q185187,3
-Q185113,2
-Q183366,2
-Q1799794,1
-Q1788454,4
-Q1785071,3
-Q1777138,3
-Q177634,2
-Q177380,2
-Q174814,4
-Q174782,2
-Q17350442,2
-Q17343829,3
-Q17334923,0
-Q17018380,3
-Q16970,4
-Q16917,3
-Q16831714,4
-Q165,3
-Q160742,4
-Q159719,3
-Q159334,4
-Q15640612,5
-Q15324,2
-Q15284,5
-Q15243209,6
-Q152081,1
-Q15195406,4
-Q1500350,5
-Q149621,5
-Q14757767,4
-Q14350,3
-Q1410668,3
-Q1394476,3
-Q1377575,2
-Q1353183,3
-Q134447,4
-Q133215,3
-Q133056,2
-Q13221722,3
-Q13220204,2
-Q1311958,4
-Q1303167,3
-Q130003,3
-Q12518,2
-Q12516,3
-Q1248784,3
-Q123705,3
-Q12323,3
-Q12284,4
-Q12280,4
-Q121359,2
-Q1210950,2
-Q11755880,3
-Q11707,3
-Q11315,3
-Q11303,3
-Q1115575,4
-Q1107656,1
-Q10864048,1
-Q1076486,2
-Q105731,3
-Q105190,3
-Q1048525,3
-Q102496,5
-Q28872924,1
-Q15617994,1
-Q159313,2
-Q24398318,3
-Q327333,2
-Q43229,1
-Q860861,1
-Q4989906,1
--- a/data-sources/wikipedia-wikidata/wikidata_place_types.txt
+++ b/data-sources/wikipedia-wikidata/wikidata_place_types.txt
@@ -1,195 +0,0 @@
-Q9842
-Q9430
-Q928830
-Q9259
-Q91028
-Q8514
-Q8502
-Q83405
-Q82794
-Q820477
-Q811979
-Q8072
-Q79007
-Q786014
-Q75848
-Q75520
-Q728937
-Q7275
-Q719456
-Q7075
-Q697295
-Q6852233
-Q682943
-Q665487
-Q655686
-Q643589
-Q641226
-Q631305
-Q6256
-Q6023295
-Q5773747
-Q56061
-Q55659167
-Q55488
-Q55465477
-Q54050
-Q532
-Q53060
-Q52177058
-Q515716
-Q5153984
-Q515
-Q5144960
-Q5119
-Q5107
-Q5084
-Q5031071
-Q5003624
-Q4989906
-Q4976993
-Q486972
-Q483110
-Q4830453
-Q47521
-Q473972
-Q46831
-Q46614560
-Q44782
-Q44613
-Q44539
-Q44494
-Q44377
-Q4421
-Q43501
-Q4286337
-Q42523
-Q41176
-Q40357
-Q4022
-Q40080
-Q39816
-Q39715
-Q39614
-Q3957
-Q3947
-Q3914
-Q38723
-Q38720
-Q3623867
-Q35666
-Q355304
-Q35509
-Q35112127
-Q34985575
-Q34876
-Q34763
-Q34627
-Q3455524
-Q34442
-Q33837
-Q33506
-Q32815
-Q3257686
-Q3240715
-Q3191695
-Q3153117
-Q30198
-Q30139652
-Q294422
-Q2870166
-Q27686
-Q274153
-Q271669
-Q2659904
-Q24529780
-Q24354
-Q2354973
-Q23442
-Q23413
-Q23397
-Q2327515
-Q2311958
-Q22927291
-Q22698
-Q2175765
-Q205495
-Q204832
-Q2042028
-Q202216
-Q1970725
-Q194203
-Q194195
-Q190429
-Q185187
-Q185113
-Q183366
-Q1799794
-Q1788454
-Q1785071
-Q1777138
-Q177634
-Q177380
-Q174814
-Q174782
-Q17350442
-Q17343829
-Q17334923
-Q17018380
-Q16970
-Q16917
-Q16831714
-Q165
-Q160742
-Q159719
-Q159334
-Q15640612
-Q15324
-Q15284
-Q15243209
-Q152081
-Q15195406
-Q1500350
-Q149621
-Q14757767
-Q14350
-Q1410668
-Q1394476
-Q1377575
-Q1353183
-Q134447
-Q133215
-Q133056
-Q13221722
-Q13220204
-Q1311958
-Q1303167
-Q130003
-Q12518
-Q12516
-Q1248784
-Q123705
-Q12323
-Q12284
-Q12280
-Q121359
-Q1210950
-Q11755880
-Q11707
-Q11315
-Q11303
-Q1115575
-Q1107656
-Q10864048
-Q1076486
-Q105731
-Q105190
-Q1048525
-Q102496
-Q28872924
-Q15617994
-Q159313
-Q24398318
-Q327333
-Q43229
-Q860861
--- a/data-sources/wikipedia-wikidata/wikidata_places.md
+++ b/data-sources/wikipedia-wikidata/wikidata_places.md
@@ -1,200 +0,0 @@
-
-## Wikidata place types and related OSM Tags
-
-Wikidata does not have any official ontologies, however the [DBpedia project](https://wiki.dbpedia.org/) has created an [ontology](https://wiki.dbpedia.org/services-resources/ontology) that covered [place types](http://mappings.dbpedia.org/server/ontology/classes/#Place). The table below used the DBpedia place ontology as a starting point, and is provided as a cross-reference to the relevant OSM tags.
-
-The Wikidata place types listed in the table below can be used in conjunction with the [Wikidata Query Service](https://query.wikidata.org/) to retrieve instances of those place types from the Wikidata knowledgebase. 
-
-```
-SELECT ?item ?lat ?lon
-WHERE {
-  ?item wdt:P31*/wdt:P279*wd:Q9430; wdt:P625 ?pt.
-  ?item p:P625?loc.
-  ?loc psv:P625?cnode.
-  ?cnode wikibase:geoLatitude?lat.
-  ?cnode wikibase:geoLongitude?lon.
-}
-```
-
-An example json return for all instances of the Wikidata item "Q9430" (Ocean) can be seen at [json](https://query.wikidata.org/bigdata/namespace/wdq/sparql?format=json&query=SELECT?item?lat?lon%20WHERE{?item%20wdt:P31*/wdt:P279*wd:Q9430;wdt:P625?pt.?item%20p:P625?loc.?loc%20psv:P625?cnode.?cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
-
-**NOTE** the OSM tags listed are those listed in the wikidata entries, and not all the possible matches for tags within OSM.
-
-
-   title   |             concept                   |       OSM Tag     | 
-----------|---------------------------------------|------------------|
-[Q17334923](https://www.wikidata.org/entity/Q17334923)  | Location | | 
-[Q811979](https://www.wikidata.org/entity/Q811979)           | Architectural Structure | | 
-[Q194195](https://www.wikidata.org/entity/Q194195)   | Amusement park | 
-[Q204832](https://www.wikidata.org/entity/Q204832)   | Roller coaster | [attraction=roller_coaster](https://wiki.openstreetmap.org/wiki/Tag:attraction=roller_coaster) | 
-[Q2870166](https://www.wikidata.org/entity/Q2870166)   | Water ride | |
-[Q641226](https://www.wikidata.org/entity/Q641226)    | Arena | [amenity=events_centre](https://wiki.openstreetmap.org/wiki/Tag:amenity=events_centre) | 
-[Q41176](https://www.wikidata.org/entity/Q41176)     | Building | [building=yes](https://wiki.openstreetmap.org/wiki/Key:building) |
-[Q1303167](https://www.wikidata.org/entity/Q1303167)   | Barn | [building=barn](https://wiki.openstreetmap.org/wiki/Tag:building=barn) |
-[Q655686](https://www.wikidata.org/entity/Q655686)   | Commercial building | [building=commercial](https://wiki.openstreetmap.org/wiki/Tag:building=commercial) | 
-[Q4830453](https://www.wikidata.org/entity/Q4830453)   | Business | |
-[Q7075](https://www.wikidata.org/entity/Q7075)     | Library | [amenity=library](https://wiki.openstreetmap.org/wiki/Tag:amenity=library) |
-[Q133215](https://www.wikidata.org/entity/Q133215)   | Casino | [amenity=casino](https://wiki.openstreetmap.org/wiki/Tag:amenity=casino) | 
-[Q23413](https://www.wikidata.org/entity/Q23413)     | Castle | [historic=castle](https://wiki.openstreetmap.org/wiki/Tag:historic=castle) |
-[Q83405](https://www.wikidata.org/entity/Q83405)     | Factory | | 
-[Q53060](https://www.wikidata.org/entity/Q53060)     | Gate  | [barrier=gate](https://wiki.openstreetmap.org/wiki/Tag:barrier=gate) |cnode%20wikibase:geoLatitude?lat.?cnode%20wikibase:geoLongitude?lon.})
-[Q11755880](https://www.wikidata.org/entity/Q11755880)           | Residential Building  | [building=residential](https://wiki.openstreetmap.org/wiki/Tag:building=residential) | 
-[Q3947](https://www.wikidata.org/entity/Q3947)      | House  | [building=house](https://wiki.openstreetmap.org/wiki/Tag:building=house) |
-[Q35112127](https://www.wikidata.org/entity/Q35112127)           | Historic Building  | |
-[Q5773747](https://www.wikidata.org/entity/Q5773747)   | Historic house  | | 
-[Q38723](https://www.wikidata.org/entity/Q38723)           | Higher Education Institution  | 
-[Q3914](https://www.wikidata.org/entity/Q3914)      | School  | [amenity=school](https://wiki.openstreetmap.org/wiki/Tag:amenity=school) | 
-[Q9842](https://www.wikidata.org/entity/Q9842)      | Primary school  | | 
-[Q159334](https://www.wikidata.org/entity/Q159334)    | Secondary school  | | 
-[Q16917](https://www.wikidata.org/entity/Q16917)     | Hospital  | [amenity=hospital](https://wiki.openstreetmap.org/wiki/Tag:amenity=hospital), [healthcare=hospital](https://wiki.openstreetmap.org/wiki/Tag:healthcare=hospital), [building=hospital](https://wiki.openstreetmap.org/wiki/Tag:building=hospital) |
-[Q27686](https://www.wikidata.org/entity/Q27686)     | Hotel  | [tourism=hotel](https://wiki.openstreetmap.org/wiki/Tag:tourism=hotel), [building=hotel](https://wiki.openstreetmap.org/wiki/Tag:building=hotel) |
-[Q33506](https://www.wikidata.org/entity/Q33506)     | Museum  | [tourism=museum](https://wiki.openstreetmap.org/wiki/Tag:tourism=museum) |
-[Q40357](https://www.wikidata.org/entity/Q40357)     | Prison  | [amenity=prison](https://wiki.openstreetmap.org/wiki/Tag:amenity=prison) |
-[Q24398318](https://www.wikidata.org/entity/Q24398318)           | Religious Building  | |
-[Q160742](https://www.wikidata.org/entity/Q160742)    | Abbey  | |
-[Q16970](https://www.wikidata.org/entity/Q16970)     | Church (building)  | [building=church](https://wiki.openstreetmap.org/wiki/Tag:building=church) |
-[Q44613](https://www.wikidata.org/entity/Q44613)     | Monastery  | [amenity=monastery](https://wiki.openstreetmap.org/wiki/Tag:amenity=monastery) | 
-[Q32815](https://www.wikidata.org/entity/Q32815)     | Mosque  | [building=mosque](https://wiki.openstreetmap.org/wiki/Tag:building=mosque) | 
-[Q697295](https://www.wikidata.org/entity/Q697295)    | Shrine  | [building=shrine](https://wiki.openstreetmap.org/wiki/Tag:building=shrine) |
-[Q34627](https://www.wikidata.org/entity/Q34627)     | Synagogue  | [building=synagogue](https://wiki.openstreetmap.org/wiki/Tag:building=synagogue) |
-[Q44539](https://www.wikidata.org/entity/Q44539)     | Temple  | [building=temple](https://wiki.openstreetmap.org/wiki/Tag:building=temple) | 
-[Q11707](https://www.wikidata.org/entity/Q11707)     | Restaurant  | [amenity=restaurant](https://wiki.openstreetmap.org/wiki/Tag:amenity=restaurant) |
-[Q11315](https://www.wikidata.org/entity/Q11315)     | Shopping mall  | [shop=mall](https://wiki.openstreetmap.org/wiki/Tag:shop=mall), [shop=shopping_centre](https://wiki.openstreetmap.org/wiki/Tag:shop=shopping_centre) | 
-[Q11303](https://www.wikidata.org/entity/Q11303)     | Skyscraper  | |
-[Q17350442](https://www.wikidata.org/entity/Q17350442)           | Venue  | |
-[Q41253](https://www.wikidata.org/entity/Q41253)           | Movie Theater  | [amenity=cinema](https://wiki.openstreetmap.org/wiki/Tag:amenity=cinema) | 
-[Q483110](https://www.wikidata.org/entity/Q483110)    | Stadium  | [leisure=stadium](https://wiki.openstreetmap.org/wiki/Tag:leisure=stadium), [building=stadium](https://wiki.openstreetmap.org/wiki/Tag:building=stadium) |
-[Q24354](https://www.wikidata.org/entity/Q24354)     | Theater (structure)  | [amenity=theatre](https://wiki.openstreetmap.org/wiki/Tag:amenity=theatre) |
-[Q121359](https://www.wikidata.org/entity/Q121359)    | Infrastructure  | |
-[Q1248784](https://www.wikidata.org/entity/Q1248784)   | Airport  | |
-[Q12323](https://www.wikidata.org/entity/Q12323)     | Dam  | [waterway=dam](https://wiki.openstreetmap.org/wiki/Tag:waterway=dam) |
-[Q1353183](https://www.wikidata.org/entity/Q1353183)   | Launch pad  | | 
-[Q105190](https://www.wikidata.org/entity/Q105190)   | Levee  | [man_made=dyke](https://wiki.openstreetmap.org/wiki/Tag:man_made=dyke) |
-[Q105731](https://www.wikidata.org/entity/Q105731)    | Lock (water navigation)   | [lock=yes](https://wiki.openstreetmap.org/wiki/Key:lock) |
-[Q44782](https://www.wikidata.org/entity/Q44782)     | Port  | |
-[Q159719](https://www.wikidata.org/entity/Q159719)    | Power station  | [power=plant](https://wiki.openstreetmap.org/wiki/Tag:power=plant) |
-[Q174814](https://www.wikidata.org/entity/Q174814)    | Electrical substation   |  |
-[Q134447](https://www.wikidata.org/entity/Q134447)    | Nuclear power plant  | [plant:source=nuclear](https://wiki.openstreetmap.org/wiki/Tag:plant:source=nuclear) |
-[Q786014](https://www.wikidata.org/entity/Q786014)   | Rest area  | [highway=rest_area](https://wiki.openstreetmap.org/wiki/Tag:highway=rest_area), [highway=services](https://wiki.openstreetmap.org/wiki/Tag:highway=services) |
-[Q12280](https://www.wikidata.org/entity/Q12280)     | Bridge  | [bridge=* ](https://wiki.openstreetmap.org/wiki/Key:bridge), [man_made=bridge](https://wiki.openstreetmap.org/wiki/Tag:man_made=bridge) |
-[Q728937](https://www.wikidata.org/entity/Q728937)           | Railroad Line  | [railway=rail](https://wiki.openstreetmap.org/wiki/Tag:railway=rail) | 
-[Q1311958](https://www.wikidata.org/entity/Q1311958)           | Railway Tunnel  | | 
-[Q34442](https://www.wikidata.org/entity/Q34442)     | Road  | [highway=* ](https://wiki.openstreetmap.org/wiki/Key:highway), [route=road](https://wiki.openstreetmap.org/wiki/Tag:route=road) |
-[Q1788454](https://www.wikidata.org/entity/Q1788454)   | Road junction  |  | 
-[Q44377](https://www.wikidata.org/entity/Q44377)     | Tunnel  | [tunnel=* ](https://wiki.openstreetmap.org/wiki/Key:tunnel) |
-[Q5031071](https://www.wikidata.org/entity/Q5031071)  | Canal tunnel  | |
-[Q719456](https://www.wikidata.org/entity/Q719456)    | Station  | [public_transport=station](https://wiki.openstreetmap.org/wiki/Tag:public_transport=station) |
-[Q205495](https://www.wikidata.org/entity/Q205495)    | Filling station  | [amenity=fuel](https://wiki.openstreetmap.org/wiki/Tag:amenity=fuel) |
-[Q928830](https://www.wikidata.org/entity/Q928830)    | Metro station  | [station=subway](https://wiki.openstreetmap.org/wiki/Tag:station=subway) |
-[Q55488](https://www.wikidata.org/entity/Q55488)     | Train station  | [railway=station](https://wiki.openstreetmap.org/wiki/Tag:railway=station) |
-[Q2175765](https://www.wikidata.org/entity/Q2175765)   | Tram stop  | [railway=tram_stop](https://wiki.openstreetmap.org/wiki/Tag:railway=tram_stop), [public_transport=stop_position](https://wiki.openstreetmap.org/wiki/Tag:public_transport=stop_position) |
-[Q6852233](https://www.wikidata.org/entity/Q6852233)   | Military building  | |
-[Q44494](https://www.wikidata.org/entity/Q44494)     | Mill (grinding)  | |
-[Q185187](https://www.wikidata.org/entity/Q185187)    | Watermill  | [man_made=watermill](https://wiki.openstreetmap.org/wiki/Tag:man_made=watermill) |
-[Q38720](https://www.wikidata.org/entity/Q38720)     | Windmill  | [man_made=windmill](https://wiki.openstreetmap.org/wiki/Tag:man_made=windmill) | 
-[Q4989906](https://www.wikidata.org/entity/Q4989906)   | Monument  | [historic=monument](https://wiki.openstreetmap.org/wiki/Tag:historic=monument) |
-[Q5003624](https://www.wikidata.org/entity/Q5003624)   | Memorial  | [historic=memorial](https://wiki.openstreetmap.org/wiki/Tag:historic=memorial) |
-[Q271669](https://www.wikidata.org/entity/Q271669)   | Landform  | |
-[Q190429](https://www.wikidata.org/entity/Q190429)    | Depression (geology)  | |
-[Q17018380](https://www.wikidata.org/entity/Q17018380)  | Bight (geography)  | | 
-[Q54050](https://www.wikidata.org/entity/Q54050)     | Hill  | |
-[Q1210950](https://www.wikidata.org/entity/Q1210950)   | Channel (geography)  | |
-[Q23442](https://www.wikidata.org/entity/Q23442)    | Island  | [place=island](https://wiki.openstreetmap.org/wiki/Tag:place=island) | 
-[Q42523](https://www.wikidata.org/entity/Q42523)    | Atoll  | |
-[Q34763](https://www.wikidata.org/entity/Q34763)    | Peninsula  | | 
-[Q355304](https://www.wikidata.org/entity/Q355304)   | Watercourse  | |
-[Q30198](https://www.wikidata.org/entity/Q30198)    | Marsh  | [wetland=marsh](https://wiki.openstreetmap.org/wiki/Tag:wetland=marsh) |
-[Q75520](https://www.wikidata.org/entity/Q75520)    | Plateau  | |
-[Q2042028](https://www.wikidata.org/entity/Q2042028)  | Ravine  | |
-[Q631305](https://www.wikidata.org/entity/Q631305)   | Rock formation  | | 
-[Q12516](https://www.wikidata.org/entity/Q12516)    | Pyramid  | |
-[Q1076486](https://www.wikidata.org/entity/Q1076486) | Sports venue  |  |
-[Q682943](https://www.wikidata.org/entity/Q682943)   | Cricket field  | [sport=cricket](https://wiki.openstreetmap.org/wiki/Tag:sport=cricket) | 
-[Q1048525](https://www.wikidata.org/entity/Q1048525)  | Golf course  | [leisure=golf_course](https://wiki.openstreetmap.org/wiki/Tag:leisure=golf_course) |
-[Q1777138](https://www.wikidata.org/entity/Q1777138)  | Race track  | [highway=raceway](https://wiki.openstreetmap.org/wiki/Tag:highway=raceway) | 
-[Q130003](https://www.wikidata.org/entity/Q130003)   | Ski resort  | |
-[Q174782](https://www.wikidata.org/entity/Q174782)   | Town square  | [place=square](https://wiki.openstreetmap.org/wiki/Tag:place=square) |
-[Q12518](https://www.wikidata.org/entity/Q12518)    | Tower  | [building=tower](https://wiki.openstreetmap.org/wiki/Tag:building=tower), [man_made=tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=tower) |
-[Q39715](https://www.wikidata.org/entity/Q39715)    | Lighthouse  | [man_made=lighthouse](https://wiki.openstreetmap.org/wiki/Tag:man_made=lighthouse) |
-[Q274153](https://www.wikidata.org/entity/Q274153)   | Water tower | [building=water_tower](https://wiki.openstreetmap.org/wiki/Tag:building=water_tower), [man_made=water_tower](https://wiki.openstreetmap.org/wiki/Tag:man_made=water_tower) |
-[Q43501](https://www.wikidata.org/entity/Q43501)    | Zoo  | [tourism=zoo](https://wiki.openstreetmap.org/wiki/Tag:tourism=zoo) | 
-[Q39614](https://www.wikidata.org/entity/Q39614)    | Cemetery  | [amenity=grave_yard](https://wiki.openstreetmap.org/wiki/Tag:amenity=grave_yard), [landuse=cemetery](https://wiki.openstreetmap.org/wiki/Tag:landuse=cemetery) |
-[Q152081](https://www.wikidata.org/entity/Q152081)   | Concentration camp  | |
-[Q1107656](https://www.wikidata.org/entity/Q1107656)  | Garden  | [leisure=garden](https://wiki.openstreetmap.org/wiki/Tag:leisure=garden) |
-[Q820477](https://www.wikidata.org/entity/Q820477)   | Mine |  | 
-[Q33837](https://www.wikidata.org/entity/Q33837) | Archipelago  | [place=archipelago](https://wiki.openstreetmap.org/wiki/Tag:place=archipelago) | 
-[Q40080](https://www.wikidata.org/entity/Q40080)    | Beach  | [natural=beach](https://wiki.openstreetmap.org/wiki/Tag:natural=beach) |
-[Q15324](https://www.wikidata.org/entity/Q15324)    | Body of water | [natural=water](https://wiki.openstreetmap.org/wiki/Tag:natural=water) | 
-[Q23397](https://www.wikidata.org/entity/Q23397)    | Lake  | [water=lake](https://wiki.openstreetmap.org/wiki/Tag:water=lake) | 
-[Q9430](https://www.wikidata.org/entity/Q9430)     | Ocean  | |
-[Q165](https://www.wikidata.org/entity/Q165)    | Sea  | |
-[Q47521](https://www.wikidata.org/entity/Q47521)    | Stream  | | 
-[Q12284](https://www.wikidata.org/entity/Q12284)    | Canal  | [waterway=canal](https://wiki.openstreetmap.org/wiki/Tag:waterway=canal) |
-[Q4022](https://www.wikidata.org/entity/Q4022)     | River  | [waterway=river](https://wiki.openstreetmap.org/wiki/Tag:waterway=river), [type=waterway](https://wiki.openstreetmap.org/wiki/Relation:waterway) |
-[Q185113](https://www.wikidata.org/entity/Q185113)   | Cape | [natural=cape](https://wiki.openstreetmap.org/wiki/Tag:natural=cape) | 
-[Q35509](https://www.wikidata.org/entity/Q35509)    | Cave  | [natural=cave_entrance](https://wiki.openstreetmap.org/wiki/Tag:natural=cave_entrance) | 
-[Q8514](https://www.wikidata.org/entity/Q8514)     | Desert  | | 
-[Q4421](https://www.wikidata.org/entity/Q4421)     | Forest  | [natural=wood](https://wiki.openstreetmap.org/wiki/Tag:natural=wood) |
-[Q35666](https://www.wikidata.org/entity/Q35666)    | Glacier  | [natural=glacier](https://wiki.openstreetmap.org/wiki/Tag:natural=glacier) |
-[Q177380](https://www.wikidata.org/entity/Q177380)   | Hot spring | | 
-[Q8502](https://www.wikidata.org/entity/Q8502)     | Mountain  | [natural=peak](https://wiki.openstreetmap.org/wiki/Tag:natural=peak) | 
-[Q133056](https://www.wikidata.org/entity/Q133056)   | Mountain pass  | | 
-[Q46831](https://www.wikidata.org/entity/Q46831)    | Mountain range  | |
-[Q39816](https://www.wikidata.org/entity/Q39816)    | Valley  | [natural=valley](https://wiki.openstreetmap.org/wiki/Tag:natural=valley) |
-[Q8072](https://www.wikidata.org/entity/Q8072)     | Volcano  | [natural=volcano](https://wiki.openstreetmap.org/wiki/Tag:natural=volcano) |
-[Q43229](https://www.wikidata.org/entity/Q43229)    | Organization  |  | 
-[Q327333](https://www.wikidata.org/entity/Q327333)   | Government agency  | [office=government](https://wiki.openstreetmap.org/wiki/Tag:office=government)|
-[Q22698](https://www.wikidata.org/entity/Q22698)    | Park | [leisure=park](https://wiki.openstreetmap.org/wiki/Tag:leisure=park) | 
-[Q159313](https://www.wikidata.org/entity/Q159313)   | Urban agglomeration | |
-[Q177634](https://www.wikidata.org/entity/Q177634)   | Community  | |
-[Q5107](https://www.wikidata.org/entity/Q5107)    | Continent | [place=continent](https://wiki.openstreetmap.org/wiki/Tag:place=continent) |
-[Q6256](https://www.wikidata.org/entity/Q6256)     | Country  | [place=country](https://wiki.openstreetmap.org/wiki/Tag:place=country) | 
-[Q75848](https://www.wikidata.org/entity/Q75848)    | Gated community | | 
-[Q3153117](https://www.wikidata.org/entity/Q3153117) | Intercommunality  | |
-[Q82794](https://www.wikidata.org/entity/Q82794)    | Region  | | 
-[Q56061](https://www.wikidata.org/entity/Q56061)    | Administrative division  | [boundary=administrative](https://wiki.openstreetmap.org/wiki/Tag:boundary=administrative)  | 
-[Q665487](https://www.wikidata.org/entity/Q665487)   | Diocese | | 
-[Q4976993](https://www.wikidata.org/entity/Q4976993)  | Parish | [boundary=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:boundary=civil_parish) |
-[Q194203](https://www.wikidata.org/entity/Q194203)   | Arrondissements of France  | |
-[Q91028](https://www.wikidata.org/entity/Q91028)    | Arrondissements of Belgium  | | 
-[Q3623867](https://www.wikidata.org/entity/Q3623867)  | Arrondissements of Benin  | | 
-[Q2311958](https://www.wikidata.org/entity/Q2311958)  | Canton (country subdivision) | [political_division=canton](https://wiki.openstreetmap.org/wiki/FR:Cantons_in_France) |
-[Q643589](https://www.wikidata.org/entity/Q643589)   | Department |  | 
-[Q202216](https://www.wikidata.org/entity/Q202216)   | Overseas department and region  | |
-[Q149621](https://www.wikidata.org/entity/Q149621)   | District  | [place=district](https://wiki.openstreetmap.org/wiki/Tag:place=district) |
-[Q15243209](https://www.wikidata.org/wiki/Q15243209) | Historic district  | |
-[Q5144960](https://www.wikidata.org/entity/Q5144960)  | Microregion  | | 
-[Q15284](https://www.wikidata.org/entity/Q15284)    | Municipality  | |
-[Q515716](https://www.wikidata.org/entity/Q515716)   | Prefecture  | |
-[Q34876](https://www.wikidata.org/entity/Q34876)    | Province  | |
-[Q3191695](https://www.wikidata.org/entity/Q3191695)  | Regency (Indonesia)  | |
-[Q1970725](https://www.wikidata.org/entity/Q1970725)  | Natural region  | |
-[Q486972](https://www.wikidata.org/entity/Q486972)   | Human settlement  | | 
-[Q515](https://www.wikidata.org/entity/Q515)      | City  | [place=city](https://wiki.openstreetmap.org/wiki/Tag:place=city) |
-[Q5119](https://www.wikidata.org/entity/Q5119)     | Capital city | [capital=yes](https://wiki.openstreetmap.org/wiki/Key:capital) |
-[Q4286337](https://www.wikidata.org/entity/Q4286337)  | City district  | | 
-[Q1394476](https://www.wikidata.org/entity/Q1394476)  | Civil township  | | 
-[Q1115575](https://www.wikidata.org/entity/Q1115575)  | Civil parish  | [designation=civil_parish](https://wiki.openstreetmap.org/wiki/Tag:designation=civil_parish) |
-[Q5153984](https://www.wikidata.org/entity/Q5153984)  | Commune-level subdivisions  | |
-[Q123705](https://www.wikidata.org/entity/Q123705)   | Neighbourhood  | [place=neighbourhood](https://wiki.openstreetmap.org/wiki/Tag:place=neighbourhood) |
-[Q1500350](https://www.wikidata.org/entity/Q1500350)  | Townships of China  | |
-[Q17343829](https://www.wikidata.org/entity/Q17343829)           | Unincorporated Community  | |
-[Q3957](https://www.wikidata.org/entity/Q3957)     | Town  | [place=town](https://wiki.openstreetmap.org/wiki/Tag:place=town) | 
-[Q532](https://www.wikidata.org/entity/Q532)      | Village  | [place=village](https://wiki.openstreetmap.org/wiki/Tag:place=village) |
-[Q5084](https://www.wikidata.org/entity/Q5084)     | Hamlet   | [place=hamlet](https://wiki.openstreetmap.org/wiki/Tag:place=hamlet) | 
-[Q7275](https://www.wikidata.org/entity/Q7275)     | State  | | 
-[Q79007](https://www.wikidata.org/entity/Q79007)    | Street  | |
-[Q473972](https://www.wikidata.org/entity/Q473972)   | Protected area  | [boundary=protected_area](https://wiki.openstreetmap.org/wiki/Tag:boundary=protected_area) |
-[Q1377575](https://www.wikidata.org/entity/Q1377575)  | Wildlife refuge  | | 
-[Q1410668](https://www.wikidata.org/entity/Q1410668)  | National Wildlife Refuge  | [protection_title=National Wildlife Refuge](ownership=national), [ownership=national](https://wiki.openstreetmap.org/wiki/Tag:ownership=national)|
-[Q9259](https://www.wikidata.org/entity/Q9259)     | World Heritage Site  | |
-
---
-
-### Future Work
-
-The Wikidata improvements to Nominatim can be further enhanced by:
-
- continuing to add new Wikidata links to OSM objects
- increasing the number of place types accounted for in the wikipedia_articles table
- working to use place types in the wikipedia_article matching process
--- a/data/country_name.sql
+++ b/data/country_name.sql
--- a/data/gb_postcode_table.sql
+++ b/data/gb_postcode_table.sql
@@ -1,26 +0,0 @@
-- This data contains Ordnance Survey data © Crown copyright and database right 2010. 
-- Code-Point Open contains Royal Mail data © Royal Mail copyright and database right 2010.
-- OS data may be used under the terms of the OS OpenData licence:
-- http://www.ordnancesurvey.co.uk/oswebsite/opendata/licence/docs/licence.pdf
-
-SET statement_timeout = 0;
-SET client_encoding = 'UTF8';
-SET standard_conforming_strings = off;
-SET check_function_bodies = false;
-SET client_min_messages = warning;
-SET escape_string_warning = off;
-
-SET search_path = public, pg_catalog;
-
-SET default_tablespace = '';
-
-SET default_with_oids = false;
-
-CREATE TABLE gb_postcode (
-    id integer,
-    postcode character varying(9),
-    geometry geometry,
-    CONSTRAINT enforce_dims_geometry CHECK ((st_ndims(geometry) = 2)),
-    CONSTRAINT enforce_srid_geometry CHECK ((st_srid(geometry) = 4326))
-);
-
--- a/data/us_postcode_table.sql
+++ b/data/us_postcode_table.sql
@@ -1,16 +0,0 @@
-SET statement_timeout = 0;
-SET client_encoding = 'UTF8';
-SET check_function_bodies = false;
-SET client_min_messages = warning;
-
-SET search_path = public, pg_catalog;
-
-SET default_tablespace = '';
-
-SET default_with_oids = false;
-
-CREATE TABLE us_postcode (
-    postcode text,
-    x double precision,
-    y double precision
-);
--- a/data/words.sql
+++ b/data/words.sql
@@ -29787,7 +29787,7 @@ st	5557484

 -- prefill word table

-select count(make_keywords(v)) from (select distinct svals(name) as v from place) as w where v is not null;
+select count(precompute_words(v)) from (select distinct svals(name) as v from place) as w where v is not null;
 select count(getorcreate_housenumber_id(make_standard_name(v))) from (select distinct address->'housenumber' as v from place where address ? 'housenumber') as w;

 -- copy the word frequencies
--- a/docs/CMakeLists.txt
+++ b/docs/CMakeLists.txt
@@ -5,24 +5,31 @@

 configure_file(mkdocs.yml ../mkdocs.yml)
 file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/appendix)
-file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/data-sources)
+
+set (DOC_SOURCES
+     admin
+     develop
+     api
+     customize
+     index.md
+     extra.css
+     styles.css
+    )
+
+foreach (src ${DOC_SOURCES})
+    execute_process(
+        COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/${src} ${CMAKE_CURRENT_BINARY_DIR}/${src}
+    )
+endforeach()

 ADD_CUSTOM_TARGET(doc
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/admin ${CMAKE_CURRENT_BINARY_DIR}/admin
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/develop ${CMAKE_CURRENT_BINARY_DIR}/develop
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/api ${CMAKE_CURRENT_BINARY_DIR}/api
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/index.md ${CMAKE_CURRENT_BINARY_DIR}/index.md
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/extra.css ${CMAKE_CURRENT_BINARY_DIR}/extra.css
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${CMAKE_CURRENT_SOURCE_DIR}/data-sources/overview.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/overview.md
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/us-tiger/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/US-Tiger.md
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/gb-postcodes/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/GB-Postcodes.md
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/README.md ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Country-Grid.md
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/country-grid/mexico.quad.png ${CMAKE_CURRENT_BINARY_DIR}/data-sources/mexico.quad.png
-   COMMAND ${CMAKE_COMMAND} -E create_symlink ${PROJECT_SOURCE_DIR}/data-sources/wikipedia-wikidata/README.md  ${CMAKE_CURRENT_BINARY_DIR}/data-sources/Wikipedia-Wikidata.md
-   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Centos-7.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Centos-7.md
-   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-16.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-16.md
   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-18.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-18.md
-   COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
+   COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
+   COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
 )

-
+ADD_CUSTOM_TARGET(serve-doc
+    COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
+    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+)
--- a/docs/admin/Advanced-Installations.md
+++ b/docs/admin/Advanced-Installations.md
@@ -0,0 +1,216 @@
+# Advanced installations
+
+This page contains instructions for setting up multiple countries in 
+your Nominatim database. It is assumed that you have already successfully
+installed the Nominatim software itself, if not return to the 
+[installation page](Installation.md).
+
+## Importing multiple regions (without updates)
+
+To import multiple regions in your database you can simply give multiple
+OSM files to the import command:
+
+```
+nominatim import --osm-file file1.pbf --osm-file file2.pbf
+```
+
+If you already have imported a file and want to add another one, you can
+use the add-data function to import the additional data as follows:
+
+```
+nominatim add-data --file <FILE>
+nominatim refresh --postcodes
+nominatim index -j <NUMBER OF THREADS>
+```
+
+Please note that adding additional data is always significantly slower than
+the original import.
+
+## Importing multiple regions (with updates)
+
+If you want to import multiple regions _and_ be able to keep them up-to-date
+with updates, then you can use the scripts provided in the `utils` directory.
+
+These scripts will set up an `update` directory in your project directory,
+which has the following structure:
+
+```bash
+update
+    ├── europe
+    │   ├── andorra
+    │   │   └── sequence.state
+    │   └── monaco
+    │       └── sequence.state
+    └── tmp
+        └── europe
+                ├── andorra-latest.osm.pbf
+                └── monaco-latest.osm.pbf
+
+
+```
+
+The `sequence.state` files contain the sequence ID for each region. They will
+be used by pyosmium to get updates. The `tmp` folder is used for import dump and
+can be deleted once the import is complete.
+
+
+### Setting up multiple regions
+
+Create a project directory as described for the
+[simple import](Import.md#creating-the-project-directory). If necessary,
+you can also add an `.env` configuration with customized options. In particular,
+you need to make sure that `NOMINATIM_REPLICATION_UPDATE_INTERVAL` and
+`NOMINATIM_REPLICATION_RECHECK_INTERVAL` are set according to the update
+interval of the extract server you use.
+
+Copy the scripts `utils/import_multiple_regions.sh` and `utils/update_database.sh`
+into the project directory.
+
+Now customize both files as per your requirements:
+
+1. List of countries. e.g.
+
+        COUNTRIES="europe/monaco europe/andorra"
+
+2. URL to the service providing the extracts and updates. e.g.
+
+        BASEURL="https://download.geofabrik.de"
+        DOWNCOUNTRYPOSTFIX="-latest.osm.pbf"
+
+3. Followup in the update script can be set according to your installation.
+   E.g. for Photon,
+
+        FOLLOWUP="curl http://localhost:2322/nominatim-update"
+
+    will handle the indexing.
+
+
+To start the initial import, change into the project directory and run
+
+```
+    bash import_multiple_regions.sh
+```
+
+### Updating the database
+
+Change into the project directory and run the following command:
+
+    bash update_database.sh
+
+This will get diffs from the replication server, import diffs and index
+the database. The default replication server in the
+script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
+
+## Using an external PostgreSQL database
+
+You can install Nominatim using a database that runs on a different server when
+you have physical access to the file system on the other server. Nominatim
+uses a custom normalization library that needs to be made accessible to the
+PostgreSQL server. This section explains how to set up the normalization
+library.
+
+!!! note
+    The external module is only needed when using the legacy tokenizer.
+    If you have chosen the ICU tokenizer, then you can ignore this section
+    and follow the standard import documentation.
+
+### Option 1: Compiling the library on the database server
+
+The most reliable way to get a working library is to compile it on the database
+server. From the prerequisites you need at least cmake, gcc and the
+PostgreSQL server package.
+
+Clone or unpack the Nominatim source code, enter the source directory and
+create and enter a build directory.
+
+```sh
+cd Nominatim
+mkdir build
+cd build
+```
+
+Now configure cmake to only build the PostgreSQL module and build it:
+
+```
+cmake -DBUILD_IMPORTER=off -DBUILD_API=off -DBUILD_TESTS=off -DBUILD_DOCS=off -DBUILD_OSM2PGSQL=off ..
+make
+```
+
+When done, you find the normalization library in `build/module/nominatim.so`.
+Copy it to a place where it is readable and executable by the PostgreSQL server
+process.
+
+### Option 2: Compiling the library on the import machine
+
+You can also compile the normalization library on the machine from where you
+run the import.
+
+!!! important
+    You can only do this when the database server and the import machine have
+    the same architecture and run the same version of Linux. Otherwise there is
+    no guarantee that the compiled library is compatible with the PostgreSQL
+    server running on the database server.
+
+Make sure that the PostgreSQL server package is installed on the machine
+**with the same version as on the database server**. You do not need to install
+the PostgreSQL server itself.
+
+Download and compile Nominatim as per standard instructions. Once done, you find
+the normalization library in `build/module/nominatim.so`. Copy the file to
+the database server at a location where it is readable and executable by the
+PostgreSQL server process.
+
+### Running the import
+
+On the client side you now need to configure the import to point to the
+correct location of the library **on the database server**. Add the following
+line to your `.env` file:
+
+```php
+NOMINATIM_DATABASE_MODULE_PATH="<directory on the database server where nominatim.so resides>"
+```
+
+Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
+to follow the [standard instructions for importing](Import.md).
+
+
+## Moving the database to another machine
+
+For some configurations it may be useful to run the import on one machine, then
+move the database to another machine and run the Nominatim service from there.
+For example, you might want to use a large machine to be able to run the import
+quickly but only want a smaller machine for production because there is not so
+much load. Or you might want to do the import once and then replicate the
+database to many machines.
+
+The important thing to keep in mind when transferring the Nominatim installation
+is that you need to transfer the database _and the project directory_. Both
+parts are essential for your installation.
+
+The Nominatim database can be transferred using the `pg_dump`/`pg_restore` tool.
+Make sure to use the same version of PostgreSQL and PostGIS on source and
+target machine.
+
+!!! note
+    Before creating a dump of your Nominatim database, consider running
+    `nominatim freeze` first. Your database loses the ability to receive further
+    data updates but the resulting database is only about a third of the size
+    of a full database.
+
+Next install Nominatim on the target machine by following the standard installation
+instructions. Again, make sure to use the same version as the source machine.
+
+Create a project directory on your destination machine and set up the `.env`
+file to match the configuration on the source machine. Finally run
+
+    nominatim refresh --website
+
+to make sure that the local installation of Nominatim will be used.
+
+If you are using the legacy tokenizer you might also have to switch to the
+PostgreSQL module that was compiled on your target machine. If you get errors
+that PostgreSQL cannot find or access `nominatim.so` then rerun
+
+    nominatim refresh --functions
+
+on the target machine to update the location of the module.
--- a/docs/admin/Deployment.md
+++ b/docs/admin/Deployment.md
@@ -0,0 +1,148 @@
+# Deploying Nominatim
+
+The Nominatim API is implemented as a PHP application. The `website/` directory
+in the project directory contains the configured website. You can serve this
+in a production environment with any web server that is capable of running
+PHP scripts.
+
+This section gives a quick overview on how to configure Apache and Nginx to
+serve Nominatim. It is not meant as a full system administration guide on how
+to run a web service. Please refer to the documentation of
+[Apache](http://httpd.apache.org/docs/current/) and
+[Nginx](https://nginx.org/en/docs/)
+for background information on configuring the services.
+
+!!! Note
+    Throughout this page, we assume that your Nominatim project directory is
+    located in `/srv/nominatim-project` and that you have installed Nominatim
+    using the default installation prefix `/usr/local`. If you have put it
+    somewhere else, you need to adjust the commands and configuration
+    accordingly.
+
+    We further assume that your web server runs as user `www-data`. Older
+    versions of CentOS may still use the user name `apache`. You also need
+    to adapt the instructions in this case.
+
+## Making the website directory accessible
+
+You need to make sure that the `website` directory is accessible for the
+web server user. You can check that the permissions are correct by accessing
+one of the PHP files as the web server user:
+
+``` sh
+sudo -u www-data head -n 1 /srv/nominatim-project/website/search.php
+```
+
+If this shows a permission error, then you need to adapt the permissions of
+each directory in the path so that it is executable for `www-data`.
+
+If you have SELinux enabled, further adjustments may be necessary to give the
+web server access. At a minimum the following SELinux labelling should be done
+for Nominatim:
+
+``` sh
+sudo semanage fcontext -a -t httpd_sys_content_t "/usr/local/lib/nominatim/lib-php(/.*)?"
+sudo semanage fcontext -a -t httpd_sys_content_t "/srv/nominatim-project/website(/.*)?"
+sudo semanage fcontext -a -t lib_t "/srv/nominatim-project/module/nominatim.so"
+sudo restorecon -R -v /usr/local/lib/nominatim
+sudo restorecon -R -v /srv/nominatim-project
+```
+
+## Nominatim with Apache
+
+### Installing the required packages
+
+With Apache you can use the PHP module to run Nominatim.
+
+Under Ubuntu/Debian install them with:
+
+``` sh
+sudo apt install apache2 libapache2-mod-php
+```
+
+### Configuring Apache
+
+Make sure your Apache configuration contains the required permissions for the
+directory and create an alias:
+
+``` apache
+<Directory "/srv/nominatim-project/website">
+  Options FollowSymLinks MultiViews
+  AddType text/html   .php
+  DirectoryIndex search.php
+  Require all granted
+</Directory>
+Alias /nominatim /srv/nominatim-project/website
+```
+
+After making changes in the Apache config you need to restart Apache.
+The website should now be available on `http://localhost/nominatim`.
+
+## Nominatim with Nginx
+
+### Installing the required packages
+
+Nginx has no built-in PHP interpreter. You need to use php-fpm as a daemon for
+serving PHP cgi.
+
+On Ubuntu/Debian install nginx and php-fpm with:
+
+``` sh
+sudo apt install nginx php-fpm
+```
+
+### Configure php-fpm and Nginx
+
+By default php-fpm listens on a network socket. If you want it to listen to a
+Unix socket instead, change the pool configuration
+(`/etc/php/<php version>/fpm/pool.d/www.conf`) as follows:
+
+``` ini
+; Replace the tcp listener and add the unix socket
+listen = /var/run/php-fpm-nominatim.sock
+
+; Ensure that the daemon runs as the correct user
+listen.owner = www-data
+listen.group = www-data
+listen.mode = 0666
+```
+
+Tell nginx that php files are special and to fastcgi_pass to the php-fpm
+unix socket by adding the location definition to the default configuration.
+
+``` nginx
+root /srv/nominatim-project/website;
+index search.php;
+location / {
+    try_files $uri $uri/ @php;
+}
+
+location @php {
+    fastcgi_param SCRIPT_FILENAME "$document_root$uri.php";
+    fastcgi_param PATH_TRANSLATED "$document_root$uri.php";
+    fastcgi_param QUERY_STRING    $args;
+    fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
+    fastcgi_index index.php;
+    include fastcgi_params;
+}
+
+location ~ [^/]\.php(/|$) {
+    fastcgi_split_path_info ^(.+?\.php)(/.*)$;
+    if (!-f $document_root$fastcgi_script_name) {
+        return 404;
+    }
+    fastcgi_pass unix:/var/run/php-fpm-nominatim.sock;
+    fastcgi_index search.php;
+    include fastcgi.conf;
+}
+```
+
+Restart the nginx and php-fpm services and the website should now be available
+at `http://localhost/`.
+
+## Nominatim with other webservers
+
+Users have created instructions for other webservers:
+
+* [Caddy](https://github.com/osm-search/Nominatim/discussions/2580)
+
--- a/docs/admin/Faq.md
+++ b/docs/admin/Faq.md
@@ -16,27 +16,44 @@ was killed. If it looks like this:
 then you can resume with the following command:

 ```sh
-./utils/setup.php --index --create-search-indices --create-country-names
+nominatim import --continue indexing
 ```

 If the reported rank is 26 or higher, you can also safely add `--index-noanalyse`.


+### PostgreSQL crashed "invalid page in block"
+
+This is usually a serious problem. It can be caused by a hardware issue, for
+example when not all data was written to disk. Check the PostgreSQL log file
+and search the PostgreSQL issues/mailing list for hints.
+
+If it happened during index creation you can try rerunning the step with
+
+```sh
+nominatim import --continue indexing
+```
+
+Otherwise it's best to start the full setup from the beginning.
+
+
 ### PHP "open_basedir restriction in effect" warnings

    PHP Warning:  file_get_contents(): open_basedir restriction in effect.

-You need to adjust the [open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir) setting
-in your PHP configuration (`php.ini file`). By default this setting may look like this:
+You need to adjust the
+[open_basedir](https://www.php.net/manual/en/ini.core.php#ini.open-basedir)
+setting in your PHP configuration (`php.ini` file). By default this setting may
+look like this:

    open_basedir = /srv/http/:/home/:/tmp/:/usr/share/pear/

-Either add reported directories to the list or disable this setting temporarily by
-dding ";" at the beginning of the line. Don't forget to enable this setting again
-once you are done with the PHP command line operations.
+Either add reported directories to the list or disable this setting temporarily
+by adding ";" at the beginning of the line. Don't forget to enable this setting
+again once you are done with the PHP command line operations.


-### PHP timzeone warnings
+### PHP timezone warnings

 The Apache log may contain lots of PHP warnings like this:
    `PHP Warning:  date_default_timezone_set() function.`
@@ -62,11 +79,11 @@ When running the import you may get a version mismatch:

 pg_config seems to use bad includes sometimes when multiple versions
 of PostgreSQL are available in the system. Make sure you remove the
-server development libraries (`postgresql-server-dev-9.5` on Ubuntu)
+server development libraries (`postgresql-server-dev-13` on Ubuntu)
 and recompile (`cmake .. && make`).


-## I see the error "ERROR: permission denied for language c"
+### I see the error "ERROR: permission denied for language c"

 `nominatim.so`, written in C, is required to be installed on the database
 server. Some managed database (cloud) services like Amazon RDS do not allow
@@ -76,7 +93,7 @@ on a non-managed machine.

 ### I see the error: "function transliteration(text) does not exist"

-Reinstall the nominatim functions with `setup.php --create--functions`
+Reinstall the nominatim functions with `nominatim refresh --functions`
 and check for any errors, e.g. a missing `nominatim.so` file.

 ### I see the error: "ERROR: mmap (remap) failed"
@@ -91,7 +108,8 @@ vboxfs.

 ### nominatim UPDATE failed: ERROR: buffer 179261 is not owned by resource owner Portal

-Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168) during the initial import of the database. It's
+Several users [reported this](https://github.com/openstreetmap/Nominatim/issues/1168)
+during the initial import of the database. It's
 something PostgreSQL internal Nominatim doesn't control. And PostgreSQL forums
 suggest it's threading related but definitely some kind of crash of a process.
 Users reported either rebooting the server, different hardware or just trying
@@ -103,29 +121,12 @@ The server cannot access your database. Add `&debug=1` to your URL
 to get the full error message.


-### On CentOS the website shows "Could not connect to server"
-
-`could not connect to server: No such file or directory`
-
-On CentOS v7 the PostgreSQL server is started with `systemd`.
-Check if `/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`.
-If so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security feature,
-so use the [preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
-
-However, you can solve this the quick and dirty way by commenting out that line and then run
-
-    sudo systemctl daemon-reload
-    sudo systemctl restart httpd
-
-
-### "must be an array or an object that implements Countable" warning in /usr/share/pear/DB.php
-
-The warning started with PHP 7.2. Make sure you have at least [version 1.9.3 of PEAR DB](https://github.com/pear/DB/releases)
-installed.
-
 ### Website reports "DB Error: insufficient permissions"

-The user the webserver, e.g. Apache, runs under needs to have access to the Nominatim database. You can find the user like [this](https://serverfault.com/questions/125865/finding-out-what-user-apache-is-running-as), for default Ubuntu operating system for example it's `www-data`.
+The user the webserver, e.g. Apache, runs under needs to have access to the
+Nominatim database. You can find the user like
+[this](https://serverfault.com/questions/125865/finding-out-what-user-apache-is-running-as),
+for default Ubuntu operating system for example it's `www-data`.

 1. Repeat the `createuser` step of the installation instructions.

@@ -150,7 +151,8 @@ Example error message

 The PostgreSQL database, i.e. user `postgres`, needs to have access to that file.

-The permission need to be read & executable by everybody, e.g.
+The permissions need to be read & executable by everybody, but not writeable
+by everybody, e.g.

 ```
   -rwxr-xr-x 1 nominatim nominatim 297984 build/module/nominatim.so
@@ -158,58 +160,31 @@ The permission need to be read & executable by everybody, e.g.

 Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.

-When running SELinux, make sure that the
-[context is set up correctly](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
+If you recently updated your operating system, updated PostgreSQL to
+a new version or moved files (e.g. the build directory) you should
+recreate `nominatim.so`. Try
+
+```
+   cd build
+   rm -r module/
+   cmake $main_Nominatim_path && make
+```

 ### Setup.php fails with "DB Error: extension not found"

 Make sure you have the PostgreSQL extensions "hstore" and "postgis" installed.
-See the installation instruction for a full list of required packages.
+See the installation instructions for a full list of required packages.


-### Setup.php reports "Cannot redeclare getDB()"
-
-`Cannot redeclare getDB() (previously declared in /your/path/Nominatim/lib/db.php:4)`
-
-The message is a bit misleading as PHP needs to load the file `DB.php` and
-instead re-loads Nominatim's `db.php`. To solve this make sure you
-have the [Pear module 'DB'](https://pear.php.net/package/DB/) installed.
-
-    sudo pear install DB
-
 ### I forgot to delete the flatnodes file before starting an import.

 That's fine. For each import the flatnodes file get overwritten.
-See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage]()
+See [https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage](https://help.openstreetmap.org/questions/52419/nominatim-flatnode-storage)
 for more information.


 ## Running your own instance

-### Can I import multiple countries and keep them up to date?
-
-You should use the extracts and updates from https://download.geofabrik.de.
-For the initial import, download the countries you need and merge them.
-See [OSM Help](https://help.openstreetmap.org/questions/48843/merging-two-or-more-geographical-areas-to-import-two-or-more-osm-files-in-nominatim)
-for examples how to do that. Use the resulting single osm file when
-running `setup.php`.
-
-For updates you need to download the change files for each country
-once per day and apply them **separately** using
-
-    ./utils/update.php --import-diff <filename> --index
-
-See [this issue](https://github.com/openstreetmap/Nominatim/issues/60#issuecomment-18679446)
-for a script that runs the updates using osmosis.
-
 ### Can I import negative OSM ids into Nominatim?

 See [this question of Stackoverflow](https://help.openstreetmap.org/questions/64662/nominatim-flatnode-with-negative-id).
-
-### Missing XML or text declaration
-
-The website might show: `XML Parsing Error: XML or text declaration not at start of entity Location.`
-
-Make sure there are no spaces at the beginning of your `settings/local.php` file.
-
-
--- a/docs/admin/Import-and-Update.md
+++ b/docs/admin/Import-and-Update.md
@@ -1,276 +0,0 @@
-# Importing and Updating the Database
-
-The following instructions explain how to create a Nominatim database
-from an OSM planet file and how to keep the database up to date. It
-is assumed that you have already successfully installed the Nominatim
-software itself, if not return to the [installation page](Installation.md).
-
-## Configuration setup in settings/local.php
-
-The Nominatim server can be customized via the file `settings/local.php`
-in the build directory. Note that this is a PHP file, so it must always
-start like this:
-
-    <?php
-
-without any leading spaces.
-
-There are lots of configuration settings you can tweak. Have a look
-at `settings/default.php` for a full list. Most should have a sensible default.
-
-#### Flatnode files
-
-If you plan to import a large dataset (e.g. Europe, North America, planet),
-you should also enable flatnode storage of node locations. With this
-setting enabled, node coordinates are stored in a simple file instead
-of the database. This will save you import time and disk storage.
-Add to your `settings/local.php`:
-
-    @define('CONST_Osm2pgsql_Flatnode_File', '/path/to/flatnode.file');
-
-Replace the second part with a suitable path on your system and make sure
-the directory exists. There should be at least 40GB of free space.
-
-## Downloading additional data
-
-### Wikipedia rankings
-
-Wikipedia can be used as an optional auxiliary data source to help indicate
-the importance of OSM features. Nominatim will work without this information
-but it will improve the quality of the results if this is installed.
-This data is available as a binary download:
-
-    cd $NOMINATIM_SOURCE_DIR/data
-    wget https://www.nominatim.org/data/wikipedia_article.sql.bin
-    wget https://www.nominatim.org/data/wikipedia_redirect.sql.bin
-
-Combined the 2 files are around 1.5GB and add around 30GB to the install
-size of Nominatim. They also increase the install time by an hour or so.
-
-*NOTE:* you'll need to download the Wikipedia rankings before performing
-the initial import of the data if you want the rankings applied to the
-loaded data.
-
-### Great Britain, USA postcodes
-
-Nominatim can use postcodes from an external source to improve searches that
-involve a GB or US postcode. This data can be optionally downloaded:
-
-    cd $NOMINATIM_SOURCE_DIR/data
-    wget https://www.nominatim.org/data/gb_postcode_data.sql.gz
-    wget https://www.nominatim.org/data/us_postcode_data.sql.gz
-
-## Choosing the Data to Import
-
-In its default setup Nominatim is configured to import the full OSM data
-set for the entire planet. Such a setup requires a powerful machine with
-at least 32GB of RAM and around 800GB of SSD hard disks. Depending on your
-use case there are various ways to reduce the amount of data imported. This
-section discusses these methods. They can also be combined.
-
-### Using an extract
-
-If you only need geocoding for a smaller region, then precomputed extracts
-are a good way to reduce the database size and import time.
-[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
-They even have daily updates which can be used with the update process described
-below. There are also
-[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
-
-Please be aware that some extracts are not cut exactly along the country
-boundaries. As a result some parts of the boundary may be missing which means
-that Nominatim cannot compute the areas for some administrative areas.
-
-### Dropping Data Required for Dynamic Updates
-
-About half of the data in Nominatim's database is not really used for serving
-the API. It is only there to allow the data to be updated from the latest
-changes from OSM. For many uses these dynamic updates are not really required.
-If you don't plan to apply updates, the dynamic part of the database can be
-safely dropped using the following command:
-
-```
-./utils/setup.php --drop
-```
-
-Note that you still need to provide for sufficient disk space for the initial
-import. So this option is particularly interesting if you plan to transfer the
-database or reuse the space later.
-
-### Reverse-only Imports
-
-If you only want to use the Nominatim database for reverse lookups or
-if you plan to use the installation only for exports to a
-[photon](https://photon.komoot.de/) database, then you can set up a database
-without search indexes. Add `--reverse-only` to your setup command above.
-
-This saves about 5% of disk space.
-
-### Filtering Imported Data
-
-Nominatim normally sets up a full search database containing administrative
-boundaries, places, streets, addresses and POI data. There are also other
-import styles available which only read selected data:
-
-* **settings/import-admin.style**
-  Only import administrative boundaries and places.
-* **settings/import-street.style**
-  Like the admin style but also adds streets.
-* **settings/import-address.style**
-  Import all data necessary to compute addresses down to house number level.
-* **settings/import-full.style**
-  Default style that also includes points of interest.
-
-The style can be changed with the configuration `CONST_Import_Style`.
-
-To give you an idea of the impact of using the different styles, the table
-below gives rough estimates of the final database size after import of a
-2018 planet and after using the `--drop` option. It also shows the time
-needed for the import on a machine with 32GB RAM, 4 CPUS and SSDs. Note that
-the given sizes are just an estimate meant for comparison of style requirements.
-Your planet import is likely to be larger as the OSM data grows with time.
-
-style     | Import time  |  DB size   |  after drop
----------|--------------|------------|------------
-admin     |    5h        |  190 GB    |   20 GB
-street    |   42h        |  400 GB    |  180 GB
-address   |   59h        |  500 GB    |  260 GB
-full      |   80h        |  575 GB    |  300 GB
-
-You can also customize the styles further. For an description of the
-style format see [the development section](../develop/Import.md).
-
-## Initial import of the data
-
-**Important:** first try the import with a small extract, for example from
-[Geofabrik](https://download.geofabrik.de).
-
-Download the data to import and load the data with the following command
-from the build directory:
-
-```sh
-./utils/setup.php --osm-file <data file> --all [--osm2pgsql-cache 28000] 2>&1 | tee setup.log
-```
-
-The `--osm2pgsql-cache` parameter is optional but strongly recommended for
-planet imports. It sets the node cache size for the osm2pgsql import part
-(see `-C` parameter in osm2pgsql help). As a rule of thumb, this should be
-about the same size as the file you are importing but never more than
-2/3 of RAM available. If your machine starts swapping reduce the size.
-
-Computing word frequency for search terms can improve the performance of
-forward geocoding in particular under high load as it helps PostgreSQL's query
-planner to make the right decisions. To recompute word counts run:
-
-```sh
-./utils/update.php --recompute-word-counts
-```
-
-This will take a couple of hours for a full planet installation. You can
-also defer that step to a later point in time when you realise that
-performance becomes an issue. Just make sure that updates are stopped before
-running this function.
-
-If you want to be able to search for places by their type through
-[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
-you also need to enable these key phrases like this:
-
-    ./utils/specialphrases.php --wiki-import > specialphrases.sql
-    psql -d nominatim -f specialphrases.sql
-
-Note that this command downloads the phrases from the wiki link above.
-
-
-## Installing Tiger housenumber data for the US
-
-Nominatim is able to use the official [TIGER](https://www.census.gov/geo/maps-data/data/tiger.html)
-address set to complement the OSM house number data in the US. You can add
-TIGER data to your own Nominatim instance by following these steps. The
-entire US adds about 10GB to your database.
-
-  1. Get preprocessed TIGER 2019 data and unpack it into the
-     data directory in your Nominatim sources:
-
-        cd Nominatim/data
-        wget https://nominatim.org/data/tiger2019-nominatim-preprocessed.tar.gz
-        tar xf tiger2019-nominatim-preprocessed.tar.gz
-
-    `data-source/us-tiger/README.md` explains how the data got preprocessed.
-
-  2. Import the data into your Nominatim database:
-
-        ./utils/setup.php --import-tiger-data
-
-  3. Enable use of the Tiger data in your `settings/local.php` by adding:
-
-         @define('CONST_Use_US_Tiger_Data', true);
-
-  4. Apply the new settings:
-
-```sh
-    ./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
-```
-
-
-## Updates
-
-There are many different ways to update your Nominatim database.
-The following section describes how to keep it up-to-date with Pyosmium.
-For a list of other methods see the output of `./utils/update.php --help`.
-
-#### Installing the newest version of Pyosmium
-
-It is recommended to install Pyosmium via pip. Make sure to use python3.
-Run (as the same user who will later run the updates):
-
-```sh
-pip3 install --user osmium
-```
-
-Nominatim needs a tool called `pyosmium-get-updates` which comes with
-Pyosmium. You need to tell Nominatim where to find it. Add the
-following line to your `settings/local.php`:
-
-    @define('CONST_Pyosmium_Binary', '/home/user/.local/bin/pyosmium-get-changes');
-
-The path above is fine if you used the `--user` parameter with pip.
-Replace `user` with your user name.
-
-#### Setting up the update process
-
-Next the update needs to be initialised. By default Nominatim is configured
-to update using the global minutely diffs.
-
-If you want a different update source you will need to add some settings
-to `settings/local.php`. For example, to use the daily country extracts
-diffs for Ireland from Geofabrik add the following:
-
-    // base URL of the replication service
-    @define('CONST_Replication_Url', 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates');
-    // How often upstream publishes diffs
-    @define('CONST_Replication_Update_Interval', '86400');
-    // How long to sleep if no update found yet
-    @define('CONST_Replication_Recheck_Interval', '900');
-
-To set up the update process now run the following command:
-
-    ./utils/update.php --init-updates
-
-It outputs the date where updates will start. Recheck that this date is
-what you expect.
-
-The `--init-updates` command needs to be rerun whenever the replication service
-is changed.
-
-#### Updating Nominatim
-
-The following command will keep your database constantly up to date:
-
-    ./utils/update.php --import-osmosis-all
-
-(Note that even though the old name "import-osmosis-all" has been kept for compatibility reasons, Osmosis is not required to run this - it uses pyosmium behind the scenes.)
-
-If you have imported multiple country extracts and want to keep them
-up-to-date, have a look at the script in
-[issue #60](https://github.com/openstreetmap/Nominatim/issues/60).
-
--- a/docs/admin/Import.md
+++ b/docs/admin/Import.md
@@ -0,0 +1,288 @@
+# Importing the Database
+
+The following instructions explain how to create a Nominatim database
+from an OSM planet file. It is assumed that you have already successfully
+installed the Nominatim software itself and the `nominatim` tool can be found
+in your `PATH`. If this is not the case, return to the
+[installation page](Installation.md).
+
+## Creating the project directory
+
+Before you start the import, you should create a project directory for your
+new database installation. This directory receives all data that is related
+to a single Nominatim setup: configuration, extra data, etc. Create a project
+directory apart from the Nominatim software and change into the directory:
+
+```
+mkdir ~/nominatim-planet
+cd ~/nominatim-planet
+```
+
+In the following, we refer to the project directory as `$PROJECT_DIR`. To be
+able to copy&paste instructions, you can export the appropriate variable:
+
+```
+export PROJECT_DIR=~/nominatim-planet
+```
+
+The Nominatim tool assumes by default that the current working directory is
+the project directory but you may explicitly state a different directory using
+the `--project-dir` parameter. The following instructions assume that you run
+all commands from the project directory.
+
+!!! tip "Migration Tip"
+
+    Nominatim used to be run directly from the build directory until version 3.6.
+    Essentially, the build directory functioned as the project directory
+    for the database installation. This setup still works and can be useful for
+    development purposes. It is not recommended anymore for production setups.
+    Create a project directory that is separate from the Nominatim software.
+
+### Configuration setup in `.env`
+
+The Nominatim server can be customized via an `.env` configuration file in the 
+project directory. This is a file in [dotenv](https://github.com/theskumar/python-dotenv)
+format which looks the same as variable settings in a standard shell environment.
+You can also set the same configuration via environment variables. All
+settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
+variables.
+
+There are lots of configuration settings you can tweak. A full reference
+can be found in the chapter [Configuration Settings](../customize/Settings.md).
+Most should have a sensible default.
+
+#### Flatnode files
+
+If you plan to import a large dataset (e.g. Europe, North America, planet),
+you should also enable flatnode storage of node locations. With this
+setting enabled, node coordinates are stored in a simple file instead
+of the database. This will save you import time and disk storage.
+Add to your `.env`:
+
+    NOMINATIM_FLATNODE_FILE="/path/to/flatnode.file"
+
+Replace the second part with a suitable path on your system and make sure
+the directory exists. There should be at least 75GB of free space.
+
+## Downloading additional data
+
+### Wikipedia/Wikidata rankings
+
+Wikipedia can be used as an optional auxiliary data source to help indicate
+the importance of OSM features. Nominatim will work without this information
+but it will improve the quality of the results if this is installed.
+This data is available as a binary download. Put it into your project directory:
+
+    cd $PROJECT_DIR
+    wget https://www.nominatim.org/data/wikimedia-importance.sql.gz
+
+The file is about 400MB and adds around 4GB to the Nominatim database.
+
+!!! tip
+    If you forgot to download the wikipedia rankings, you can also add
+    importances after the import. Download the file, then run
+    `nominatim refresh --wiki-data --importance`. Updating importances for
+    a planet can take a couple of hours.
+
+### External postcodes
+
+Nominatim can use postcodes from an external source to improve searching with
+postcodes. We provide precomputed postcodes sets for the US (using TIGER data)
+and the UK (using the [CodePoint OpenData set](https://osdatahub.os.uk/downloads/open/CodePointOpen)).
+This data can be optionally downloaded into the project directory:
+
+    cd $PROJECT_DIR
+    wget https://www.nominatim.org/data/gb_postcodes.csv.gz
+    wget https://www.nominatim.org/data/us_postcodes.csv.gz
+
+You can also add your own custom postcode sources, see
+[Customization of postcodes](../customize/Postcodes.md).
+
+## Choosing the data to import
+
+In its default setup Nominatim is configured to import the full OSM data
+set for the entire planet. Such a setup requires a powerful machine with
+at least 64GB of RAM and around 900GB of SSD hard disks. Depending on your
+use case there are various ways to reduce the amount of data imported. This
+section discusses these methods. They can also be combined.
+
+### Using an extract
+
+If you only need geocoding for a smaller region, then precomputed OSM extracts
+are a good way to reduce the database size and import time.
+[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
+They even have daily updates which can be used with the update process described
+[in the next section](Update.md). There are also
+[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
+
+Please be aware that some extracts are not cut exactly along the country
+boundaries. As a result some parts of the boundary may be missing which means
+that Nominatim cannot compute the areas for some administrative areas.
+
+### Dropping Data Required for Dynamic Updates
+
+About half of the data in Nominatim's database is not really used for serving
+the API. It is only there to allow the data to be updated from the latest
+changes from OSM. For many uses these dynamic updates are not really required.
+If you don't plan to apply updates, you can run the import with the
+`--no-updates` parameter. This will drop the dynamic part of the database as
+soon as it is not required anymore.
+
+You can also drop the dynamic part later using the following command:
+
+```
+nominatim freeze
+```
+
+Note that you still need to provide for sufficient disk space for the initial
+import. So this option is particularly interesting if you plan to transfer the
+database or reuse the space later.
+
+!!! warning
+    The data structures for updates are also required when adding additional data
+    after the import, for example [TIGER housenumber data](../customize/Tiger.md).
+    If you plan to use those, you must not use the `--no-updates` parameter.
+    Do a normal import, add the external data and once you are done with
+    everything run `nominatim freeze`.
+
+
+### Reverse-only Imports
+
+If you only want to use the Nominatim database for reverse lookups or
+if you plan to use the installation only for exports to a
+[photon](https://photon.komoot.io/) database, then you can set up a database
+without search indexes. Add `--reverse-only` to your import command.
+
+This saves about 5% of disk space.
+
+### Filtering Imported Data
+
+Nominatim normally sets up a full search database containing administrative
+boundaries, places, streets, addresses and POI data. There are also other
+import styles available which only read selected data:
+
+* **admin**
+  Only import administrative boundaries and places.
+* **street**
+  Like the admin style but also adds streets.
+* **address**
+  Import all data necessary to compute addresses down to house number level.
+* **full**
+  Default style that also includes points of interest.
+* **extratags**
+  Like the full style but also adds most of the OSM tags into the extratags
+  column.
+
+The style can be changed with the configuration `NOMINATIM_IMPORT_STYLE`.
+
+To give you an idea of the impact of using the different styles, the table
+below gives rough estimates of the final database size after import of a
+2020 planet and after using the `--drop` option. It also shows the time
+needed for the import on a machine with 64GB RAM, 4 CPUs and NVME disks.
+Note that the given sizes are just an estimate meant for comparison of
+style requirements. Your planet import is likely to be larger as the
+OSM data grows with time.
+
+style     | Import time  |  DB size   |  after drop
+----------|--------------|------------|------------
+admin     |    4h        |  215 GB    |   20 GB
+street    |   22h        |  440 GB    |  185 GB
+address   |   36h        |  545 GB    |  260 GB
+full      |   54h        |  640 GB    |  330 GB
+extratags |   54h        |  650 GB    |  340 GB
+
+You can also customize the styles further.
+A [description of the style format](../customize/Import-Styles.md)
+can be found in the customization guide.
+
+## Initial import of the data
+
+!!! danger "Important"
+    First try the import with a small extract, for example from
+    [Geofabrik](https://download.geofabrik.de).
+
+Download the data to import. Then issue the following command
+from the **project directory** to start the import:
+
+```sh
+nominatim import --osm-file <data file> 2>&1 | tee setup.log
+```
+
+The **project directory** is the one that you have set up at the beginning.
+See [creating the project directory](#creating-the-project-directory).
+
+### Notes on full planet imports
+
+Even on a perfectly configured machine
+the import of a full planet takes around 2 days. Once you see messages
+with `Rank .. ETA` appear, the indexing process has started. This part takes
+the most time. There are 30 ranks to process. Ranks 26 and 30 are the most complex.
+They each take about a third of the total import time. If you have not reached
+rank 26 after two days of import, it is worth revisiting your system
+configuration as it may not be optimal for the import.
+
+### Notes on memory usage
+
+In the first step of the import Nominatim uses [osm2pgsql](https://osm2pgsql.org)
+to load the OSM data into the PostgreSQL database. This step is very demanding
+in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at 
+this point. PostgreSQL blocks at least the part of RAM that has been configured
+with the `shared_buffers` parameter during
+[PostgreSQL tuning](Installation.md#postgresql-tuning)
+and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
+its internal data structures, potentially more when it has to process very large
+relations. In addition it needs to maintain a cache for node locations. The size
+of this cache can be configured with the parameter `--osm2pgsql-cache`.
+
+When importing with a flatnode file, it is best to disable the node cache
+completely and leave the memory for the flatnode file. Nominatim will do this
+by default, so you do not need to configure anything in this case.
+
+For imports without a flatnode file, set `--osm2pgsql-cache` approximately to
+the size of the OSM pbf file you are importing. The size needs to be given in
+MB. Make sure you leave enough RAM for PostgreSQL and osm2pgsql as mentioned
+above. If the system starts swapping or you are getting out-of-memory errors,
+reduce the cache size or even consider using a flatnode file.
+
+
+### Testing the installation
+
+Run this script to verify that all required tables and indices got created
+successfully.
+
+```sh
+nominatim admin --check-database
+```
+
+Now you can try out your installation by running:
+
+```sh
+nominatim serve
+```
+
+This runs a small test server normally used for development. You can use it
+to verify that your installation is working. Go to
+`http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
+
+Note that search queries are not supported for reverse-only imports. You can run a
+reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+
+To run Nominatim via webservers like Apache or nginx, please read the
+[Deployment chapter](Deployment.md).
+
+## Adding search through category phrases
+
+If you want to be able to search for places by their type through
+[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
+you also need to import these key phrases like this:
+
+```sh
+nominatim special-phrases --import-from-wiki
+```
+
+Note that this command downloads the phrases from the wiki link above. You
+need internet access for this step.
+
+You can also import special phrases from a csv file, for more 
+information please see the [Customization part](../customize/Special-Phrases.md).
--- a/docs/admin/Installation.md
+++ b/docs/admin/Installation.md
@@ -4,9 +4,9 @@ This page contains generic installation instructions for Nominatim and its
 prerequisites. There are also step-by-step instructions available for
 the following operating systems:

+  * [Ubuntu 22.04](../appendix/Install-on-Ubuntu-22.md)
+  * [Ubuntu 20.04](../appendix/Install-on-Ubuntu-20.md)
  * [Ubuntu 18.04](../appendix/Install-on-Ubuntu-18.md)
-  * [Ubuntu 16.04](../appendix/Install-on-Ubuntu-16.md)
-  * [CentOS 7.2](../appendix/Install-on-Centos-7.md)

 These OS-specific instructions can also be found in executable form
 in the `vagrant/` directory.
@@ -16,143 +16,165 @@ and can't offer support.

  * [Docker](https://github.com/mediagis/nominatim-docker)
  * [Docker on Kubernetes](https://github.com/peter-evans/nominatim-k8s)
+  * [Kubernetes with Helm](https://github.com/robjuz/helm-charts/blob/master/charts/nominatim/README.md)
  * [Ansible](https://github.com/synthesio/infra-ansible-nominatim)

 ## Prerequisites

 ### Software

+!!! Warning
+    For larger installations you **must have** PostgreSQL 11+ and PostGIS 3+
+    otherwise import and queries will be slow to the point of being unusable.
+    Query performance has marked improvements with PostgreSQL 13+ and PostGIS 3.2+.
+
 For compiling:

  * [cmake](https://cmake.org/)
-  * [libxml2](http://xmlsoft.org/)
-  * a recent C++ compiler
-
-Nominatim comes with its own version of osm2pgsql. See the
-osm2pgsql README for additional dependencies required for compiling osm2pgsql.
-
-For running tests:
-
-  * [behave](http://pythonhosted.org/behave/)
-  * [Psycopg2](https://initd.org/psycopg)
-  * [nose](https://nose.readthedocs.io)
-  * [phpunit](https://phpunit.de)
+  * [expat](https://libexpat.github.io/)
+  * [proj](https://proj.org/)
+  * [bzip2](http://www.bzip.org/)
+  * [zlib](https://www.zlib.net/)
+  * [ICU](http://site.icu-project.org/)
+  * [Boost libraries](https://www.boost.org/), including system and filesystem
+  * PostgreSQL client libraries
+  * a recent C++ compiler (gcc 5+ or Clang 3.8+)

 For running Nominatim:

-  * [PostgreSQL](https://www.postgresql.org) (9.3 or later)
-  * [PostGIS](https://postgis.org) (2.2 or later)
+  * [PostgreSQL](https://www.postgresql.org) (9.6+ will work, 11+ strongly recommended)
+  * [PostGIS](https://postgis.net) (2.2+ will work, 3.0+ strongly recommended)
+  * [Python 3](https://www.python.org/) (3.6+)
+  * [Psycopg2](https://www.psycopg.org) (2.7+)
+  * [Python Dotenv](https://github.com/theskumar/python-dotenv)
+  * [psutil](https://github.com/giampaolo/psutil)
+  * [Jinja2](https://palletsprojects.com/p/jinja/)
+  * [PyICU](https://pypi.org/project/PyICU/)
+  * [PyYaml](https://pyyaml.org/) (5.1+)
+  * [datrie](https://github.com/pytries/datrie)
  * [PHP](https://php.net) (7.0 or later)
  * PHP-pgsql
  * PHP-intl (bundled with PHP)
-  * [PEAR::DB](https://pear.php.net/package/DB)
-  * a webserver (apache or nginx are recommended)
+  * PHP-cgi (for running queries from the command line)

 For running continuous updates:

-  * [pyosmium](https://osmcode.org/pyosmium/) (with Python 3)
+  * [pyosmium](https://osmcode.org/pyosmium/)
+
+For dependencies for running tests and building documentation, see
+the [Development section](../develop/Development-Environment.md).

 ### Hardware

 A minimum of 2GB of RAM is required or installation will fail. For a full
-planet import 32GB of RAM or more are strongly recommended.
+planet import 128GB of RAM or more are strongly recommended. Do not report
+out of memory problems if you have less than 64GB RAM.

-For a full planet install you will need at least 700GB of hard disk space
-(take into account that the OSM database is growing fast). SSD disks
-will help considerably to speed up import and queries.
+For a full planet install you will need at least 1TB of hard disk space.
+Take into account that the OSM database is growing fast.
+Fast disks are essential. Using NVME disks is recommended.

-On a 6-core machine with 32GB RAM and SSDs the import of a full planet takes
-a bit more than 2 days. Without SSDs 7-8 days are more realistic.
+Even on a well configured machine the import of a full planet takes
+around 2 days. On traditional spinning disks, 7-8 days are more realistic.

-
-## Setup of the server
-
-### PostgreSQL tuning
+## Tuning the PostgreSQL database

 You might want to tune your PostgreSQL installation so that the later steps
 make best use of your hardware. You should tune the following parameters in
 your `postgresql.conf` file.

-    shared_buffers (2GB)
-    maintenance_work_mem (10GB)
-    work_mem (50MB)
-    effective_cache_size (24GB)
+    shared_buffers = 2GB
+    maintenance_work_mem = (10GB)
+    autovacuum_work_mem = 2GB
+    work_mem = (50MB)
+    effective_cache_size = (24GB)
    synchronous_commit = off
-    checkpoint_segments = 100 # only for postgresql <= 9.4
+    max_wal_size = 1GB
    checkpoint_timeout = 10min
    checkpoint_completion_target = 0.9

 The numbers in brackets behind some parameters seem to work fine for
-32GB RAM machine. Adjust to your setup.
+a 64GB RAM machine. Adjust to your setup. A higher number for `max_wal_size`
+means that PostgreSQL needs to run checkpoints less often but it does require
+the additional space on your disk.
+
+Autovacuum must not be switched off because it ensures that the
+tables are frequently analysed. If your machine has very little memory,
+you might consider setting:
+
+    autovacuum_max_workers = 1
+
+and even reduce `autovacuum_work_mem` further. This will reduce the amount
+of memory that autovacuum takes away from the import process.

 For the initial import, you should also set:

    fsync = off
    full_page_writes = off

-Don't forget to reenable them after the initial import or you risk database
-corruption. Autovacuum must not be switched off because it ensures that the
-tables are frequently analysed.
-
-### Webserver setup
-
-The `website/` directory in the build directory contains the configured
-website. Include the directory into your webbrowser to serve php files
-from there.
-
-#### Configure for use with Apache
-
-Make sure your Apache configuration contains the required permissions for the
-directory and create an alias:
-
-    <Directory "/srv/nominatim/build/website">
-      Options FollowSymLinks MultiViews
-      AddType text/html   .php
-      DirectoryIndex search.php
-      Require all granted
-    </Directory>
-    Alias /nominatim /srv/nominatim/build/website
-
-`/srv/nominatim/build` should be replaced with the location of your
-build directory.
-
-After making changes in the apache config you need to restart apache.
-The website should now be available on http://localhost/nominatim.
-
-#### Configure for use with Nginx
-
-Use php-fpm as a deamon for serving PHP cgi. Install php-fpm together with nginx.
-
-By default php listens on a network socket. If you want it to listen to a
-Unix socket instead, change the pool configuration (`pool.d/www.conf`) as
-follows:
-
-    ; Comment out the tcp listener and add the unix socket
-    ;listen = 127.0.0.1:9000
-    listen = /var/run/php5-fpm.sock
-
-    ; Ensure that the daemon runs as the correct user
-    listen.owner = www-data
-    listen.group = www-data
-    listen.mode = 0666
-
-Tell nginx that php files are special and to fastcgi_pass to the php-fpm
-unix socket by adding the location definition to the default configuration.
-
-    root /srv/nominatim/build/website;
-    index search.php index.html;
-    location ~ [^/]\.php(/|$) {
-        fastcgi_split_path_info ^(.+?\.php)(/.*)$;
-        if (!-f $document_root$fastcgi_script_name) {
-            return 404;
-        }
-        fastcgi_pass unix:/var/run/php5-fpm.sock;
-        fastcgi_index search.php;
-        include fastcgi.conf;
-    }
-
-Restart the nginx and php5-fpm services and the website should now be available
-at `http://localhost/`.
+Don't forget to re-enable them after the initial import or you risk database
+corruption.


-Now continue with [importing the database](Import-and-Update.md).
+## Downloading and building Nominatim
+
+### Downloading the latest release
+
+You can download the [latest release from nominatim.org](https://nominatim.org/downloads/).
+The release contains all necessary files. Just unpack it.
+
+### Downloading the latest development version
+
+If you want to install the latest development version from GitHub, make sure to
+also check out the osm2pgsql subproject:
+
+```
+git clone --recursive https://github.com/openstreetmap/Nominatim.git
+```
+
+The development version does not include the country grid. Download it separately:
+
+```
+wget -O Nominatim/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz
+```
+
+### Building Nominatim
+
+The code must be built in a separate directory. Create the directory and
+change into it.
+
+```
+mkdir build
+cd build
+```
+
+Nominatim uses cmake and make for building. Assuming that you have created the
+build at the same level as the Nominatim source directory run:
+
+```
+cmake ../Nominatim
+make
+sudo make install
+```
+
+!!! warning
+    The default installation no longer compiles the PostgreSQL module that
+    is needed for the legacy tokenizer from older Nominatim versions. If you
+    are upgrading an older database or want to run the
+    [legacy tokenizer](../customize/Tokenizers.md#legacy-tokenizer) for
+    some other reason, you need to enable the PostgreSQL module via
+    cmake: `cmake -DBUILD_MODULE=on ../Nominatim`. To compile the module
+    you need to have the server development headers for PostgreSQL installed.
+    On Ubuntu/Debian run: `sudo apt install postgresql-server-dev-<postgresql version>`
+
+
+Nominatim installs itself into `/usr/local` by default. To choose a different
+installation directory add `-DCMAKE_INSTALL_PREFIX=<install root>` to the
+cmake command. Make sure that the `bin` directory is available in your path
+in that case, e.g.
+
+```
+export PATH=<install root>/bin:$PATH
+```
+
+Now continue with [importing the database](Import.md).
--- a/docs/admin/Maintenance.md
+++ b/docs/admin/Maintenance.md
@@ -0,0 +1,75 @@
+This chapter describes the various operations the Nominatim database administrator
+may use to clean and maintain the database. None of these operations is mandatory
+but they may help improve the performance and accuracy of results.
+
+
+## Updating postcodes
+
+Command: `nominatim refresh --postcodes`
+
+Postcode centroids (aka 'calculated postcodes') are generated by looking at all
+postcodes of a country, grouping them and calculating the geometric centroid.
+There is currently no logic to deal with extreme outliers (typos or other
+mistakes in OSM data). There is also no check if a postcode adheres to a
+country's format, e.g. if Swiss postcodes are 4 digits.
+
+When running regular updates, postcode results can be improved by running
+this command on a regular basis. Note that only the postcode table and the
+postcode search terms are updated. The postcode that is assigned to each place
+is only updated when the place is updated.
+
+The command takes around 70min to run on the planet and needs ca. 40GB of
+temporary disk space.
+
+
+## Updating word counts
+
+Command: `nominatim refresh --word-counts`
+
+Nominatim keeps frequency statistics about all search terms it indexes. These
+statistics are currently used to optimise queries to the database. Thus better
+statistics mean better performance. Word counts are created once after import
+and are usually sufficient even when running regular updates. You might want
+to rerun the statistics computation when adding larger amounts of new data,
+for example, when adding an additional country via `nominatim add-data`.
+
+
+## Forcing recomputation of places and areas
+
+Command: `nominatim refresh --data-object [NWR]<id> --data-area [NWR]<id>`
+
+When running replication updates, Nominatim tries to recompute the search
+and address information for all places that are affected by a change. But it
+needs to restrict the total number of changes to make sure it can keep up
+with the minutely updates. Therefore it will refrain from propagating changes
+that affect a lot of objects.
+
+The administrator may force an update of places in the database.
+`nominatim refresh --data-object` invalidates a single OSM object.
+`nominatim refresh --data-area` invalidates an OSM object and all dependent
+objects. These are usually the places inside its area or around the
+center of the object. Both commands expect the OSM object as an argument
+of the form OSM type + OSM id. The type must be `N` (node), `W` (way) or
+`R` (relation).
+
+After invalidating the object, indexing must be run again. If continuous
+updates are running in the background, the objects will be recomputed together
+with the next round of updates. Otherwise you need to run `nominatim index`
+to finish the recomputation.
+
+
+## Removing large deleted objects
+
+Nominatim refuses to delete very large areas because often these deletions are
+accidental and are reverted within hours. Instead the deletions are logged in
+the `import_polygon_delete` table and left to the administrator to clean up.
+
+There is currently no command to do that. You can use the following SQL
+query to force a deletion on all objects that have been deleted more than
+a certain timespan ago (here: 1 month):
+
+```sql
+SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
+WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
+      and age(p.indexed_date) > '1 month'::interval
+```
--- a/docs/admin/Migration.md
+++ b/docs/admin/Migration.md
@@ -1,10 +1,211 @@
 # Database Migrations

-This page describes database migrations necessary to update existing databases
-to newer versions of Nominatim.
+Since version 3.7.0 Nominatim offers automatic migrations. Please follow
+the following steps:

-SQL statements should be executed from the PostgreSQL commandline. Execute
-`psql nominatim` to enter command line mode.
+* stop any updates that are potentially running
+* update Nominatim to the newer version
+* go to your project directory and run `nominatim admin --migrate`
+* (optionally) restart updates
+
+Below you find additional migrations and hints about other structural and
+breaking changes. **Please read them before running the migration.**
+
+!!! note
+    If you are migrating from a version <3.6, then you still have to follow
+    the manual migration steps up to 3.6.
+
+## 4.0.0 -> 4.1.0
+
+### ICU tokenizer is the new default
+
+Nominatim now installs the [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer)
+by default. This only has an effect on newly installed databases. When
+updating older databases, it keeps its installed tokenizer. If you still
+run with the legacy tokenizer, make sure to compile Nominatim with the
+PostgreSQL module, see [Installation](Installation.md#building-nominatim).
+
+### geocodejson output changed
+
+The `type` field of the geocodejson output has changed. It now contains
+the address class of the object instead of the value of the OSM tag. If
+your client has used the `type` field, switch them to read `osm_value`
+instead.
+
+## 3.7.0 -> 4.0.0
+
+### NOMINATIM_PHRASE_CONFIG removed
+
+Custom blacklist configurations for special phrases now need to be handed
+with the `--config` parameter to `nominatim special-phrases`. Alternatively
+you can put your custom configuration in the project directory in a file
+named `phrase-settings.json`.
+
+Version 4.0 also removes the automatic converter for the php format of
+the configuration in older versions. If you are updating from Nominatim < 3.7
+and still work with a custom `phrase-settings.php`, you need to manually
+convert it into a json format.
+
+### PHP utils removed
+
+The old PHP utils have now been removed completely. You need to switch to
+the appropriate functions of the nominatim command line tool. See
+[Introducing `nominatim` command line tool](#introducing-nominatim-command-line-tool)
+below.
+
+## 3.6.0 -> 3.7.0
+
+### New format and name of configuration file
+
+The configuration for an import is now saved in a `.env` file in the project
+directory. This file follows the dotenv format. For more information, see
+the [installation chapter](Import.md#configuration-setup-in-env).
+
+To migrate to the new system, create a new project directory, add the `.env`
+file and port your custom configuration from `settings/local.php`. Most
+settings are named similarly and only have received a `NOMINATIM_` prefix.
+Use the default settings in `settings/env.defaults` as a reference.
+
+### New location for data files
+
+External data files for Wikipedia importance, postcodes etc. are no longer
+expected to reside in the source tree by default. Instead they will be searched
+in the project directory. If you have an automated setup script you must
+either adapt the download location or explicitly set the location of the
+files to the old place in your `.env`.
+
+### Introducing `nominatim` command line tool
+
+The various php utilities have been replaced with a single `nominatim`
+command line tool. Make sure to adapt any scripts. There is no direct 1:1
+matching between the old utilities and the commands of nominatim CLI. The
+following list gives you a list of nominatim sub-commands that contain
+functionality of each script:
+
+* ./utils/setup.php: `import`, `freeze`, `refresh`
+* ./utils/update.php: `replication`, `add-data`, `index`, `refresh`
+* ./utils/specialphrases.php: `special-phrases`
+* ./utils/check_import_finished.php: `admin`
+* ./utils/warm.php: `admin`
+* ./utils/export.php: `export`
+
+Try `nominatim <command> --help` for more information about each subcommand.
+
+`./utils/query.php` no longer exists in its old form. `nominatim search`
+provides a replacement but returns different output.
+
+### Switch to normalized house numbers
+
+The housenumber column in the placex table now uses a normalized version.
+The automatic migration step will convert the column but this may take a
+very long time. It is advisable to take the machine offline while doing that.
+
+## 3.5.0 -> 3.6.0
+
+### Change of layout of search_name_* tables
+
+The tables need a different index for nearest place lookup. Recreate the
+indexes using the following shell script:
+
+```bash
+for table in `psql -d nominatim -c "SELECT tablename FROM pg_tables WHERE tablename LIKE 'search_name_%'" -tA | grep -v search_name_blank`;
+do
+    psql -d nominatim -c "DROP INDEX idx_${table}_centroid_place; CREATE INDEX idx_${table}_centroid_place ON ${table} USING gist (centroid) WHERE ((address_rank >= 2) AND (address_rank <= 25)); DROP INDEX idx_${table}_centroid_street; CREATE INDEX idx_${table}_centroid_street ON ${table} USING gist (centroid) WHERE ((address_rank >= 26) AND (address_rank <= 27))";
+done
+```
+
+### Removal of html output
+
+The debugging UI is no longer directly provided with Nominatim. Instead we
+now provide a simple Javascript application. Please refer to
+[Setting up the Nominatim UI](Setup-Nominatim-UI.md) for details on how to
+set up the UI.
+
+The icons served together with the API responses have been moved to the
+nominatim-ui project as well. If you want to keep the `icon` field in the
+response, you need to set `CONST_MapIcon_URL` to the URL of the `/mapicon`
+directory of nominatim-ui.
+
+### Change order during indexing
+
+When reindexing places during updates, there is now a different order used
+which needs a different database index. Create it with the following SQL command:
+
+```sql
+CREATE INDEX idx_placex_pendingsector_rank_address
+  ON placex
+  USING BTREE (rank_address, geometry_sector)
+  WHERE indexed_status > 0;
+```
+
+You can then drop the old index with:
+
+```sql
+DROP INDEX idx_placex_pendingsector;
+```
+
+### Unused index
+
+This index has been unused ever since the query using it was changed two years ago. Saves about 12GB on a planet installation.
+
+```sql
+DROP INDEX idx_placex_geometry_reverse_lookupPoint;
+```
+
+### Switching to dotenv
+
+As part of the work changing the configuration format, the configuration for
+the website is now using a separate configuration file. To create the
+configuration file, run the following command after updating:
+
+```sh
+./utils/setup.php --setup-website
+```
+
+### Update SQL code
+
+To update the SQL code to the latest version run:
+
+```
+./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
+```
+
+## 3.4.0 -> 3.5.0
+
+### New Wikipedia/Wikidata importance tables
+
+The `wikipedia_*` tables have a new format that also includes references to
+Wikidata. You need to update the computation functions and the tables as
+follows:
+
+  * download the new Wikipedia tables as described in the import section
+  * reimport the tables: `./utils/setup.php --import-wikipedia-articles`
+  * update the functions: `./utils/setup.php --create-functions --enable-diff-updates`
+  * create a new lookup index:
+```sql
+CREATE INDEX idx_placex_wikidata
+  ON placex
+  USING BTREE ((extratags -> 'wikidata'))
+  WHERE extratags ? 'wikidata'
+    AND class = 'place'
+    AND osm_type = 'N'
+    AND rank_search < 26;
+```
+  * compute importance: `./utils/update.php --recompute-importance`
+
+The last step takes about 10 hours on the full planet.
+
+Remove one function (it will be recreated in the next step):
+
+```sql
+DROP FUNCTION create_country(hstore,character varying);
+```
+
+Finally, update all SQL functions:
+
+```sh
+./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
+```

 ## 3.3.0 -> 3.4.0

@@ -23,6 +224,12 @@ CREATE INDEX idx_location_area_country_geometry ON location_area_country USING G
 CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
 ```

+Finally, update all SQL functions:
+
+```sh
+./utils/setup.php --create-functions --enable-diff-updates --create-partition-functions
+```
+
 ## 3.2.0 -> 3.3.0

 ### New database connection string (DSN) format
@@ -39,7 +246,7 @@ The new format is

 ### Natural Earth country boundaries no longer needed as fallback

-```
+```sql
 DROP TABLE country_naturalearthdata;
 ```

@@ -65,27 +272,37 @@ following command:
 The reverse algorithm has changed and requires new indexes. Run the following
 SQL statements to create the indexes:

-```
+```sql
 CREATE INDEX idx_placex_geometry_reverse_lookupPoint
-  ON placex USING gist (geometry)
-  WHERE (name is not null or housenumber is not null or rank_address between 26 and 27)
-    AND class not in ('railway','tunnel','bridge','man_made')
-    AND rank_address >= 26 AND indexed_status = 0 AND linked_place_id is null;
+  ON placex
+  USING gist (geometry)
+  WHERE (name IS NOT null or housenumber IS NOT null or rank_address BETWEEN 26 AND 27)
+    AND class NOT IN ('railway','tunnel','bridge','man_made')
+    AND rank_address >= 26
+    AND indexed_status = 0
+    AND linked_place_id IS null;
 CREATE INDEX idx_placex_geometry_reverse_lookupPolygon
  ON placex USING gist (geometry)
  WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon')
-    AND rank_address between 4 and 25 AND type != 'postcode'
-    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
+    AND rank_address between 4 and 25
+    AND type != 'postcode'
+    AND name is not null
+    AND indexed_status = 0
+    AND linked_place_id is null;
 CREATE INDEX idx_placex_geometry_reverse_placeNode
  ON placex USING gist (geometry)
-  WHERE osm_type = 'N' AND rank_search between 5 and 25
-    AND class = 'place' AND type != 'postcode'
-    AND name is not null AND indexed_status = 0 AND linked_place_id is null;
+  WHERE osm_type = 'N'
+    AND rank_search between 5 and 25
+    AND class = 'place'
+    AND type != 'postcode'
+    AND name is not null
+    AND indexed_status = 0
+    AND linked_place_id is null;
 ```

 You also need to grant the website user access to the `country_osm_grid` table:

-```
+```sql
 GRANT SELECT ON table country_osm_grid to "www-user";
 ```

@@ -93,7 +310,7 @@ Replace the `www-user` with the user name of your website server if necessary.

 You can now drop the unused indexes:

-```
+```sql
 DROP INDEX idx_placex_reverse_geometry;
 ```

@@ -122,8 +339,8 @@ CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry);
 CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id);
 CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode);
 GRANT SELECT ON location_postcode TO "www-data";
-drop type if exists nearfeaturecentr cascade;
-create type nearfeaturecentr as (
+DROP TYPE IF EXISTS nearfeaturecentr CASCADE;
+CREATE TYPE nearfeaturecentr AS (
  place_id BIGINT,
  keywords int[],
  rank_address smallint,
--- a/docs/admin/Setup-Nominatim-UI.md
+++ b/docs/admin/Setup-Nominatim-UI.md
@@ -0,0 +1,177 @@
+# Setting up the Nominatim UI
+
+Nominatim is a search API; it does not provide a website interface on its
+own. [nominatim-ui](https://github.com/osm-search/nominatim-ui) offers a
+small website for testing your setup and inspecting the database content.
+
+This section provides a quick start on how to use nominatim-ui with your
+installation. For more details, please also have a look at the
+[README of nominatim-ui](https://github.com/osm-search/nominatim-ui/blob/master/README.md).
+
+## Installing nominatim-ui
+
+We provide regular releases of nominatim-ui that contain the packaged website.
+They do not need any special installation. Just download, configure
+and run it. Grab the latest release from
+[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
+and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.
+
+Next you need to adapt the UI to your installation. Custom settings need to be
+put into `dist/theme/config.theme.js`. At a minimum you need to
+set `Nominatim_API_Endpoint` to point to your Nominatim installation:
+
+    cd nominatim-ui
+    echo "Nominatim_Config.Nominatim_API_Endpoint='https://myserver.org/nominatim/';" > dist/theme/config.theme.js
+
+For the full set of available settings, have a look at `dist/config.defaults.js`.
+
+Then you can just test it locally by spinning up a webserver in the `dist`
+directory. For example, with Python:
+
+    cd nominatim-ui/dist
+    python3 -m http.server 8765
+
+The website is now available at `http://localhost:8765`.
+
+## Forwarding searches to nominatim-ui
+
+Nominatim used to provide the search interface directly by itself when
+`format=html` was requested. For all endpoints except for `/reverse` and
+`/lookup` this even used to be the default.
+
+The following section describes how to set up Apache or nginx, so that your
+users are forwarded to nominatim-ui when they go to a URL that formerly presented
+the UI.
+
+### Setting up forwarding in Nginx
+
+First of all make nominatim-ui available under `/ui` on your webserver:
+
+``` nginx
+server {
+
+    # Here is the Nominatim setup as described in the Installation section
+
+    location /ui/ {
+        alias <full path to the nominatim-ui directory>/dist/;
+        index index.html;
+    }
+}
+```
+
+Now we need to find out if a URL should be forwarded to the UI. Add the
+following `map` commands *outside* the server section:
+
+``` nginx
+# Inspect the format parameter in the query arguments. We are interested
+# if it is set to html or something else or if it is missing completely.
+map $args $format {
+    default                  default;
+    ~(^|&)format=html(&|$)   html;
+    ~(^|&)format=            other;
+}
+
+# Determine from the URI and the format parameter above if forwarding is needed.
+map $uri/$format $forward_to_ui {
+    default               1;   # The default is to forward.
+    ~^/ui                 0;   # If the URI points to the UI already, we are done.
+    ~/other$              0;   # An explicit non-html format parameter. No forwarding.
+    ~/reverse.*/default   0;   # Reverse and lookup assume xml format when
+    ~/lookup.*/default    0;   #   no format parameter is given. No forwarding.
+}
+```
+
+The `$forward_to_ui` parameter can now be used to conditionally forward the
+calls:
+
+```
+# When no endpoint is given, default to search.
+# Need to add a rewrite so that the rewrite rules below catch it correctly.
+rewrite ^/$ /search;
+
+location @php {
+    # fastcgi stuff..
+    if ($forward_to_ui) {
+        rewrite ^(/[^/]*) https://yourserver.com/ui$1.html redirect;
+    }
+}
+
+location ~ [^/]\.php(/|$) {
+    # fastcgi stuff..
+    if ($forward_to_ui) {
+        rewrite (.*).php https://yourserver.com/ui$1.html redirect;
+    }
+}
+```
+
+!!! warning
+    Be aware that the rewrite commands are slightly different for URIs with and
+    without the .php suffix.
+
+Reload nginx and the UI should be available.
+
+### Setting up forwarding in Apache
+
+First of all make nominatim-ui available in the `ui/` subdirectory where
+Nominatim is installed. For example, given you have set up an alias under
+`nominatim` like this:
+
+``` apache
+Alias /nominatim /home/vagrant/build/website
+```
+
+you need to insert the following rules for nominatim-ui before that alias:
+
+```
+<Directory "/home/vagrant/nominatim-ui/dist">
+  DirectoryIndex search.html
+  Require all granted
+</Directory>
+
+Alias /nominatim/ui /home/vagrant/nominatim-ui/dist
+```
+
+Replace `/home/vagrant/nominatim-ui` with the directory where you have cloned
+nominatim-ui.
+
+!!! important
+    The alias for nominatim-ui must come before the alias for the Nominatim
+    website directory.
+
+To set up forwarding, the Apache rewrite module is needed. Enable it with:
+
+``` sh
+sudo a2enmod rewrite
+```
+
+Then add rewrite rules to the `Directory` directive of the Nominatim website
+directory like this:
+
+``` apache
+<Directory "/home/vagrant/build/website">
+  Options FollowSymLinks MultiViews
+  AddType text/html   .php
+  Require all granted
+
+  RewriteEngine On
+
+  # This must correspond to the URL where nominatim can be found.
+  RewriteBase "/nominatim/"
+
+  # If no endpoint is given, then use search.
+  RewriteRule ^(/|$)   "search.php"
+
+  # If format-html is explicitly requested, forward to the UI.
+  RewriteCond %{QUERY_STRING} "format=html"
+  RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
+
+  # If no format parameter is there then forward anything
+  # but /reverse and /lookup to the UI.
+  RewriteCond %{QUERY_STRING} "!format="
+  RewriteCond %{REQUEST_URI}  "!/lookup"
+  RewriteCond %{REQUEST_URI}  "!/reverse"
+  RewriteRule ^([^/]+)(.php)? ui/$1.html [R,END]
+</Directory>
+```
+
+Restart Apache and the UI should be available.
--- a/docs/admin/Update.md
+++ b/docs/admin/Update.md
@@ -0,0 +1,223 @@
+# Updating the Database
+
+There are many different ways to update your Nominatim database.
+The following section describes how to keep it up-to-date using
+an [online replication service for OpenStreetMap data](https://wiki.openstreetmap.org/wiki/Planet.osm/diffs).
+For a list of other methods to add or update data see the output of
+`nominatim add-data --help`.
+
+!!! important
+    If you have configured a flatnode file for the import, then you
+    need to keep this flatnode file around for updates.
+
+### Installing the newest version of Pyosmium
+
+The replication process uses
+[Pyosmium](https://docs.osmcode.org/pyosmium/latest/updating_osm_data.html)
+to download update data from the server.
+It is recommended to install Pyosmium via pip.
+Run (as the same user who will later run the updates):
+
+```sh
+pip3 install --user osmium
+```
+
+### Setting up the update process
+
+Next the update process needs to be initialised. By default Nominatim is configured
+to update using the global minutely diffs.
+
+If you want a different update source you will need to add some settings
+to `.env`. For example, to use the daily country extract
+diffs for Ireland from Geofabrik, add the following:
+
+    # base URL of the replication service
+    NOMINATIM_REPLICATION_URL="https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates"
+    # How often upstream publishes diffs (in seconds)
+    NOMINATIM_REPLICATION_UPDATE_INTERVAL=86400
+    # How long to sleep if no update found yet (in seconds)
+    NOMINATIM_REPLICATION_RECHECK_INTERVAL=900
+
+To set up the update process now run the following command:
+
+    nominatim replication --init
+
+It outputs the date where updates will start. Recheck that this date is
+what you expect.
+
+The `replication --init` command needs to be rerun whenever the replication
+service is changed.
+
+### Updating Nominatim
+
+Nominatim supports different modes for retrieving the update data from the
+server. Which one you want to use depends on your exact setup and how often you
+want to retrieve updates.
+
+These instructions are for using a single source of updates. If you have
+imported multiple country extracts and want to keep them
+up-to-date, the [Advanced installations section](Advanced-Installations.md)
+contains instructions to set up and update multiple country extracts.
+
+#### Continuous updates
+
+This is the easiest mode. Simply run the replication command without any
+parameters:
+
+    nominatim replication
+
+The update application keeps running forever and retrieves and applies
+new updates from the server as they are published.
+
+You can run this command as a simple systemd service. Create a service
+description like the following in `/etc/systemd/system/nominatim-updates.service`:
+
+```
+[Unit]
+Description=Continuous updates of Nominatim
+
+[Service]
+WorkingDirectory=/srv/nominatim
+ExecStart=nominatim replication
+StandardOutput=append:/var/log/nominatim-updates.log
+StandardError=append:/var/log/nominatim-updates.error.log
+User=nominatim
+Group=nominatim
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Replace the `WorkingDirectory` with your project directory. Also adapt user
+and group names as required.
+
+Now activate the service and start the updates:
+
+```
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim-updates
+sudo systemctl start nominatim-updates
+```
+
+#### One-time mode
+
+When the `--once` parameter is given, then Nominatim will download exactly one
+batch of updates and then exit. This one-time mode still respects the
+`NOMINATIM_REPLICATION_UPDATE_INTERVAL` that you have set. If according to
+the update interval no new data has been published yet, it will go to sleep
+until the next expected update and only then attempt to download the next batch.
+
+The one-time mode is particularly useful if you want to run updates continuously
+but need to schedule other work in between updates. For example, the main
+service at osm.org uses it to regularly recompute postcodes -- a process that
+must not be run while updates are in progress. Its update script
+looks like this:
+
+```sh
+#!/bin/bash
+
+# Switch to your project directory.
+cd /srv/nominatim
+
+while true; do
+  nominatim replication --once
+  if [ -f "/srv/nominatim/schedule-maintenance" ]; then
+    rm /srv/nominatim/schedule-maintenance
+    nominatim refresh --postcodes
+  fi
+done
+```
+
+A cron job then creates the file `/srv/nominatim/schedule-maintenance` once per night.
+
+##### One-time mode with systemd
+
+You can run the one-time mode with a systemd timer & service.
+
+Create a timer description like `/etc/systemd/system/nominatim-updates.timer`:
+
+```
+[Unit]
+Description=Timer to start updates of Nominatim
+
+[Timer]
+OnActiveSec=2
+OnUnitActiveSec=1min
+Unit=nominatim-updates.service
+
+[Install]
+WantedBy=multi-user.target
+```
+
+And then a similar service definition: `/etc/systemd/system/nominatim-updates.service`:
+
+```
+[Unit]
+Description=Single updates of Nominatim
+
+[Service]
+WorkingDirectory=/srv/nominatim
+ExecStart=nominatim replication --once
+StandardOutput=append:/var/log/nominatim-updates.log
+StandardError=append:/var/log/nominatim-updates.error.log
+User=nominatim
+Group=nominatim
+Type=simple
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Replace the `WorkingDirectory` with your project directory. Also adapt user and
+group names as required. `OnUnitActiveSec` defines how often the individual
+update command is run.
+
+Now activate the service and start the updates:
+
+```
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim-updates.timer
+sudo systemctl start nominatim-updates.timer
+```
+
+You can stop future data updates, while allowing any current, in-progress
+update steps to finish, by running `sudo systemctl stop
+nominatim-updates.timer` and waiting until `nominatim-updates.service` isn't
+running (`sudo systemctl is-active nominatim-updates.service`). Current output
+from the update can be seen with `systemctl status
+nominatim-updates.service`.
+
+
+#### Catch-up mode
+
+With the `--catch-up` parameter, Nominatim will immediately try to download
+all changes from the server until the database is up-to-date. The catch-up mode
+still respects the parameter `NOMINATIM_REPLICATION_MAX_DIFF`. It downloads and
+applies the changes in appropriate batches until all is done.
+
+The catch-up mode is foremost useful to bring the database up to speed after the
+initial import. Given that the service usually is not in production at this
+point, you can temporarily be a bit more generous with the batch size and
+number of threads you use for the updates by running catch-up like this:
+
+```
+cd /srv/nominatim
+NOMINATIM_REPLICATION_MAX_DIFF=5000 nominatim replication --catch-up --threads 15
+```
+
+The catch-up mode is also useful when you want to apply updates at a lower
+frequency than what the source publishes. You can set up a cron job to run
+replication catch-up at whatever interval you desire.
+
+!!! hint
+    When running scheduled updates with catch-up, it is a good idea to choose
+    a replication source with an update frequency that is an order of magnitude
+    higher. For example, if you want to update once a day, use an hourly updated
+    source. This makes sure that you don't miss an entire day of updates when
+    the source is unexpectedly late to publish its update.
+
+    If you want to use the source with the same update frequency (e.g. a daily
+    updated source with daily updates), use the
+    continuous update mode. It keeps re-requesting the newest update until it
+    is published.
--- a/docs/api/Details.md
+++ b/docs/api/Details.md
@@ -1,19 +1,22 @@
 # Place details

-Lookup details about a single place by id. The default output is HTML for debugging search logic and results.
+Show all details about a single place saved in the database.

-**The details page (including JSON output) exists for debugging only and must not be downloaded automatically**, see [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
+!!! warning
+    The details page exists for debugging only. You may not use it in scripts
+    or to automatically query details about a result.
+    See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).


 ## Parameters

 The details API supports the following two request formats:

-```
-  https://nominatim.openstreetmap.org/details?osmtype=[N|W|R]&osmid=<value>&class=<value>
+``` xml
+https://nominatim.openstreetmap.org/details?osmtype=[N|W|R]&osmid=<value>&class=<value>
 ```

-`osmtype` and `osmid` are required parameter. The type is one of node (N), way (W)
+`osmtype` and `osmid` are required parameters. The type is one of node (N), way (W)
 or relation (R). The id must be a number. The `class` parameter is optional and
 allows to distinguish between entries, when the corresponding OSM object has more
 than one main tag. For example, when a place is tagged with `tourism=hotel` and
@@ -23,36 +26,34 @@ to get exactly the one you want. If there are multiple places in the database
 but the `class` parameter is left out, then one of the places will be chosen
 at random and displayed.

-```
-  https://nominatim.openstreetmap.org/details?place_id=<value>
+``` xml
+https://nominatim.openstreetmap.org/details?place_id=<value>
 ```

-Placeids are assigned sequentially during Nominatim data import. The id for a place is different between Nominatim installation (servers) and changes when data gets reimported. Therefore it can't be used as permanent id and shouldn't be used in bug reports.
+Place IDs are assigned sequentially during Nominatim data import. The ID
+for a place differs between Nominatim installations (servers) and
+changes when data gets reimported. Therefore it cannot be used as
+a permanent ID and shouldn't be used in bug reports.


 Additional optional parameters are explained below.

 ### Output format

-* `format=[html|json]`
-
-See [Place Output Formats](Output.md) for details on each format. (Default: html)
-
 * `json_callback=<string>`

 Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
-Only has an effect for JSON output formats.

 * `pretty=[0|1]`

-For JSON output will add indentation to make it more human-readable. (Default: 0)
+Add indentation to make it more human-readable. (Default: 0)


 ### Output details

 * `addressdetails=[0|1]`

-Include a breakdown of the address into elements. (Default for JSON: 0, for HTML: 1)
+Include a breakdown of the address into elements. (Default: 0)

 * `keywords=[0|1]`

@@ -60,11 +61,16 @@ Include a list of name keywords and address keywords (word ids). (Default: 0)

 * `linkedplaces=[0|1]`

-Include details of places higher in the address hierarchy. E.g. for a street this is usually the city, state, postal code, country. (Default: 1)
+Include details of places that are linked with this one. Places get linked
+together when they are different forms of the same physical object. Nominatim
+links two kinds of objects together: place nodes get linked with the
+corresponding administrative boundaries. Waterway relations get linked together with their
+members.
+(Default: 1)

 * `hierarchy=[0|1]`

-Include details of places lower in the address hierarchy. E.g. for a city this usually a list of streets, suburbs, rivers. (Default for JSON: 0, for HTML: 1)
+Include details of places lower in the address hierarchy. (Default: 0)

 * `group_hierarchy=[0|1]`

@@ -72,7 +78,7 @@ For JSON output will group the places by type. (Default: 0)

 * `polygon_geojson=[0|1]`

-Include geometry of result. (Default for JSON: 0, for HTML: 1)
+Include geometry of result. (Default: 0)

 ### Language of results

@@ -86,10 +92,6 @@ comma-separated list of language codes.

 ## Examples

-##### HTML
-
-[https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407](https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407)
-
 ##### JSON

 [https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407&format=json](https://nominatim.openstreetmap.org/details.php?osmtype=W&osmid=38210407&format=json)
--- a/docs/api/Faq.md
+++ b/docs/api/Faq.md
@@ -35,7 +35,7 @@ it contains the county/state/country across the border.
 #### 3. I get different counties/states/countries when I change the zoom parameter in the reverse query. How is that possible?

 This is basically the same problem as in the previous answer.
-The zoom level influences at which [search rank](https://wiki.openstreetmap.org/wiki/Nominatim/Development_overview#Country_to_street_level) Nominatim starts looking
+The zoom level influences at which [search rank](../customize/Ranking.md#search-rank) Nominatim starts looking
 for the closest object. So the closest house number maybe on one side of the
 border while the closest street is on the other. As the address details contain
 the address of the closest object found, you might sometimes get one result,
@@ -58,4 +58,4 @@ The [Overpass API](https://wiki.openstreetmap.org/wiki/Overpass_API) is more
 suited for these kinds of queries.

 That said if you installed your own Nominatim instance you can use the
-`/utils/export.php` PHP script as basis to return such lists.
+`nominatim export` command as a basis to return such lists.
--- a/docs/api/Lookup.md
+++ b/docs/api/Lookup.md
@@ -56,6 +56,21 @@ specified in the "Accept-Language" HTTP header.
 Either use a standard RFC2616 accept-language string or a simple
 comma-separated list of language codes.

+### Polygon output
+
+* `polygon_geojson=1`
+* `polygon_kml=1`
+* `polygon_svg=1`
+* `polygon_text=1`
+
+Output geometry of results as GeoJSON, KML, SVG or WKT. Only one of these
+options can be used at a time. (Default: 0)
+
+* `polygon_threshold=0.0`
+
+Return a simplified version of the output geometry. The parameter is the
+tolerance in degrees with which the geometry may differ from the original
+geometry. Topology is preserved in the result. (Default: 0.0)

 ### Other

@@ -75,11 +90,11 @@ This overrides the specified machine readable format. (Default: 0)

 ##### XML

-[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189](https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W50637691,N240109189)

 ```xml
-  <lookupresults timestamp="Mon, 29 Jun 15 18:01:33 +0000" attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright" querystring="R146656,W104393803,N240109189" polygon="false">
-    <place place_id="127761056" osm_type="relation" osm_id="146656" place_rank="16" lat="53.4791466" lon="-2.2447445" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.704893333438333">
+  <lookupresults timestamp="Mon, 28 Mar 22 14:38:54 +0000" attribution="Data &#xA9; OpenStreetMap contributors, ODbL 1.0. http://www.openstreetmap.org/copyright" querystring="R146656,W50637691,N240109189" more_url="">
+    <place place_id="282236157" osm_type="relation" osm_id="146656" place_rank="16" address_rank="16" boundingbox="53.3401044,53.5445923,-2.3199185,-2.1468288" lat="53.44246175" lon="-2.2324547359718547" display_name="Manchester, Greater Manchester, North West England, England, United Kingdom" class="boundary" type="administrative" importance="0.35">
      <city>Manchester</city>
      <county>Greater Manchester</county>
      <state_district>North West England</state_district>
@@ -87,21 +102,20 @@ This overrides the specified machine readable format. (Default: 0)
      <country>United Kingdom</country>
      <country_code>gb</country_code>
    </place>
-    <place place_id="77769745" osm_type="way" osm_id="104393803" place_rank="30" lat="52.5162024" lon="13.3777343363579" display_name="Brandenburg Gate, 1, Pariser Platz, Mitte, Berlin, 10117, Germany" class="tourism" type="attraction" importance="0.443472858361592">
-      <attraction>Brandenburg Gate</attraction>
-      <house_number>1</house_number>
-      <pedestrian>Pariser Platz</pedestrian>
-      <suburb>Mitte</suburb>
-      <city_district>Mitte</city_district>
-      <city>Berlin</city>
-      <state>Berlin</state>
-      <postcode>10117</postcode>
+    <place place_id="115462561" osm_type="way" osm_id="50637691" place_rank="30" address_rank="30" boundingbox="52.3994612,52.3996426,13.0479574,13.0481754" lat="52.399550700000006" lon="13.048066846939687" display_name="Brandenburger Tor, Brandenburger Stra&#xDF;e, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany" class="tourism" type="attraction" importance="0.29402874005524">
+      <tourism>Brandenburger Tor</tourism>
+      <road>Brandenburger Stra&#xDF;e</road>
+      <suburb>Historische Innenstadt</suburb>
+      <city>Potsdam</city>
+      <state>Brandenburg</state>
+      <postcode>14467</postcode>
      <country>Germany</country>
      <country_code>de</country_code>
    </place>
-    <place place_id="2570600569" osm_type="node" osm_id="240109189" place_rank="15" lat="52.5170365" lon="13.3888599" display_name="Berlin, Germany" class="place" type="city" importance="0.822149797630868">
+    <place place_id="567505" osm_type="node" osm_id="240109189" place_rank="15" address_rank="16" boundingbox="52.3586925,52.6786925,13.2396024,13.5596024" lat="52.5186925" lon="13.3996024" display_name="Berlin, 10178, Germany" class="place" type="city" importance="0.78753902824914">
      <city>Berlin</city>
      <state>Berlin</state>
+      <postcode>10178</postcode>
      <country>Germany</country>
      <country_code>de</country_code>
    </place>
@@ -110,38 +124,50 @@ This overrides the specified machine readable format. (Default: 0)

 ##### JSON with extratags

-[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json)
+[https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1](https://nominatim.openstreetmap.org/lookup?osm_ids=W50637691&format=json&extratags=1)

 ```json
 [
-  {
-    "place_id": "84271358",
-    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
-    "osm_type": "way",
-    "osm_id": "50637691",
-    "lat": "52.39955055",
-    "lon": "13.04806574678",
-    "display_name": "Brandenburger Tor, Brandenburger Straße, Nördliche Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
-    "class": "historic",
-    "type": "city_gate",
-    "importance": "0.221233780277011",
-    "address": {
-      "address29": "Brandenburger Tor",
-      "pedestrian": "Brandenburger Straße",
-      "suburb": "Nördliche Innenstadt",
-      "city": "Potsdam",
-      "state": "Brandenburg",
-      "postcode": "14467",
-      "country": "Germany",
-      "country_code": "de"
-    },
-    "extratags": {
-      "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
-      "wikidata": "Q695045",
-      "wikipedia": "de:Brandenburger Tor (Potsdam)",
-      "wheelchair": "yes",
-      "description": "Kleines Brandenburger Tor in Potsdam"
-    }
-  }
+   {
+      "place_id": 115462561,
+      "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+      "osm_type": "way",
+      "osm_id": 50637691,
+      "boundingbox": [
+        "52.3994612",
+        "52.3996426",
+        "13.0479574",
+        "13.0481754"
+      ],
+      "lat": "52.399550700000006",
+      "lon": "13.048066846939687",
+      "display_name": "Brandenburger Tor, Brandenburger Straße, Historische Innenstadt, Innenstadt, Potsdam, Brandenburg, 14467, Germany",
+      "class": "tourism",
+      "type": "attraction",
+      "importance": 0.2940287400552381,
+      "address": {
+        "tourism": "Brandenburger Tor",
+        "road": "Brandenburger Straße",
+        "suburb": "Historische Innenstadt",
+        "city": "Potsdam",
+        "state": "Brandenburg",
+        "postcode": "14467",
+        "country": "Germany",
+        "country_code": "de"
+      },
+      "extratags": {
+        "image": "http://commons.wikimedia.org/wiki/File:Potsdam_brandenburger_tor.jpg",
+        "heritage": "4",
+        "wikidata": "Q695045",
+        "architect": "Carl von Gontard;Georg Christian Unger",
+        "wikipedia": "de:Brandenburger Tor (Potsdam)",
+        "wheelchair": "yes",
+        "description": "Kleines Brandenburger Tor in Potsdam",
+        "heritage:website": "http://www.bldam-brandenburg.de/images/stories/PDF/DML%202012/04-p-internet-13.pdf",
+        "heritage:operator": "bldam",
+        "architect:wikidata": "Q68768;Q95223",
+        "year_of_construction": "1771"
+      }
+   }
 ]
 ```
--- a/docs/api/Output.md
+++ b/docs/api/Output.md
@@ -2,12 +2,10 @@

 The [/reverse](Reverse.md), [/search](Search.md) and [/lookup](Lookup.md)
 API calls produce very similar output which is explained in this section.
-There is one section for each format which is selectable via the `format`
-parameter.
+There is one section for each format. The format corresponds to what was
+selected via the `format` parameter.

-## Formats
-
-### JSON
+## JSON

 The JSON format returns an array of places (for search and lookup) or
 a single place (for reverse) of the following format:
@@ -30,6 +28,7 @@ a single place (for reverse) of the following format:
      "city": "London",
      "state_district": "Greater London",
      "state": "England",
+      "ISO3166-2-lvl4": "GB-ENG",
      "postcode": "SW1A 2DU",
      "country": "United Kingdom",
      "country_code": "gb"
@@ -41,48 +40,50 @@ a single place (for reverse) of the following format:
      "wikipedia": "en:London",
      "population": "8416535"
    }
-  },
+  }
 ```

 The possible fields are:

- * `place_id` - reference to the Nominatim internal database ID (see notes below)
- * `osm_type`, `osm_id` - reference to the OSM object
- * `boundingbox` - area of corner coordinates
+ * `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
+ * `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
+ * `boundingbox` - area of corner coordinates ([see notes](#boundingbox))
 * `lat`, `lon` - latitude and longitude of the centroid of the object
 * `display_name` - full comma-separated address
 * `class`, `type` - key and value of the main OSM tag
 * `importance` - computed importance rank
 * `icon` - link to class icon (if available)
- * `address` - dictionary of address details (only with `addressdetails=1`)
+ * `address` - dictionary of address details (only with `addressdetails=1`,
+   [see notes](#addressdetails))
 * `extratags` - dictionary with additional useful tags like website or maxspeed
   (only with `extratags=1`)
 * `namedetails` - dictionary with full list of available names including ref etc.
 * `geojson`, `svg`, `geotext`, `geokml` - full geometry
   (only with the appropriate `polygon_*` parameter)

-### JSONv2
+## JSONv2

 This is the same as the JSON format with two changes:

 * `class` renamed to `category`
 * additional field `place_rank` with the search rank of the object

-### GeoJSON
+## GeoJSON

 This format follows the [RFC7946](https://geojson.org). Every feature includes
 a bounding box (`bbox`).

-The feature list has the following fields:
+The properties object has the following fields:

- * `place_id` - reference to the Nominatim internal database ID (see notes below)
- * `osm_type`, `osm_id` - reference to the OSM object
+ * `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
+ * `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
 * `category`, `type` - key and value of the main OSM tag
 * `display_name` - full comma-separated address
 * `place_rank` - class search rank
 * `importance` - computed importance rank
 * `icon` - link to class icon (if available)
- * `address` - dictionary of address details (only with `addressdetails=1`)
+ * `address` - dictionary of address details (only with `addressdetails=1`,
+   [see notes](#addressdetails))
 * `extratags` - dictionary with additional useful tags like `website` or `maxspeed`
   (only with `extratags=1`)
 * `namedetails` - dictionary with full list of available names including ref etc.
@@ -90,45 +91,46 @@ The feature list has the following fields:
 Use `polygon_geojson` to output the full geometry of the object instead
 of the centroid.

-### GeocodeJSON
+## GeocodeJSON

 The GeocodeJSON format follows the
 [GeocodeJSON spec 0.1.0](https://github.com/geocoders/geocodejson-spec).
 The following feature attributes are implemented:

- * `osm_type`, `osm_id` - reference to the OSM object (unofficial extension)
- * `type` - value of the main tag of the object (e.g. residential, restaurant, ...)
+ * `osm_type`, `osm_id` - reference to the OSM object (unofficial extension, [see notes](#osm-reference))
+ * `type` - the 'address level' of the object (`house`, `street`, `district`, `city`,
+            `county`, `state`, `country`, `locality`)
+ * `osm_key` - key of the main tag of the OSM object (e.g. boundary, highway, amenity)
+ * `osm_value` - value of the main tag of the OSM object (e.g. residential, restaurant)
 * `label` - full comma-separated address
 * `name` - localised name of the place
- * `housenumber`, `street`, `locality`, `postcode`, `city`,
-   `district`, `county`, `state`, `country` -
+ * `housenumber`, `street`, `locality`, `district`, `postcode`, `city`,
+   `county`, `state`, `country` -
   provided when it can be determined from the address
-   (see [this issue](https://github.com/openstreetmap/Nominatim/issues/1080) for
-   current limitations on the correctness of the address) and `addressdetails=1`
-   was given
 * `admin` - list of localised names of administrative boundaries (only with `addressdetails=1`)

 Use `polygon_geojson` to output the full geometry of the object instead
 of the centroid.

-### XML
+## XML

 The XML response returns one or more place objects in slightly different
 formats depending on the API call.

-#### Reverse
+### Reverse

 ```
 <reversegeocode timestamp="Sat, 11 Aug 18 11:53:21 +0000"
                attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
                querystring="lat=48.400381&lon=11.745876&zoom=5&format=xml">
-  <result place_id="179509537" osm_type="relation" osm_id="2145268" ref="BY"
+  <result place_id="179509537" osm_type="relation" osm_id="2145268" ref="BY" place_rank="15" address_rank="15"
          lat="48.9467562" lon="11.4038717"
          boundingbox="47.2701114,50.5647142,8.9763497,13.8396373">
       Bavaria, Germany
  </result>
  <addressparts>
     <state>Bavaria</state>
+     <ISO3166-2-lvl4>DE-BY</ISO3166-2-lvl4>
     <country>Germany</country>
     <country_code>de</country_code>
  </addressparts>
@@ -148,11 +150,11 @@ attribution to OSM and the original querystring.

 The place information can be found in the `result` element. The attributes of that element contain:

- * `place_id` - reference to the Nominatim internal database ID (see notes below)
- * `osm_type`, `osm_id` - reference to the OSM object
+ * `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
+ * `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
 * `ref` - content of `ref` tag if it exists
 * `lat`, `lon` - latitude and longitude of the centroid of the object
- * `boundingbox` - comma-separated list of corner coordinates
+ * `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))

 The full address of the result can be found in the content of the
 `result` element as a comma-separated list.
@@ -160,14 +162,14 @@ The full address of the result can be found in the content of the
 Additional information requested with `addressdetails=1`, `extratags=1` and
 `namedetails=1` can be found in extra elements.

-#### Search and Lookup
+### Search and Lookup

 ```
 <searchresults timestamp="Sat, 11 Aug 18 11:55:35 +0000"
               attribution="Data © OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright"
               querystring="london" polygon="false" exclude_place_ids="100149"
               more_url="https://nominatim.openstreetmap.org/search.php?q=london&addressdetails=1&extratags=1&exclude_place_ids=100149&format=xml&accept-language=en-US%2Cen%3Bq%3D0.7%2Cde%3Bq%3D0.3">
-  <place place_id="100149" osm_type="node" osm_id="107775" place_rank="15"
+  <place place_id="100149" osm_type="node" osm_id="107775" place_rank="15" address_rank="15"
         boundingbox="51.3473219,51.6673219,-0.2876474,0.0323526" lat="51.5073219" lon="-0.1276474"
         display_name="London, Greater London, England, SW1A 2DU, United Kingdom"
         class="place" type="city" importance="0.9654895765402"
@@ -182,6 +184,7 @@ Additional information requested with `addressdetails=1`, `extratags=1` and
    <city>London</city>
    <state_district>Greater London</state_district>
    <state>England</state>
+    <ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
    <postcode>SW1A 2DU</postcode>
    <country>United Kingdom</country>
    <country_code>gb</country_code>
@@ -203,12 +206,13 @@ generic information about the query:
 The place information can be found in the `place` elements, of which there may
 be more than one. The attributes of that element contain:

- * `place_id` - reference to the Nominatim internal database ID (see notes below)
- * `osm_type`, `osm_id` - reference to the OSM object
+ * `place_id` - reference to the Nominatim internal database ID ([see notes](#place_id-is-not-a-persistent-id))
+ * `osm_type`, `osm_id` - reference to the OSM object ([see notes](#osm-reference))
 * `ref` - content of `ref` tag if it exists
 * `lat`, `lon` - latitude and longitude of the centroid of the object
- * `boundingbox` - comma-separated list of corner coordinates
- * `place_rank` - class search rank
+ * `boundingbox` - comma-separated list of corner coordinates ([see notes](#boundingbox))
+ * `place_rank` - class [search rank](../customize/Ranking#search-rank)
+ * `address_rank` - place [address rank](../customize/Ranking#address-rank)
 * `display_name` - full comma-separated address
 * `class`, `type` - key and value of the main OSM tag
 * `importance` - computed importance rank
@@ -218,29 +222,81 @@ When `addressdetails=1` is requested, the localised address parts appear
 as subelements with the type of the address part.

 Additional information requested with `extratags=1` and `namedetails=1` can
-be found in extra elements as sub-element of each place.
+be found in extra elements as sub-element of `extratags` and `namedetails`
+respectively.


 ## Notes on field values

 ### place_id is not a persistent id

-The `place_id` is created when a Nominatim database gets installed. A
-single place will have a different value on another server or even when
-the same data gets re-imported. It's thus not useful to treat it as
-permanent for later use.
+The `place_id` is an internal identifier that is assigned when data is imported
+into a Nominatim database. The same OSM object will have a different value
+on another server. It may even change its ID on the same server when it is
+removed and reimported while updating the database with fresh OSM data.
+It is thus not useful to treat it as permanent for later use.

-The combination `osm_type`+`osm_id` is slighly better but remember in
+The combination `osm_type`+`osm_id` is slightly better but remember in
 OpenStreetMap mappers can delete, split, recreate places (and those
 get a new `osm_id`), there is no link between those old and new ids.
 Places can also change their meaning without changing their `osm_id`,
 e.g. when a restaurant is retagged as supermarket. For a more in-depth
 discussion see [Permanent ID](https://wiki.openstreetmap.org/wiki/Permanent_ID).

-Nominatim merges some places (e.g. center node of a city with the boundary
-relation) so `osm_type`+`osm_id`+`class_name` would be more unique.
+If you need an ID that is consistent over multiple installations of Nominatim,
+then you should use the combination of `osm_type`+`osm_id`+`class`.
+
+### OSM reference
+
+Nominatim may sometimes return special objects that do not correspond directly
+to an object in OpenStreetMap. These are:
+
+* **Postcodes**. Nominatim returns a postcode point created from all mapped
+  postcodes of the same name. The class and type of these objects are `place=postcode`.
+  No `osm_type` and `osm_id` are included in the result.
+* **Housenumber interpolations**. Nominatim returns a single interpolated
+  housenumber from the interpolation way. The class and type are `place=house`
+  and `osm_type` and `osm_id` correspond to the interpolation way in OSM.
+* **TIGER housenumber.** Nominatim returns a single interpolated housenumber
+  from the TIGER data. The class and type are `place=house`
+  and `osm_type` and `osm_id` correspond to the street mentioned in the result.
+
+Please note that the `osm_type` and `osm_id` returned may be changed in the
+future. You should not expect to only find `node`, `way` and `relation` for
+the type.

 ### boundingbox

 Comma separated list of min latitude, max latitude, min longitude, max longitude.
 The whole planet would be `-90,90,-180,180`.
+
+Can be used to pan and center the map on the result, for example with the
+Leaflet.js mapping library:
+`map.fitBounds([[bbox[0],bbox[2]],[bbox[1],bbox[3]]], {padding: [20, 20], maxZoom: 16});`
+
+Bounds crossing the antimeridian have a min longitude -180 and max longitude 180,
+essentially covering the entire planet
+(see [issue 184](https://github.com/openstreetmap/Nominatim/issues/184)).
+
+### addressdetails
+
+Address details in the xml and json formats return a list of names together
+with a designation label. Per default the following labels may appear:
+
+ * continent
+ * country, country_code
+ * region, state, state_district, county, ISO3166-2-lvl<admin_level>
+ * municipality, city, town, village
+ * city_district, district, borough, suburb, subdivision
+ * hamlet, croft, isolated_dwelling
+ * neighbourhood, allotments, quarter
+ * city_block, residential, farm, farmyard, industrial, commercial, retail
+ * road
+ * house_number, house_name
+ * emergency, historic, military, natural, landuse, place, railway,
+   man_made, aerialway, boundary, amenity, aeroway, club, craft, leisure,
+   office, mountain_pass, shop, tourism, bridge, tunnel, waterway
+ * postcode
+
+They roughly correspond to the classification of the OpenStreetMap data
+according to either the `place` tag or the main key of the object.
--- a/docs/api/Overview.md
+++ b/docs/api/Overview.md
@@ -7,7 +7,7 @@ Its API has the following endpoints for querying the data:
 * __[/search](Search.md)__ - search OSM objects by name or type
 * __[/reverse](Reverse.md)__ - search OSM object by their location
 * __[/lookup](Lookup.md)__ - look up address details for OSM objects by their ID
- * __/status__ - query the status of the server
+ * __[/status](Status.md)__ - query the status of the server
 * __/deletable__ - list objects that have been deleted in OSM but are held
                    back in Nominatim in case the deletion was accidental
 * __/polygons__ - list of broken polygons detected by Nominatim
--- a/docs/api/Reverse.md
+++ b/docs/api/Reverse.md
@@ -1,36 +1,48 @@
 # Reverse Geocoding

-Reverse geocoding generates an address from a latitude and longitude or from
-an OSM object.
+Reverse geocoding generates an address from a latitude and longitude.
+
+## How it works
+
+The reverse geocoding API does not exactly compute the address for the
+coordinate it receives. It works by finding the closest suitable OSM object
+and returning its address information. This may occasionally lead to
+unexpected results.
+
+First of all, Nominatim only includes OSM objects in
+its index that are suitable for searching. Small, unnamed paths for example
+are missing from the database and can therefore not be used for reverse
+geocoding either.
+
+The other issue to be aware of is that the closest OSM object may not always
+have a similar enough address to the coordinate you were requesting. For
+example, in dense city areas it may belong to a completely different street.
+

 ## Parameters

 The main format of the reverse API is

 ```
-https://nominatim.openstreetmap.org/reverse?<query>
+https://nominatim.openstreetmap.org/reverse?lat=<value>&lon=<value>&<params>
 ```

-There are two ways how the requested location can be specified:
+where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
+projection. The API returns exactly one result or an error when the coordinate
+is in an area with no OSM data coverage.

-* `lat=<value>` `lon=<value>`
+Additional parameters are accepted as listed below.

-    A geographic location to generate an address for. The coordiantes must be
-    in WGS84 format.
-
-* `osm_type=[N|W|R]` `osm_id=<value>`
-
-    A specific OSM node(N), way(W) or relation(R) to return an address for.
-
-In both cases exactly one object is returned. The two input parameters cannot
-be used at the same time. Both accept the additional optional parameters listed
-below.
+!!! warning "Deprecation warning"
+    The reverse API used to allow address lookup for a single OSM object by
+    its OSM id. This use is now deprecated. Use the [Address Lookup API](../Lookup)
+    instead.

 ### Output format

 * `format=[xml|json|jsonv2|geojson|geocodejson]`

-See [Place Output Formats](Output.md) for details on each format. (Default: html)
+See [Place Output Formats](Output.md) for details on each format. (Default: xml)

 * `json_callback=<string>`

@@ -69,8 +81,9 @@ comma-separated list of language codes.

 * `zoom=[0-18]`

-Level of detail required for the address. Default: 18. This is a number that corresponds
-roughly to the zoom level used in map frameworks like Leaflet.js, Openlayers etc.
+Level of detail required for the address. Default: 18. This is a number that
+corresponds roughly to the zoom level used in XYZ tile sources in frameworks
+like Leaflet.js, Openlayers etc.
 In terms of address details the zoom levels are as follows:

 zoom | address detail
@@ -97,7 +110,7 @@ options can be used at a time. (Default: 0)

 * `polygon_threshold=0.0`

-Simplify the output geometry before returning. The parameter is the
+Return a simplified version of the output geometry. The parameter is the
 tolerance in degrees with which the geometry may differ from the original
 geometry. Topology is preserved in the result. (Default: 0.0)

@@ -105,7 +118,7 @@ geometry. Topology is preserved in the result. (Default: 0.0)

 * `email=<valid email address>`

-If you are making large numbers of request please include an appropriate email
+If you are making a large number of requests, please include an appropriate email
 address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.


@@ -149,7 +162,7 @@ This overrides the specified machine readable format. (Default: 0)
  "licence":"Data © OpenStreetMap contributors, ODbL 1.0. https:\/\/www.openstreetmap.org\/copyright",
  "osm_type":"way",
  "osm_id":"280940520",
-"lat":"-34.4391708",
+  "lat":"-34.4391708",
  "lon":"-58.7064573",
  "place_rank":"26",
  "category":"highway",
--- a/docs/api/Search.md
+++ b/docs/api/Search.md
@@ -1,37 +1,34 @@
 # Search queries

-The search API allows you to look up a location from a textual description.
-Nominatim supports structured as well as free-form search queries.
+The search API allows you to look up a location from a textual description
+or address. Nominatim supports structured and free-form search queries.

 The search query may also contain
 [special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
 which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
-Note that this only limits the items to be found, it's not suited to return complete
-lists of OSM objects of a specific type. For those use [Overpass API](https://overpass-api.de/).
+This can be used to narrow down the kind of objects to be returned.
+
+!!! warning
+    Special phrases are not suitable to query all objects of a certain type in an
+    area. Nominatim will always just return a collection of the best matches. To
+    download OSM data by object type, use the [Overpass API](https://overpass-api.de/).

 ## Parameters

-The search API has the following two formats:
-
-```
-   https://nominatim.openstreetmap.org/search/<query>?<params>
-```
-
-This format only accepts a free-form query string where the
-parts of the query are separated by slashes.
+The search API has the following format:

 ```
   https://nominatim.openstreetmap.org/search?<params>
 ```

-In this form, the query may be given through two different sets of parameters:
+The search term may be specified with two different sets of parameters:

 * `q=<query>`

    Free-form query string to search for.
    Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
-    [pilkington avenue, birmingham](//nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
-    [birmingham, pilkington avenue](//nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
+    [pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
+    [birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
    Commas are optional, but improve performance by reducing the complexity of the search.


@@ -46,13 +43,19 @@ In this form, the query may be given through two different sets of parameters:
    Structured requests are faster but are less robust against alternative
    OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.

-All three query forms accept the additional parameters listed below.
+Both query forms accept the additional parameters listed below.

 ### Output format

-* `format=[html|xml|json|jsonv2|geojson|geocodejson]`
+* `format=[xml|json|jsonv2|geojson|geocodejson]`

-See [Place Output Formats](Output.md) for details on each format. (Default: html)
+See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
+
+!!! note
+    The Nominatim service at
+    [https://nominatim.openstreetmap.org](https://nominatim.openstreetmap.org)
+    has a different default behaviour for historical reasons. When the
+    `format` parameter is omitted, the request will be forwarded to the Web UI.

 * `json_callback=<string>`

@@ -92,16 +95,20 @@ comma-separated list of language codes.
 * `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`

 Limit search results to one or more countries. `<countrycode>` must be the
-ISO 3166-1alpha2 code, e.g. `gb` for the United Kingdom, `de` for Germany.
+[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
+e.g. `gb` for the United Kingdom, `de` for Germany.

+Each place in Nominatim is assigned to one country code based
+on OSM country boundaries. In rare cases a place may not be in any country
+at all, for example, in international waters.

 * `exclude_place_ids=<place_id,[place_id],[place_id]`

 If you do not want certain OSM objects to appear in the search
 result, give a comma separated list of the `place_id`s you want to skip.
-This can be used to broaden search results. For example, if a previous
-query only returned a few results, then including those here would cause
-the search to return other, less accurate, matches (if possible).
+This can be used to retrieve additional search results. For example, if a
+previous query only returned a few results, then including those here would
+cause the search to return other, less accurate, matches (if possible).


 * `limit=<integer>`
@@ -112,16 +119,17 @@ Limit the number of returned results. (Default: 10, Maximum: 50)
 * `viewbox=<x1>,<y1>,<x2>,<y2>`

 The preferred area to find search results. Any two corner points of the box
-are accepted in any order as long as they span a real box. `x` is longitude,
+are accepted as long as they span a real box. `x` is longitude,
 `y` is latitude.


 * `bounded=[0|1]`

-When a viewbox is given, restrict the result to items contained with that
+When a viewbox is given, restrict the result to items contained within that
 viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
-only search is allowed. In this case, give the special keyword for the
-amenity in square brackets, e.g. `[pub]`. (Default: 0)
+only search is allowed. Give the special keyword for the amenity in square
+brackets, e.g. `[pub]` and a selection of objects of this type is returned.
+There is no guarantee that the result is complete. (Default: 0)


 ### Polygon output
@@ -136,7 +144,7 @@ options can be used at a time. (Default: 0)

 * `polygon_threshold=0.0`

-Simplify the output geometry before returning. The parameter is the
+Return a simplified version of the output geometry. The parameter is the
 tolerance in degrees with which the geometry may differ from the original
 geometry. Topology is preserved in the result. (Default: 0.0)

@@ -150,13 +158,11 @@ address to identify your requests. See Nominatim's [Usage Policy](https://operat
 * `dedupe=[0|1]`

 Sometimes you have several objects in OSM identifying the same place or
-object in reality. The simplest case is a street being split in many
+object in reality. The simplest case is a street being split into many
 different OSM ways due to different characteristics. Nominatim will
 attempt to detect such duplicates and only return one match unless
 this parameter is set to 0. (Default: 1)

-
-
 * `debug=[0|1]`

 Output assorted developer debug information. Data on internals of Nominatim's
@@ -168,21 +174,27 @@ This overrides the specified machine readable format. (Default: 0)
 ## Examples


-##### XML with polygon points
+##### XML with kml polygon

-* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon=1&addressdetails=1)
-* [https://nominatim.openstreetmap.org/search/gb/birmingham/pilkington%20avenue/135?format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search/gb/birmingham/pilkington%20avenue/135?format=xml&polygon=1&addressdetails=1)
-* [https://nominatim.openstreetmap.org/search/135%20pilkington%20avenue,%20birmingham?format=xml&polygon=1&addressdetails=1](https://nominatim.openstreetmap.org/search/135%20pilkington%20avenue,%20birmingham?format=xml&polygon=1&addressdetails=1)
+* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)

 ```xml
  <searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
    <place
      place_id="1620612" osm_type="node" osm_id="452010817"
      boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
-      polygonpoints="[['-1.81592098644987','52.5487429714954'],['-1.81592290792183','52.5487234624632'],...]"
      lat="52.5487429714954" lon="-1.81602098644987"
      display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
      class="place" type="house">
+      <geokml>
+        <Polygon>
+          <outerBoundaryIs>
+            <LinearRing>
+              <coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
+            </LinearRing>
+          </outerBoundaryIs>
+        </Polygon>
+      </geokml>
      <house_number>135</house_number>
      <road>Pilkington Avenue</road>
      <village>Wylde Green</village>
--- a/docs/api/Status.md
+++ b/docs/api/Status.md
@@ -0,0 +1,66 @@
+# Status
+
+Useful for checking if the service and database are running. The JSON output also shows
+when the database was last updated.
+
+## Parameters
+
+* `format=[text|json]` (defaults to 'text')
+
+
+## Output
+
+#### Text format
+
+```
+   https://nominatim.openstreetmap.org/status.php
+```
+
+will return HTTP status code 200 and print `OK`.
+
+On error it will return HTTP status code 500 and print a message, e.g.
+`ERROR: Database connection failed`.
+
+
+
+#### JSON format
+
+```
+   https://nominatim.openstreetmap.org/status.php?format=json
+```
+
+will return HTTP code 200 and a structure
+
+```json
+  {
+      "status": 0,
+      "message": "OK",
+      "data_updated": "2020-05-04T14:47:00+00:00",
+      "software_version": "3.6.0-0",
+      "database_version": "3.6.0-0"
+  }
+```
+
+The `software_version` field contains the version of Nominatim used to serve
+the API. The `database_version` field contains the version of the data format
+in the database.
+
+On error it will also return HTTP status code 200 and a structure with error
+code and message, e.g.
+
+```json
+   {
+       "status": 700,
+       "message": "Database connection failed"
+   }
+```
+
+Possible status codes are
+
+   |     | message              | notes                                             |
+   |-----|----------------------|---------------------------------------------------|
+   | 700 | "No database"        | connection failed                                 |
+   | 701 | "Module failed"      | database could not load nominatim.so              |
+   | 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
+   | 703 | "Query failed"       | test query against a database table failed        |
+   | 704 | "No value"           | test query worked but returned no results         |
--- a/docs/customize/Country-Settings.md
+++ b/docs/customize/Country-Settings.md
@@ -0,0 +1,149 @@
+# Customizing Per-Country Data
+
+Whenever an OSM object is imported into Nominatim, the object is first assigned
+a country. Nominatim can use this information to adapt various aspects of
+the address computation to the local customs of the country. This section
+explains how country assignment works and the principal per-country
+localizations.
+
+## Country assignment
+
+Countries are assigned on the basis of country data from the OpenStreetMap
+input data itself. Countries are expected to be tagged according to the
+[administrative boundary schema](https://wiki.openstreetmap.org/wiki/Tag:boundary%3Dadministrative):
+an OSM relation with `boundary=administrative` and `admin_level=2`. Nominatim
+uses the country code to distinguish the countries.
+
+If there is no country data available for a point, then Nominatim uses the
+fallback data imported from `data/country_osm_grid.sql.gz`. This was computed
+from OSM data as well but is guaranteed to cover all countries.
+
+Some OSM objects may also be located outside any country, for example a buoy
+in the middle of the ocean. These objects do not get any country assigned and
+get a default treatment when it comes to localized handling of data.
+
+## Per-country settings
+
+### Global country settings
+
+The main place to configure settings per country is the file
+`settings/country_settings.yaml`. This file has one section per country that
+is recognised by Nominatim. Each section is tagged with the country code
+(in lower case) and contains the different localization information. Only
+countries which are listed in this file are taken into account for computations.
+
+For example, the section for Andorra looks like this:
+
+```
+    partition: 35
+    languages: ca
+    names: !include country-names/ad.yaml
+    postcode:
+      pattern: "(ddd)"
+      output: AD\1
+```
+
+The individual settings are described below.
+
+#### `partition`
+
+Nominatim internally splits the data into multiple tables to improve
+performance. The partition number tells Nominatim into which table to put
+the country. This is purely internal management and has no effect on the
+output data.
+
+The default is to have one partition per country.
+
+#### `languages`
+
+A comma-separated list of ISO-639 language codes of default languages in the
+country. These are the languages used in name tags without a language suffix.
+Note that this is not necessarily the same as the list of official languages
+in the country. There may be officially recognised languages in a country
+which are only ever used in name tags with the appropriate language suffixes.
+Conversely, a non-official language may appear a lot in the name tags, for
+example when used as an unofficial Lingua Franca.
+
+List the languages in order of frequency of appearance with the most frequently
+used language first. It is not recommended to add languages when there are only
+very few occurrences.
+
+If only one language is listed, then Nominatim will 'auto-complete' the
+language of names without an explicit language-suffix.
+
+#### `names`
+
+List of names of the country and its translations. These names are used as
+a baseline. It is always possible to search countries by the given names, no
+matter what other names are in the OSM data. They are also used as a fallback
+when a needed translation is not available.
+
+!!! Note
+    The list of names per country is currently fairly large because Nominatim
+    supports translations in many languages per default. That is why the
+    name lists have been separated out into extra files. You can find the
+    name lists in the file `settings/country-names/<country code>.yaml`.
+    The names section in the main country settings file only refers to these
+    files via the special `!include` directive.
+
+#### `postcode`
+
+Describes the format of the postcode that is in use in the country.
+
+When a country has no official postcodes, set this to no. Example:
+
+```
+ae:
+    postcode: no
+```
+
+When a country has a postcode, you need to state the postcode pattern and
+the default output format. Example:
+
+```
+bm:
+    postcode:
+      pattern: "(ll)[ -]?(dd)"
+      output: \1 \2
+```
+
+The **pattern** is a regular expression that describes the possible formats
+accepted as a postcode. The pattern follows the standard syntax for
+[regular expressions in Python](https://docs.python.org/3/library/re.html#regular-expression-syntax)
+with two extra shortcuts: `d` is a shortcut for a single digit ([0-9])
+and `l` for a single ASCII letter ([A-Z]).
+
+Use match groups to indicate groups in the postcode that may optionally be
+separated with a space or a hyphen.
+
+For example, the postcode for Bermuda above always consists of two letters
+and two digits. They may optionally be separated by a space or hyphen. That
+means that Nominatim will consider `AB56`, `AB 56` and `AB-56` spelling variants
+for one and the same postcode.
+
+Never add the country code in front of the postcode pattern. Nominatim will
+automatically accept variants with a country code prefix for all postcodes.
+
+The **output** field is an optional field that describes what the canonical
+spelling of the postcode should be. The format is the
+[regular expression expand syntax](https://docs.python.org/3/library/re.html#re.Match.expand) referring back to the bracket groups in the pattern.
+
+Most simple postcodes only have one spelling variant. In that case, the
+**output** can be omitted. The postcode will simply be used as is.
+
+In the Bermuda example above, the canonical spelling would be to have a space
+between letters and digits.
+
+!!! Warning
+    When your postcode pattern covers multiple variants of the postcode, then
+    you must explicitly state the canonical output or Nominatim will not
+    handle the variations correctly.
+
+### Other country-specific configuration
+
+There are some other configuration files where you can set localized settings
+according to the assigned country. These are:
+
+ * [Place ranking configuration](Ranking.md)
+
+Please see the linked documentation sections for more information.
--- a/docs/customize/Import-Styles.md
+++ b/docs/customize/Import-Styles.md
@@ -0,0 +1,153 @@
+## Configuring the Import
+
+Which OSM objects are added to the database and which of the tags are used
+can be configured via the import style configuration file. This
+is a JSON file which contains a list of rules which are matched against every
+tag of every object and then assign the tag its specific role.
+
+The style to use is given by the `NOMINATIM_IMPORT_STYLE` configuration
+option. There are a number of default styles, which are explained in detail
+in the [Import section](../admin/Import.md#filtering-imported-data). These
+standard styles may be referenced by their name.
+
+You can also create your own custom style. Put the style file into your
+project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
+It is always recommended to start with one of the standard styles and customize
+those. You find the standard styles under the name `import-<stylename>.style`
+in the standard Nominatim configuration path (usually `/etc/nominatim` or
+`/usr/local/etc/nominatim`).
+
+The remainder of the page describes the format of the file.
+
+### Configuration Rules
+
+A single rule looks like this:
+
+```json
+{
+    "keys" : ["key1", "key2", ...],
+    "values" : {
+        "value1" : "prop",
+        "value2" : "prop1,prop2"
+    }
+}
+```
+
+A rule first defines a list of keys to apply the rule to. This is always a list
+of strings. The string may have four forms. An empty string matches against
+any key. A string that ends in an asterisk `*` is a prefix match and accordingly
+matches against any key that starts with the given string (minus the `*`). A
+suffix match can be defined similarly with a string that starts with a `*`. Any
+other string constitutes an exact match.
+
+The second part of the rules defines a list of values and the properties that
+apply to a successful match. Value strings may be either empty, which
+means that they match any value, or describe an exact match. Prefix
+or suffix matching of values is not possible.
+
+For a rule to match, it has to find a valid combination of keys and values. The
+resulting property is that of the matched values.
+
+The rules in a configuration file are processed sequentially and the first
+match for each tag wins.
+
+A rule where key and value are the empty string is special. This defines the
+fallback when none of the rules match. The fallback is always used as a last
+resort when nothing else matches, no matter where the rule appears in the file.
+Defining multiple fallback rules is not allowed; the behaviour in that case
+is undefined.
+
+### Tag Properties
+
+One or more of the following properties may be given for each tag:
+
+* `main`
+
+    A principal tag. A new row will be added for the object with key and value
+    as `class` and `type`.
+
+* `with_name`
+
+    When the tag is a principal tag (`main` property set): only really add a new
+    row, if there is any name tag found (a reference tag is not sufficient, see
+    below).
+
+* `with_name_key`
+
+    When the tag is a principal tag (`main` property set): only really add a new
+    row, if there is also a name tag that matches the key of the principal tag.
+    For example, if the main tag is `bridge=yes`, then it will only be added as
+    an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
+    If this property is set, all other names that are not domain-specific are
+    ignored.
+
+* `fallback`
+
+    When the tag is a principal tag (`main` property set): only really add a new
+    row, when no other principal tags for this object have been found. Only one
+    fallback tag can win for an object.
+
+* `operator`
+
+    When the tag is a principal tag (`main` property set): also include the
+    `operator` tag in the list of names. This is a special construct for an
+    outdated tagging practice in OSM. Fuel stations and chain restaurants
+    in particular used to have the name of the chain tagged as `operator`.
+    These days the chain can be more commonly found in the `brand` tag but
+    there is still enough old data around to warrant this special case.
+
+* `name`
+
+    Add tag to the list of names.
+
+* `ref`
+
+    Add tag to the list of names as a reference. At the moment this only means
+    that the object is not considered to be named for `with_name`.
+
+* `address`
+
+    Add tag to the list of address tags. If the tag starts with `addr:` or
+    `is_in:`, then this prefix is cut off before adding it to the list.
+
+* `postcode`
+
+    Add the value as a postcode to the address tags. If multiple tags are
+    candidate for postcodes, one wins out and the others are dropped.
+
+* `country`
+
+    Add the value as a country code to the address tags. The value must be a
+    two letter country code, otherwise it is ignored. If there are multiple
+    tags that match, then one wins out and the others are dropped.
+
+* `house`
+
+    If no principal tags can be found for the object, still add the object with
+    `class`=`place` and `type`=`house`. Use this for address nodes that have no
+    other function.
+
+* `interpolation`
+
+    Add this object as an address interpolation (appears as `class`=`place` and
+    `type`=`houses` in the database).
+
+* `extra`
+
+    Add tag to the list of extra tags.
+
+* `skip`
+
+    Skip the tag completely. Useful when a custom default fallback is defined
+    or to define exceptions to rules.
+
+A rule can define as many of these properties for one match as it likes. For
+example, if the property is `"main,extra"` then the tag will open a new row
+but also have the tag appear in the list of extra tags.
+
+### Changing the Style of Existing Databases
+
+There is normally no issue changing the style of a database that is already
+imported and now kept up-to-date with change files. Just be aware that any
+change in the style applies to updates only. If you want to change the data
+that is already in the database, then a reimport is necessary.
--- a/docs/customize/Overview.md
+++ b/docs/customize/Overview.md
@@ -0,0 +1,20 @@
+Nominatim comes with a predefined set of configuration options that should
+work for most standard installations. If you have special requirements, there
+are many places where the configuration can be adapted. This chapter describes
+the following configurable parts:
+
+* [Global Settings](Settings.md) has a detailed description of all parameters that
+  can be set in your local `.env` configuration
+* [Import styles](Import-Styles.md) explains how to write your own import style
+  in order to control what kind of OSM data will be imported
+* [Place ranking](Ranking.md) describes the configuration around classifying
+  places in terms of their importance and their role in an address
+* [Tokenizers](Tokenizers.md) describes the configuration of the module
+  responsible for analysing and indexing names
+* [Special Phrases](Special-Phrases.md) are common nouns or phrases that
+  can be used in search to identify a class of places
+
+There are also guides for adding the following external data:
+
+* [US house numbers from the TIGER dataset](Tiger.md)
+* [External postcodes](Postcodes.md)
--- a/docs/customize/Postcodes.md
+++ b/docs/customize/Postcodes.md
@@ -0,0 +1,37 @@
+# External postcode data
+
+Nominatim creates a table of known postcode centroids during import. This table
+is used for searches of postcodes and for adding postcodes to places where the
+OSM data does not provide one. These postcode centroids are mainly computed
+from the OSM data itself. In addition, Nominatim supports reading postcode
+information from an external CSV file, to supplement the postcodes that are
+missing in OSM.
+
+To enable external postcode support, simply put one CSV file per country into
+your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
+two-letter country code for which to apply the file. The file may also be
+gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
+
+The CSV file must use commas as a delimiter and have a header line. Nominatim
+expects three columns to be present: `postcode`, `lat` and `lon`. All other
+columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
+postcode centroids in WGS84.
+
+The postcode files are loaded only when there is data for the given country
+in your database. For example, if there is a `us_postcodes.csv` file in your
+project directory but you import only an excerpt of Italy, then the US postcodes
+will simply be ignored.
+
+As a rule, the external postcode data should be put into the project directory
+**before** starting the initial import. Still, you can add, remove and update the
+external postcode data at any time. Simply
+run:
+
+```
+nominatim refresh --postcodes
+```
+
+to make the changes visible in your database. Be aware, however, that the changes
+only have an immediate effect on searches for postcodes. Postcodes that were
+added to places are only updated, when they are reindexed. That usually happens
+only during replication updates.
--- a/docs/customize/Ranking.md
+++ b/docs/customize/Ranking.md
@@ -0,0 +1,139 @@
+# Place Ranking in Nominatim
+
+Nominatim uses two metrics to rank a place: search rank and address rank.
+This chapter explains what place ranking means and how it can be customized.
+
+## Search rank
+
+The search rank describes the extent and importance of a place. It is used
+when ranking search results. Simply put, if there are two results for a
+search query which are otherwise equal, then the result with the _lower_
+search rank will appear higher in the result list.
+
+Search ranks are not so important these days because many well-known
+places use the Wikipedia importance ranking instead.
+
+The following table gives an overview of the kind of features that Nominatim
+expects for each rank:
+
+rank   | typical place types             | extent
+-------|---------------------------------|-------
+1-3    | oceans, continents              | -
+4      | countries                       | -
+5-9    | states, regions, provinces      | -
+10-12  | counties                        | -
+13-16  | cities, municipalities, islands | 15 km
+17-18  | towns, boroughs                 | 4 km
+19     | villages, suburbs               | 2 km
+20     | hamlets, farms, neighbourhoods  |  1 km
+21-25  | isolated dwellings, city blocks | 500 m
+
+The extent column describes how far a feature is assumed to reach when it
+is mapped only as a point. Larger features like countries and states are usually
+available with their exact area in the OpenStreetMap data. That is why no extent
+is given.
+
+## Address rank
+
+The address rank describes where a place shows up in an address hierarchy.
+Usually only administrative boundaries and place nodes and areas are
+eligible to be part of an address. Places that should not appear in the
+address must have an address rank of 0.
+
+The following table gives an overview how ranks are mapped to address parts:
+
+ rank        | address part
+-------------|-------------
+ 1-3         | _unused_
+ 4           | country
+ 5-9         | state
+ 10-12       | county
+ 13-16       | city
+ 17-21       | suburb
+ 22-24       | neighbourhood
+ 25          | squares, farms, localities
+ 26-27       | street
+ 28-30       | POI/house number
+
+The country rank 4 usually doesn't show up in the address parts of an object.
+The country is determined indirectly from the country code.
+
+Ranks 5-24 can be assigned more or less freely. They make up the major part
+of the address.
+
+Rank 25 is also an addressing rank but it is special because while it can be
+the parent to a POI with an addr:place of the same name, it cannot be a parent
+to streets. Use it for place features that are technically on the same level
+as a street (e.g. squares, city blocks) or for places that should not normally
+appear in an address unless explicitly tagged so (e.g. place=locality which
+should be uninhabited and as such not addressable).
+
+The street ranks 26 and 27 are handled slightly differently. Only one object
+from these ranks shows up in an address.
+
+For POI level objects like shops, buildings or house numbers always use rank 30.
+Rank 28 is reserved for house number interpolations. 29 is for internal use
+only.
+
+## Rank configuration
+
+Search and address ranks are assigned to a place when it is first imported
+into the database. There are a few hard-coded rules for the assignment:
+
+ * postcodes follow special rules according to their length
+ * boundaries that are not areas and railway=rail are dropped completely
+ * the following are always search rank 30 and address rank 0:
+    * highway nodes
+    * landuse that is not an area
+
+Other than that, the ranks can be freely assigned via the JSON file according
+to their type and the country they are in. The name of the config file to be
+used can be changed with the setting `NOMINATIM_ADDRESS_LEVEL_CONFIG`.
+
+The address level configuration must consist of an array of configuration
+entries, each containing a tag definition and an optional country array:
+
+```
+[ {
+    "tags" : {
+      "place" : {
+        "county" : 12,
+        "city" : 16
+      },
+      "landuse" : {
+        "residential" : 22,
+        "" : 30
+      }
+    }
+  },
+  {
+    "countries" : [ "ca", "us" ],
+    "tags" : {
+      "boundary" : {
+        "administrative8" : 18,
+        "administrative9" : 20
+      },
+      "landuse" : {
+        "residential" : [22, 0]
+      }
+    }
+  }
+]
+```
+
+The `countries` field contains a list of countries (as ISO 3166-1 alpha 2 code)
+for which the definition applies. When the field is omitted, then the
+definition is used as a fallback, when nothing more specific for a given
+country exists.
+
+`tags` contains the ranks for key/value pairs. The ranks can be either a
+single number, in which case they are the search and address rank, or an array
+of search and address rank (in that order). The value may be left empty.
+Then the rank is used when no more specific value is found for the given
+key.
+
+Countries and key/value combinations may appear in multiple definitions. Just
+make sure that each combination of country/key/value appears only once per
+file. Otherwise the import will fail with a UNIQUE INDEX constraint
+violation.
+
--- a/docs/customize/Settings.md
+++ b/docs/customize/Settings.md
@@ -0,0 +1,649 @@
+This section provides a reference of all configuration parameters that can
+be used with Nominatim.
+
+# Configuring Nominatim
+
+Nominatim uses [dotenv](https://github.com/theskumar/python-dotenv) to manage
+its configuration settings. There are two means to set configuration
+variables: through an `.env` configuration file or through an environment
+variable.
+
+The `.env` configuration file needs to be placed into the
+[project directory](../admin/Import.md#creating-the-project-directory). It
+must contain configuration parameters in `<parameter>=<value>` format.
+Please refer to the dotenv documentation for details.
+
+The configuration options may also be set in the form of shell environment
+variables. This is particularly useful, when you want to temporarily change
+a configuration option. For example, to force the replication process to
+download the next change, you can temporarily disable the update interval:
+
+    NOMINATIM_REPLICATION_UPDATE_INTERVAL=0 nominatim replication --once
+
+If a configuration option is defined through both the `.env` file and an
+environment variable, then the latter takes precedence.
+
+## Configuration Parameter Reference
+
+### Import and Database Settings
+
+#### NOMINATIM_DATABASE_DSN
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Database connection string |
+| **Format:**        | string: `pgsql:<param1>=<value1>;<param2>=<value2>;...` |
+| **Default:**       | pgsql:dbname=nominatim |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Sets the connection parameters for the Nominatim database. At a minimum
+the name of the database (`dbname`) is required. You can set any additional
+parameter that is understood by libpq. See the [Postgres documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) for a full list.
+
+!!! note
+    It is usually recommended not to set the password directly in this
+    configuration parameter. Use a
+    [password file](https://www.postgresql.org/docs/current/libpq-pgpass.html)
+    instead.
+
+
+#### NOMINATIM_DATABASE_WEBUSER
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Database query user |
+| **Format:**        | string  |
+| **Default:**       | www-data |
+| **After Changes:** | cannot be changed after import |
+
+Defines the name of the database user that will run search queries. Usually
+this is the user under which the webserver is executed. When running Nominatim
+via php-fpm, you can also define a separate query user. The Postgres user
+needs to be set up before starting the import.
+
+Nominatim grants minimal rights to this user to all tables that are needed
+for running geocoding queries.
+
+
+#### NOMINATIM_DATABASE_MODULE_PATH
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Directory where to find the PostgreSQL server module |
+| **Format:**        | path |
+| **Default:**       | _empty_ (use `<project_directory>/module`) |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:**       | Legacy tokenizer only |
+
+Defines the directory in which the PostgreSQL server module `nominatim.so`
+is stored. The directory and module must be accessible by the PostgreSQL
+server.
+
+For information on how to use this setting when working with external databases,
+see [Advanced Installations](../admin/Advanced-Installations.md).
+
+The option is only used by the Legacy tokenizer and ignored otherwise.
+
+
+#### NOMINATIM_TOKENIZER
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Tokenizer used for normalizing and parsing queries and names |
+| **Format:**        | string |
+| **Default:**       | legacy |
+| **After Changes:** | cannot be changed after import |
+
+Sets the tokenizer type to use for the import. For more information on
+available tokenizers and how they are configured, see
+[Tokenizers](../customize/Tokenizers.md).
+
+
+#### NOMINATIM_TOKENIZER_CONFIG
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration file for the tokenizer |
+| **Format:**        | path |
+| **Default:**       | _empty_ (default file depends on tokenizer) |
+| **After Changes:** | see documentation for each tokenizer |
+
+Points to the file with additional configuration for the tokenizer.
+See the [Tokenizer](../customize/Tokenizers.md) descriptions for details
+on the file format.
+
+If a relative path is given, then the file is searched first relative to the
+project directory and then in the global settings directory.
+
+#### NOMINATIM_MAX_WORD_FREQUENCY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of occurrences before a word is considered frequent |
+| **Format:**        | int |
+| **Default:**       | 50000 |
+| **After Changes:** | cannot be changed after import |
+| **Comment:**       | Legacy tokenizer only |
+
+The word frequency count is used by the Legacy tokenizer to automatically
+identify _stop words_. Any partial term that occurs more often than what
+is defined in this setting, is effectively ignored during search.
+
+
+#### NOMINATIM_LIMIT_REINDEXING
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Avoid invalidating large areas |
+| **Format:**        | bool |
+| **Default:**       | yes |
+
+Nominatim computes the address of each place at indexing time. This has the
+advantage of making search faster but also means that more objects need to
+be invalidated when the data changes. For example, changing the name of
+the state of Florida would require recomputing every single address point
+in the state to make the new name searchable in conjunction with addresses.
+
+Setting this option to 'yes' means that Nominatim skips reindexing of contained
+objects when the area becomes too large.
+
+
+#### NOMINATIM_LANGUAGES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Restrict search languages |
+| **Format:**        | string: comma-separated list of language codes |
+| **Default:**       | _empty_ |
+
+Normally Nominatim will include all language variants of name:XX
+in the search index. Set this to a comma separated list of language
+codes, to restrict import to a subset of languages.
+
+Currently only affects the initial import of country names and special phrases.
+
+
+#### NOMINATIM_TERM_NORMALIZATION
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Rules for normalizing terms for comparisons |
+| **Format:**        | string: semicolon-separated list of ICU rules |
+| **Default:**       | :: NFD (); [[:Nonspacing Mark:] [:Cf:]] >;  :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC (); |
+| **Comment:**       | Legacy tokenizer only |
+
+[Special phrases](Special-Phrases.md) have stricter matching requirements than
+normal search terms. They must appear exactly in the query after this term
+normalization has been applied.
+
+Only has an effect on the Legacy tokenizer. For the ICU tokenizer the rules
+defined in the
+[normalization section](Tokenizers.md#normalization-and-transliteration)
+will be used.
+
+
+#### NOMINATIM_USE_US_TIGER_DATA
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable searching for Tiger house number data |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+
+When this setting is enabled, search and reverse queries also take data
+from [Tiger house number data](Tiger.md) into account.
+
+
+#### NOMINATIM_USE_AUX_LOCATION_DATA
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable searching in external house number tables |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:**       | Do not use. |
+
+When this setting is enabled, search queries also take data from external
+house number tables into account.
+
+*Warning:* This feature is currently unmaintained and should not be used.
+
+
+#### NOMINATIM_HTTP_PROXY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Use HTTP proxy when downloading data |
+| **Format:**        | boolean |
+| **Default:**       | no |
+
+When this setting is enabled and at least
+[NOMINATIM_HTTP_PROXY_HOST](#nominatim_http_proxy_host) and
+[NOMINATIM_HTTP_PROXY_PORT](#nominatim_http_proxy_port) are set, the
+configured proxy will be used, when downloading external data like
+replication diffs.
+
+
+#### NOMINATIM_HTTP_PROXY_HOST
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Host name of the proxy to use |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the proxy host name.
+
+
+#### NOMINATIM_HTTP_PROXY_PORT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Port number of the proxy to use |
+| **Format:**        | integer |
+| **Default:**       | 3128 |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the port number to use with the proxy.
+
+
+#### NOMINATIM_HTTP_PROXY_LOGIN
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Username for proxies that require login |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the username for proxies that require a login.
+
+
+#### NOMINATIM_HTTP_PROXY_PASSWORD
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Password for proxies that require login |
+| **Format:**        | string |
+| **Default:**       | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the password for proxies that require a login.
+
+
+#### NOMINATIM_OSM2PGSQL_BINARY
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Location of the osm2pgsql binary |
+| **Format:**        | path |
+| **Default:**       | _empty_ (use binary shipped with Nominatim) |
+| **Comment:**       | EXPERT ONLY |
+
+Nominatim uses [osm2pgsql](https://osm2pgsql.org) to load the OSM data
+initially into the database. Nominatim comes bundled with a version of
+osm2pgsql that is guaranteed to be compatible. Use this setting to use
+a different binary instead. You should do this only when you know exactly
+what you are doing. If the osm2pgsql version is not compatible, then the
+result is undefined.
+
+
+#### NOMINATIM_WIKIPEDIA_DATA_PATH
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Directory with the wikipedia importance data |
+| **Format:**        | path |
+| **Default:**       | _empty_ (project directory) |
+
+Set a custom location for the
+[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
+unset, Nominatim expects the data to be saved in the project directory.
+
+#### NOMINATIM_ADDRESS_LEVEL_CONFIG
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration file for rank assignments |
+| **Format:**        | path |
+| **Default:**       | address-levels.json |
+
+The _address level configuration_ defines the rank assignments for places. See
+[Place Ranking](Ranking.md) for a detailed explanation what rank assignments
+are and what the configuration file must look like.
+
+When a relative path is given, then the file is searched first relative to the
+project directory and then in the global settings directory.
+
+
+#### NOMINATIM_IMPORT_STYLE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Configuration to use for the initial OSM data import |
+| **Format:**        | string or path |
+| **Default:**       | extratags |
+
+The _style configuration_ describes which OSM objects and tags are taken
+into consideration for the search database. Nominatim comes with a set
+of pre-configured styles, that may be configured here.
+
+You can also write your own custom style and point the setting to the file
+with the style. When a relative path is given, then the style file is searched
+first relative to the project directory and then in the global settings
+directory.
+
+See [Import Styles](Import-Styles.md)
+for more information on the available internal styles and the format of the
+configuration file.
+
+#### NOMINATIM_FLATNODE_FILE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Location of osm2pgsql flatnode file |
+| **Format:**        | path |
+| **Default:**       | _empty_ (do not use a flatnode file) |
+| **After Changes:** | Only change when moving the file physically. |
+
+The `osm2pgsql flatnode file` is a file that efficiently stores geographic
+location for OSM nodes. For larger imports it can significantly speed up
+the import. When this option is unset, then osm2pgsql uses a PostgreSQL table
+to store the locations.
+
+When a relative path is given, then the flatnode file is created/searched
+relative to the project directory.
+
+!!! warning
+
+    The flatnode file is not only used during the initial import but also
+    when adding new data with `nominatim add-data` or `nominatim replication`.
+    Make sure you keep the flatnode file around and this setting unmodified,
+    if you plan to add more data or run regular updates.
+
+
+#### NOMINATIM_TABLESPACE_*
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Group of settings for distributing the database over tablespaces |
+| **Format:**        | string |
+| **Default:**       | _empty_ (do not use a table space) |
+| **After Changes:** | no effect after initial import |
+
+Nominatim allows distributing the search database over up to 10 different
+[PostgreSQL tablespaces](https://www.postgresql.org/docs/current/manage-ag-tablespaces.html).
+If you use this option, make sure that the tablespaces exist before starting
+the import.
+
+The available tablespace groups are:
+
+NOMINATIM_TABLESPACE_SEARCH_DATA
+:    Data used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_SEARCH_INDEX
+:    Indexes used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_OSM_DATA
+:    Raw OSM data cache used for import and updates.
+
+NOMINATIM_TABLESPACE_OSM_INDEX
+:    Indexes on the raw OSM data cache.
+
+NOMINATIM_TABLESPACE_PLACE_DATA
+:    Data table with the pre-filtered but still unprocessed OSM data.
+     Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_PLACE_INDEX
+:    Indexes on raw data table. Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_DATA
+:    Data tables used for computing search terms and addresses of places
+     during import and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_INDEX
+:    Indexes on the data tables for search term and address computation.
+     Used only for import and updates.
+
+NOMINATIM_TABLESPACE_AUX_DATA
+:    Auxiliary data tables for non-OSM data, e.g. for Tiger house number data.
+
+NOMINATIM_TABLESPACE_AUX_INDEX
+:    Indexes on auxiliary data tables.
+
+
+### Replication Update Settings
+
+#### NOMINATIM_REPLICATION_URL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Base URL of the replication service |
+| **Format:**        | url |
+| **Default:**       | https://planet.openstreetmap.org/replication/minute |
+| **After Changes:** | run `nominatim replication --init` |
+
+Replication services deliver updates to OSM data. Use this setting to choose
+which replication service to use. See [Updates](../admin/Update.md) for more
+information on how to set up regular updates.
+
+#### NOMINATIM_REPLICATION_MAX_DIFF
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Maximum amount of data to download per update cycle (in MB) |
+| **Format:**        | integer |
+| **Default:**       | 50 |
+| **After Changes:** | restart the replication process |
+
+At each update cycle Nominatim downloads diffs until either no more diffs
+are available on the server (i.e. the database is up-to-date) or the limit
+given in this setting is exceeded. Nominatim guarantees to download at least
+one diff, if one is available, no matter how small the setting.
+
+The default for this setting is fairly conservative because Nominatim keeps
+all data downloaded in one cycle in RAM. Using large values in a production
+server may interfere badly with the search frontend because it evicts data
+from RAM that is needed for speedy answers to incoming requests. It is usually
+a better idea to keep this setting lower and run multiple update cycles
+to catch up with updates.
+
+When catching up in non-production mode, for example after the initial import,
+the setting can easily be changed temporarily on the command line:
+
+    NOMINATIM_REPLICATION_MAX_DIFF=3000 nominatim replication
+
+
+#### NOMINATIM_REPLICATION_UPDATE_INTERVAL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Publication interval of the replication service (in seconds) |
+| **Format:**        | integer |
+| **Default:**       | 75 |
+| **After Changes:** | restart the replication process |
+
+This setting determines when Nominatim will next attempt to download a new
+update. The time is computed from the publication date of the last diff
+downloaded. Setting this to a slightly higher value than the actual
+publication interval avoids unnecessary rechecks.
+
+
+#### NOMINATIM_REPLICATION_RECHECK_INTERVAL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Wait time to recheck for a pending update (in seconds)  |
+| **Format:**        | integer |
+| **Default:**       | 60 |
+| **After Changes:** | restart the replication process |
+
+When replication updates are run in continuous mode (using `nominatim replication`),
+this setting determines how long Nominatim waits until it looks for updates
+again when updates were not available on the server.
+
+Note that this is different from
+[NOMINATIM_REPLICATION_UPDATE_INTERVAL](#nominatim_replication_update_interval).
+Nominatim will never attempt to query for new updates for UPDATE_INTERVAL
+seconds after the current database date. Only after the update interval has
+passed it asks for new data. If then no new data is found, it waits for
+RECHECK_INTERVAL seconds before it attempts again.
+
+### API Settings
+
+#### NOMINATIM_CORS_NOACCESSCONTROL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Send permissive CORS access headers |
+| **Format:**        | boolean |
+| **Default:**       | yes |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When this setting is enabled, API HTTP responses include the HTTP
+[CORS](https://en.wikipedia.org/wiki/CORS) headers
+`access-control-allow-origin: *` and `access-control-allow-methods: OPTIONS,GET`.
+
+#### NOMINATIM_MAPICON_URL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | URL prefix for static icon images |
+| **Format:**        | url |
+| **Default:**       | _empty_ |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When a mapicon URL is configured, then Nominatim includes an additional `icon`
+field in the responses, pointing to an appropriate icon for the place type.
+
+Map icons used to be included in Nominatim itself but now have moved to the
+[nominatim-ui](https://github.com/osm-search/nominatim-ui/) project. If you
+want the URL to be included in API responses, make the `/mapicon`
+directory of the project available under a public URL and point this setting
+to the directory.
+
+
+#### NOMINATIM_DEFAULT_LANGUAGE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Language of responses when no language is requested |
+| **Format:**        | language code |
+| **Default:**       | _empty_ (use the local language of the feature) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim localizes the place names in responses when the corresponding
+translation is available. Users can request a custom language setting through
+the HTTP accept-languages header or through the explicit parameter
+[accept-languages](../api/Search.md#language-of-results). If neither is
+given, it falls back to this setting. If the setting is also empty, then
+the local language (in OSM: the name tag without any language suffix) is
+used.
+
+
+#### NOMINATIM_SEARCH_BATCH_MODE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable a special batch query mode |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+
+This feature is currently undocumented and potentially broken.
+
+
+#### NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Threshold for switching the search index lookup strategy |
+| **Format:**        | integer |
+| **Default:**       | 500 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+This setting defines the threshold over which a name is no longer considered
+as rare. When searching for places with rare names, only the name is used
+for place lookups. Otherwise the name and any address information is used.
+
+This setting only has an effect after `nominatim refresh --word-counts` has
+been called to compute the word frequencies.
+
+
+#### NOMINATIM_LOOKUP_MAX_COUNT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Maximum number of OSM ids accepted by /lookup |
+| **Format:**        | integer |
+| **Default:**       | 50 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+The /lookup endpoint accepts a list of ids to look up address details for. This
+setting restricts the number of places a user may look up with a single
+request.
+
+
+#### NOMINATIM_POLYGON_OUTPUT_MAX_TYPES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of different geometry formats that may be returned |
+| **Format:**        | integer |
+| **Default:**       | 1 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim supports returning full geometries of places. The geometries may
+be requested in different formats with one of the
+[`polygon_*` parameters](../api/Search.md#polygon-output). Use this
+setting to restrict the number of geometry types that may be requested
+with a single query.
+
+Setting this parameter to 0 disables polygon output completely.
+
+### Logging Settings
+
+#### NOMINATIM_LOG_DB
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Log requests into the database |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging requests into a database table with this setting. The logs
+can be found in the table `new_query_log`.
+
+When using this logging method, it is advisable to set up a job that
+regularly clears out old logging information. Nominatim will not do that
+on its own.
+
+Can be used at the same time as NOMINATIM_LOG_FILE.
+
+#### NOMINATIM_LOG_FILE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Log requests into a file |
+| **Format:**        | path |
+| **Default:**       | _empty_ (logging disabled) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging of requests into a file with this setting by setting the log
+file where to log to. A relative file name is assumed to be relative to
+the project directory.
+
+
+The entries in the log file have the following format:
+
+    <request time> <execution time in s> <number of results> <type> "<query string>"
+
+Request time is the time when the request was started. The execution time is
+given in seconds and corresponds to the time the query took executing in PHP.
+type contains the name of the endpoint used.
+
+Can be used at the same time as NOMINATIM_LOG_DB.
--- a/docs/customize/Special-Phrases.md
+++ b/docs/customize/Special-Phrases.md
@@ -0,0 +1,34 @@
+# Special phrases
+
+## Importing OSM user-maintained special phrases
+
+As described in the [Import section](../admin/Import.md), it is possible to
+import special phrases from the wiki with the following command:
+
+```sh
+nominatim special-phrases --import-from-wiki
+```
+
+## Importing custom special phrases
+
+It is also possible to import special phrases from a csv file.
+To do so, use the following command:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file>
+```
+
+Note that the two previous import commands will update the phrases in your database.
+This means that if you import some phrases from a csv file, only the phrases
+present in the csv file will be kept in the database. All other phrases will
+be removed.
+
+If you want to only add new phrases and not update the other ones you can add
+the argument `--no-replace` to the import command. For example:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file> --no-replace
+```
+
+This will add the phrases present in the csv file into the database without
+removing the other ones.
--- a/docs/customize/Tiger.md
+++ b/docs/customize/Tiger.md
@@ -0,0 +1,28 @@
+# Installing TIGER housenumber data for the US
+
+Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
+
+  1. Get preprocessed TIGER data:
+
+        cd $PROJECT_DIR
+        wget https://nominatim.org/data/tiger-nominatim-preprocessed-latest.csv.tar.gz
+
+  2. Import the data into your Nominatim database:
+
+        nominatim add-data --tiger-data tiger-nominatim-preprocessed-latest.csv.tar.gz
+
+  3. Enable use of the Tiger data in your existing `.env` file by adding:
+
+        echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
+
+  4. Apply the new settings:
+
+        nominatim refresh --functions --website
+
+
+See the [TIGER-data project](https://github.com/osm-search/TIGER-data) for more
+information on how the data got preprocessed.
+
--- a/docs/customize/Tokenizers.md
+++ b/docs/customize/Tokenizers.md
@@ -0,0 +1,382 @@
+# Tokenizers
+
+The tokenizer module in Nominatim is responsible for analysing the names given
+to OSM objects and the terms of an incoming query in order to make sure they
+can be matched appropriately.
+
+Nominatim offers different tokenizer modules, which behave differently and have
+different configuration options. This section describes the tokenizers and how
+they can be configured.
+
+!!! important
+    The use of a tokenizer is tied to a database installation. You need to choose
+    and configure the tokenizer before starting the initial import. Once the import
+    is done, you cannot switch to another tokenizer anymore. Reconfiguring the
+    chosen tokenizer is very limited as well. See the comments in each tokenizer
+    section.
+
+## Legacy tokenizer
+
+The legacy tokenizer implements the analysis algorithms of older Nominatim
+versions. It uses a special Postgresql module to normalize names and queries.
+This tokenizer is automatically installed and used when upgrading an older
+database. It should not be used for new installations anymore.
+
+### Compiling the PostgreSQL module
+
+The tokenizer needs a special C module for PostgreSQL which is not compiled
+by default. If you need the legacy tokenizer, compile Nominatim as follows:
+
+```
+mkdir build
+cd build
+cmake -DBUILD_MODULE=on
+make
+```
+
+### Enabling the tokenizer
+
+To enable the tokenizer add the following line to your project configuration:
+
+```
+NOMINATIM_TOKENIZER=legacy
+```
+
+The Postgresql module for the tokenizer is available in the `module` directory
+and also installed with the remainder of the software under
+`lib/nominatim/module/nominatim.so`. You can specify a custom location for
+the module with
+
+```
+NOMINATIM_DATABASE_MODULE_PATH=<path to directory where nominatim.so resides>
+```
+
+This is in particular useful when the database runs on a different server.
+See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+
+There are no other configuration options for the legacy tokenizer. All
+normalization functions are hard-coded.
+
+## ICU tokenizer
+
+The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
+normalize names and queries. It also offers configurable decomposition and
+abbreviation handling.
+This tokenizer is currently the default.
+
+To enable the tokenizer add the following line to your project configuration:
+
+```
+NOMINATIM_TOKENIZER=icu
+```
+
+### How it works
+
+On import the tokenizer processes names in the following three stages:
+
+1. During the **Sanitizer step** incoming names are cleaned up and converted to
+   **full names**. This step can be used to regularize spelling, split multi-name
+   tags into their parts and tag names with additional attributes. See the
+   [Sanitizers section](#sanitizers) below for available cleaning routines.
+2. The **Normalization** part removes all information from the full names
+   that are not relevant for search.
+3. The **Token analysis** step takes the normalized full names and creates
+   all transliterated variants under which the name should be searchable.
+   See the [Token analysis](#token-analysis) section below for more
+   information.
+
+During query time, only normalization and transliteration are relevant.
+An incoming query is first split into name chunks (this usually means splitting
+the string at the commas) and then each part is normalised and transliterated.
+The result is used to look up places in the search index.
+
+### Configuration
+
+The ICU tokenizer is configured using a YAML file which can be configured using
+`NOMINATIM_TOKENIZER_CONFIG`. The configuration is read on import and then
+saved as part of the internal database status. Later changes to the variable
+have no effect.
+
+Here is an example configuration file:
+
+``` yaml
+normalization:
+    - ":: lower ()"
+    - "ß > 'ss'" # German szet is unambiguously equal to double ss
+transliteration:
+    - !include /etc/nominatim/icu-rules/extended-unicode-to-asccii.yaml
+    - ":: Ascii ()"
+sanitizers:
+    - step: split-name-list
+token-analysis:
+    - analyzer: generic
+      variants:
+          - !include icu-rules/variants-ca.yaml
+          - words:
+              - road -> rd
+              - bridge -> bdge,br,brdg,bri,brg
+      mutations:
+          - pattern: 'ä'
+            replacements: ['ä', 'ae']
+```
+
+The configuration file contains four sections:
+`normalization`, `transliteration`, `sanitizers` and `token-analysis`.
+
+#### Normalization and Transliteration
+
+The normalization and transliteration sections each define a set of
+ICU rules that are applied to the names.
+
+The **normalisation** rules are applied after sanitation. They should remove
+any information that is not relevant for search at all. Usual rules to be
+applied here are: lower-casing, removing of special characters, cleanup of
+spaces.
+
+The **transliteration** rules are applied at the end of the tokenization
+process to transfer the name into an ASCII representation. Transliteration can
+be useful to allow for further fuzzy matching, especially between different
+scripts.
+
+Each section must contain a list of
+[ICU transformation rules](https://unicode-org.github.io/icu/userguide/transforms/general/rules.html).
+The rules are applied in the order in which they appear in the file.
+You can also include additional rules from an external yaml file using the
+`!include` tag. The included file must contain a valid YAML list of ICU rules
+and may again include other files.
+
+!!! warning
+    The ICU rule syntax contains special characters that conflict with the
+    YAML syntax. You should therefore always enclose the ICU rules in
+    double-quotes.
+
+#### Sanitizers
+
+The sanitizers section defines an ordered list of functions that are applied
+to the name and address tags before they are further processed by the tokenizer.
+They allow cleaning up the tagging and bringing it to a standardized form more
+suitable for building the search index.
+
+!!! hint
+    Sanitizers only have an effect on how the search index is built. They
+    do not change the information about each place that is saved in the
+    database. In particular, they have no influence on how the results are
+    displayed. The returned results always show the original information as
+    stored in the OpenStreetMap database.
+
+Each entry contains information of a sanitizer to be applied. It has a
+mandatory parameter `step` which gives the name of the sanitizer. Depending
+on the type, it may have additional parameters to configure its operation.
+
+The order of the list matters. The sanitizers are applied exactly in the order
+that is configured. Each sanitizer works on the results of the previous one.
+
+The following is a list of sanitizers that are shipped with Nominatim.
+
+##### split-name-list
+
+::: nominatim.tokenizer.sanitizers.split_name_list
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
+
+##### strip-brace-terms
+
+::: nominatim.tokenizer.sanitizers.strip_brace_terms
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
+
+##### tag-analyzer-by-language
+
+::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
+
+##### clean-housenumbers
+
+::: nominatim.tokenizer.sanitizers.clean_housenumbers
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
+
+##### clean-postcodes
+
+::: nominatim.tokenizer.sanitizers.clean_postcodes
+    selection:
+        members: False
+    rendering:
+        heading_level: 6
+
+
+#### Token Analysis
+
+Token analyzers take a full name and transform it into one or more normalized
+forms that are then saved in the search index. In its simplest form, the
+analyzer only applies the transliteration rules. More complex analyzers
+create additional spelling variants of a name. This is useful to handle
+decomposition and abbreviation.
+
+The ICU tokenizer may use different analyzers for different names. To select
+the analyzer to be used, the name must be tagged with the `analyzer` attribute
+by a sanitizer (see for example the
+[tag-analyzer-by-language sanitizer](#tag-analyzer-by-language)).
+
+The token-analysis section contains the list of configured analyzers. Each
+analyzer must have an `id` parameter that uniquely identifies the analyzer.
+The only exception is the default analyzer that is used when no special
+analyzer was selected. There are analysers with special ids:
+
+ * '@housenumber'. If an analyzer with that name is present, it is used
+   for normalization of house numbers.
+ * '@postcode'. If an analyzer with that name is present, it is used
+   for normalization of postcodes.
+
+Different analyzer implementations may exist. To select the implementation,
+the `analyzer` parameter must be set. The different implementations are
+described in the following.
+
+##### Generic token analyzer
+
+The generic analyzer `generic` is able to create variants from a list of given
+abbreviation and decomposition replacements and introduce spelling variations.
+
+###### Variants
+
+The optional 'variants' section defines lists of replacements which create alternative
+spellings of a name. To create the variants, a name is scanned from left to
+right and the longest matching replacement is applied until the end of the
+string is reached.
+
+The variants section must contain a list of replacement groups. Each group
+defines a set of properties that describes where the replacements are
+applicable. In addition, the `words` section defines the list of replacements
+to be made. The basic replacement description is of the form:
+
+```
+<source>[,<source>[...]] => <target>[,<target>[...]]
+```
+
+The left side contains one or more `source` terms to be replaced. The right side
+lists one or more replacements. Each source is replaced with each replacement
+term.
+
+!!! tip
+    The source and target terms are internally normalized using the
+    normalization rules given in the configuration. This ensures that the
+    strings match as expected. In fact, it is better to use unnormalized
+    words in the configuration because then it is possible to change the
+    rules for normalization later without having to adapt the variant rules.
+
+###### Decomposition
+
+In its standard form, only full words match against the source. There
+is a special notation to match the prefix and suffix of a word:
+
+``` yaml
+- ~strasse => str  # matches "strasse" as full word and in suffix position
+- hinter~ => hntr  # matches "hinter" as full word and in prefix position
+```
+
+There is no facility to match a string in the middle of the word. The suffix
+and prefix notation automatically trigger the decomposition mode: two variants
+are created for each replacement, one with the replacement attached to the word
+and one separate. So in above example, the tokenization of "hauptstrasse" will
+create the variants "hauptstr" and "haupt str". Similarly, the name "rote strasse"
+triggers the variants "rote str" and "rotestr". By having decomposition work
+both ways, it is sufficient to create the variants at index time. The variant
+rules are not applied at query time.
+
+To avoid automatic decomposition, use the '|' notation:
+
+``` yaml
+- ~strasse |=> str
+```
+
+simply changes "hauptstrasse" to "hauptstr" and "rote strasse" to "rote str".
+
+###### Initial and final terms
+
+It is also possible to restrict replacements to the beginning and end of a
+name:
+
+``` yaml
+- ^south => s  # matches only at the beginning of the name
+- road$ => rd  # matches only at the end of the name
+```
+
+So the first example would trigger a replacement for "south 45th street" but
+not for "the south beach restaurant".
+
+###### Replacements vs. variants
+
+The replacement syntax `source => target` works as a pure replacement. It changes
+the name instead of creating a variant. To create an additional version, you'd
+have to write `source => source,target`. As this is a frequent case, there is
+a shortcut notation for it:
+
+```
+<source>[,<source>[...]] -> <target>[,<target>[...]]
+```
+
+The simple arrow causes an additional variant to be added. Note that
+decomposition has an effect here on the source as well. So a rule
+
+``` yaml
+- "~strasse -> str"
+```
+
+means that for a word like `hauptstrasse` four variants are created:
+`hauptstrasse`, `haupt strasse`, `hauptstr` and `haupt str`.
+
+###### Mutations
+
+The 'mutations' section in the configuration describes an additional set of
+replacements to be applied after the variants have been computed.
+
+Each mutation is described by two parameters: `pattern` and `replacements`.
+The pattern must contain a single regular expression to search for in the
+variant name. The regular expressions need to follow the syntax for
+[Python regular expressions](https://docs.python.org/3/library/re.html#regular-expression-syntax).
+Capturing groups are not permitted.
+`replacements` must contain a list of strings that the pattern
+should be replaced with. Each occurrence of the pattern is replaced with
+all given replacements. Be mindful of combinatorial explosion of variants.
+
+###### Modes
+
+The generic analyser supports a special mode `variant-only`. When configured
+then it consumes the input token and emits only variants (if any exist). Enable
+the mode by adding:
+
+```
+  mode: variant-only
+```
+
+to the analyser configuration.
+
+##### Housenumber token analyzer
+
+The analyzer `housenumbers` is purpose-made to analyze house numbers. It
+creates variants with optional spaces between numbers and letters. Thus,
+house numbers of the form '3 a', '3A', '3-A' etc. are all considered equivalent.
+
+The analyzer cannot be customized.
+
+##### Postcode token analyzer
+
+The analyzer `postcodes` is purpose-made to analyze postcodes. It supports
+a 'lookup' variant of the token, which produces variants with optional
+spaces. Use together with the clean-postcodes sanitizer.
+
+The analyzer cannot be customized.
+
+### Reconfiguration
+
+Changing the configuration after the import is currently not possible, although
+this feature may be added at a later time.
--- a/docs/data-sources/overview.md
+++ b/docs/data-sources/overview.md
@@ -1,4 +0,0 @@
-# Additional Data Sources
-
-This guide explains how data sources other than OpenStreetMap mentioned in
-the install instructions got obtained and converted.
--- a/docs/develop/Database-Layout.md
+++ b/docs/develop/Database-Layout.md
@@ -0,0 +1,167 @@
+# Database Layout
+
+### Import tables
+
+OSM data is initially imported using [osm2pgsql](https://osm2pgsql.org).
+Nominatim uses its own data output style 'gazetteer', which differs from the
+output style created for map rendering.
+
+The import process creates the following tables:
+
+![osm2pgsql tables](osm2pgsql-tables.svg)
+
+The `planet_osm_*` tables are the usual backing tables for OSM data. Note
+that Nominatim uses them to look up special relations and to find nodes on
+ways.
+
+The gazetteer style produces a single table `place` as output with the following
+columns:
+
+ * `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
+ * `osm_id` - original OSM ID
+ * `class` - key of principal tag defining the object type
+ * `type` - value of principal tag defining the object type
+ * `name` - collection of tags that contain a name or reference
+ * `admin_level` - numerical value of the tagged administrative level
+ * `address` - collection of tags defining the address of an object
+ * `extratags` - collection of additional interesting tags that are not
+                 directly relevant for searching
+ * `geometry` - geometry of the object (in WGS84)
+
+A single OSM object may appear multiple times in this table when it is tagged
+with multiple tags that may constitute a principal tag. Take for example a
+motorway bridge. In OSM, this would be a way which is tagged with
+`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
+once with `class` of `highway` and once with a `class` of `bridge`. Thus the
+*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
+
+How raw OSM tags are mapped to the columns in the place table is to a certain
+degree configurable. See [Customizing Import Styles](../customize/Import-Styles.md)
+for more information.
+
+### Search tables
+
+The following tables carry all information needed to do the search:
+
+![search tables](search-tables.svg)
+
+The **placex** table is the central table that saves all information about the
+searchable places in Nominatim. The basic columns are the same as for the
+place table and have the same meaning. The placex tables adds the following
+additional columns:
+
+ * `place_id` - the internal unique ID to identify the place
+ * `partition` - the id to use with partitioned tables (see below)
+ * `geometry_sector` - a location hash used for geographically close ordering
+ * `parent_place_id` - the next higher place in the address hierarchy, only
+   relevant for POI-type places (with rank 30)
+ * `linked_place_id` - place ID of the place this object has been merged with.
+   When this ID is set, then the place is invisible for search.
+ * `importance` - measure how well known the place is
+ * `rank_search`, `rank_address` - search and address rank (see [Customizing ranking](../customize/Ranking.md))
+ * `wikipedia` - the wikipedia page used for computing the importance of the place
+ * `country_code` - the country the place is located in
+ * `housenumber` - normalized housenumber, if the place has one
+ * `postcode` - computed postcode for the place
+ * `indexed_status` - processing status of the place (0 - ready, 1 - freshly inserted, 2 - needs updating, 100 - needs deletion)
+ * `indexed_date` - timestamp when the place was processed last
+ * `centroid` - a point feature for the place
+
+The **location_property_osmline** table is a special table for
+[address interpolations](https://wiki.openstreetmap.org/wiki/Addresses#Using_interpolation).
+The columns have the same meaning and use as the columns with the same name in
+the placex table. Only three columns are special:
+
+ * `startnumber` and `endnumber` - beginning and end of the number range
+    for the interpolation
+ * `interpolationtype` - a string `odd`, `even` or `all` to indicate
+    the interval between the numbers
+
+Address interpolations are always ways in OSM, which is why there is no column
+`osm_type`.
+
+The **location_postcode** table holds computed centroids of all postcodes that
+can be found in the OSM data. The meaning of the columns is again the same
+as that of the placex table.
+
+Every place needs an address, a set of surrounding places that describe the
+location of the place. The set of address places is made up of OSM places
+themselves. The **place_addressline** table cross-references for each place
+all the places that make up its address. Two columns define the address
+relation:
+
+  * `place_id` - reference to the place being addressed
+  * `address_place_id` - reference to the place serving as an address part
+
+Most of the columns cache information from the placex entry of the address
+part. The exceptions are:
+
+  * `fromarea` - is true if the address part has an area geometry and can
+    therefore be considered precise
+  * `isaddress` - is true if the address part should show up in the address
+    output. Sometimes there are multiple places competing for the same address
+    type (e.g. multiple cities) and this field resolves the tie.
+
+The **search_name** table contains the search index proper. It saves for each
+place the terms with which the place can be found. The terms are split into
+the name itself and all terms that make up the address. The table mirrors some
+of the columns from placex for faster lookup.
+
+Search terms are not saved as strings. Each term is assigned an integer and those
+integers are saved in the name and address vectors of the search_name table. The
+**word** table serves as the lookup table from string to such a word ID. The
+exact content of the word table depends on the [tokenizer](Tokenizers.md) used.
+
+## Address computation tables
+
+Next to the main search tables, there is a set of secondary helper tables used
+to compute the address relations between places. These tables are partitioned.
+Each country is assigned a partition number in the country_name table (see
+below) and the data is then split between a set of tables, one for each
+partition. Note that Nominatim still manually manages partitioned tables.
+Native support for partitions in PostgreSQL only became usable with version 13.
+It will be a little while before Nominatim drops support for older versions.
+
+![address tables](address-tables.svg)
+
+The **search_name_X** tables are used to look up streets that appear in the
+`addr:street` tag.
+
+The **location_area_large_X** tables are used to look up larger areas
+(administrative boundaries and place nodes) either through their geographic
+closeness or through `addr:*` entries.
+
+The **location_road_X** tables are used to find the closest street for a
+dependent place.
+
+All three tables cache specific information from the placex table for their
+selected subset of places:
+
+ * `keywords` and `name_vector` contain lists of term ids (from the word table)
+   that the full name of the place should match against
+ * `isguess` is true for places that are not described by an area
+
+All other columns reflect their counterpart in the placex table.
+
+## Static data tables
+
+Nominatim also creates a number of static tables at import:
+
+ * `nominatim_properties` saves settings that must not be changed after
+    import
+ * `address_levels` saves the rank information from the
+   [ranking configuration](../customize/Ranking.md)
+ * `country_name` contains a fallback of names for all countries, their
+   default languages and saves the assignment of countries to partitions.
+ * `country_osm_grid` provides a fallback for country geometries
+
+## Auxiliary data tables
+
+Finally there are some tables for auxiliary data:
+
+ * `location_property_tiger` - saves housenumber from the Tiger import. Its
+   layout is similar to that of `location_property_osmline`.
+ * `place_class_*` tables are helper tables to facilitate lookup of POIs
+   by their class and type. They exist because it is not possible to create
+   combined indexes with geometries.
+
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -0,0 +1,133 @@
+# Setting up Nominatim for Development
+
+This chapter gives an overview of how to set up Nominatim for development
+and how to run tests.
+
+!!! Important
+    This guide assumes that you develop under the latest version of Ubuntu. You
+    can of course also use your favourite distribution. You just might have to
+    adapt the commands below slightly, in particular the commands for installing
+    additional software.
+
+## Installing Nominatim
+
+The first step is to install Nominatim itself. Please follow the installation
+instructions in the [Admin section](../admin/Installation.md). You don't need
+to set up a webserver for development, the webserver that is included with PHP
+is sufficient.
+
+If you want to run Nominatim in a VM via Vagrant, use the default `ubuntu` setup.
+Vagrant's libvirt provider runs out-of-the-box under Ubuntu. You also need to
+install an NFS daemon to enable directory sharing between host and guest. The
+following packages should get you started:
+
+    sudo apt install vagrant vagrant-libvirt libvirt-daemon nfs-kernel-server
+
+## Prerequisites for testing and documentation
+
+The Nominatim test suite consists of behavioural tests (using behave) and
+unit tests (using PHPUnit for PHP code and pytest for Python code).
+It has the following additional requirements:
+
+* [behave test framework](https://behave.readthedocs.io) >= 1.2.6
+* [phpunit](https://phpunit.de) (9.5 is known to work)
+* [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer)
+* [Pylint](https://pylint.org/) (CI always runs the latest version from pip)
+* [mypy](http://mypy-lang.org/) (plus typing information for external libs)
+* [Python Typing Extensions](https://github.com/python/typing_extensions) (for Python < 3.9)
+* [pytest](https://pytest.org)
+
+The documentation is built with mkdocs:
+
+* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
+* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
+
+### Installing prerequisites on Ubuntu/Debian
+
+Some of the Python packages require the newest version which is not yet
+available with the current distributions. Therefore it is recommended to
+install pip to get the newest versions.
+
+To install all necessary packages run:
+
+```sh
+sudo apt install php-cgi phpunit php-codesniffer \
+                 python3-pip python3-setuptools python3-dev
+
+pip3 install --user behave mkdocs mkdocstrings pytest pylint \
+                    mypy types-PyYAML types-jinja2 types-psycopg2 types-psutil
+```
+
+The `mkdocs` executable will be located in `.local/bin`. You may have to add
+this directory to your path, for example by running:
+
+```
+echo 'export PATH=~/.local/bin:$PATH' >> ~/.profile
+```
+
+If your distribution does not have PHPUnit 7.3+, you can install it (as well
+as CodeSniffer) via composer:
+
+```
+sudo apt-get install composer
+composer global require "squizlabs/php_codesniffer=*"
+composer global require "phpunit/phpunit=8.*"
+```
+
+The binaries are found in `.config/composer/vendor/bin`. You need to add this
+to your PATH as well:
+
+```
+echo 'export PATH=~/.config/composer/vendor/bin:$PATH' >> ~/.profile
+```
+
+
+## Executing Tests
+
+All tests are located in the `/test` directory.
+
+To run all tests just go to the build directory and run make:
+
+```sh
+cd build
+make test
+```
+
+For more information about the structure of the tests and how to change and
+extend the test suite, see the [Testing chapter](Testing.md).
+
+## Documentation Pages
+
+The [Nominatim documentation](https://nominatim.org/release-docs/develop/) is
+built using the [MkDocs](https://www.mkdocs.org/) static site generation
+framework. The master branch is automatically deployed every night on
+[https://nominatim.org/release-docs/develop/](https://nominatim.org/release-docs/develop/)
+
+To build the documentation, go to the build directory and run
+
+```
+make doc
+INFO - Cleaning site directory
+INFO - Building documentation to directory: /home/vagrant/build/site-html
+```
+
+This runs `mkdocs build` plus extra transformation of some files and adds
+symlinks (see `CMakeLists.txt` for the exact steps).
+
+Now you can start a webserver for local testing:
+
+```
+build> make serve-doc
+[server:296] Serving on http://127.0.0.1:8000
+[handlers:62] Start watching changes
+```
+
+If you develop inside a Vagrant virtual machine, use a port that is forwarded
+to your host:
+
+```
+build> PYTHONPATH=$SRCDIR mkdocs serve --dev-addr 0.0.0.0:8088
+[server:296] Serving on http://0.0.0.0:8088
+[handlers:62] Start watching changes
+```
--- a/docs/develop/Documentation.md
+++ b/docs/develop/Documentation.md
@@ -1,36 +0,0 @@
-# Documentation Pages
-
-The [Nominatim documentation](https://nominatim.org/release-docs/develop/) is built using the [MkDocs](https://www.mkdocs.org/) static site generation framework. The master branch is automatically deployed every night on under [https://nominatim.org/release-docs/develop/]()
-
-To preview local changes:
-
-1. Install MkDocs
-
-   ```
-   pip3 install --user mkdocs 
-   ```
-
-
-2. In build directory run
-
-   ```
-   make doc
-   INFO - Cleaning site directory
-   INFO - Building documentation to directory: /home/vagrant/build/site-html 
-   ```
-
-   This runs `mkdocs build` plus extra transformion of some files and adds symlinks (see `CMakeLists.txt` for the exact steps).
-
-
-3. Start webserver for local testing
-
-   ```
-   mkdocs serve
-   [server:296] Serving on http://127.0.0.1:8000
-   [handlers:62] Start watching changes
-   ```
-
-   If you develop inside a Vagrant virtual machine:
-   * add port forwarding to your Vagrantfile, e.g. `config.vm.network "forwarded_port", guest: 8000, host: 8000`
-   * use `mkdocs serve --dev-addr 0.0.0.0:8000` because the default localhost
-      IP does not get forwarded.
--- a/docs/develop/ICU-Tokenizer-Modules.md
+++ b/docs/develop/ICU-Tokenizer-Modules.md
@@ -0,0 +1,227 @@
+# Writing custom sanitizer and token analysis modules for the ICU tokenizer
+
+The [ICU tokenizer](../customize/Tokenizers.md#icu-tokenizer) provides a
+highly customizable method to pre-process and normalize the name information
+of the input data before it is added to the search index. It comes with a
+selection of sanitizers and token analyzers which you can use to adapt your
+installation to your needs. If the provided modules are not enough, you can
+also provide your own implementations. This section describes the API
+of sanitizers and token analysis.
+
+!!! warning
+    This API is currently in early alpha status. While this API is meant to
+    be a public API on which other sanitizers and token analyzers may be
+    implemented, it is not guaranteed to be stable at the moment.
+
+
+## Using non-standard sanitizers and token analyzers
+
+Sanitizer names (in the `step` property) and token analysis names (in the
+`analyzer`) may refer to externally supplied modules. There are two ways
+to include external modules: through a library or from the project directory.
+
+To include a module from a library, use the absolute import path as name and
+make sure the library can be found in your PYTHONPATH.
+
+To use a custom module without creating a library, you can put the module
+somewhere in your project directory and then use the relative path to the
+file. Include the whole name of the file including the `.py` ending.
+
+## Custom sanitizer modules
+
+A sanitizer module must export a single factory function `create` with the
+following signature:
+
+``` python
+def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]
+```
+
+The function receives the custom configuration for the sanitizer and must
+return a callable (function or class) that transforms the name and address
+terms of a place. When a place is processed, then a `ProcessInfo` object
+is created from the information that was queried from the database. This
+object is sequentially handed to each configured sanitizer, so that each
+sanitizer receives the result of processing from the previous sanitizer.
+After the last sanitizer is finished, the resulting name and address lists
+are forwarded to the token analysis module.
+
+Sanitizer functions are instantiated once and then called for each place
+that is imported or updated. They don't need to be thread-safe.
+If multi-threading is used, each thread creates its own instance of
+the function.
+
+### Sanitizer configuration
+
+::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
+    rendering:
+        show_source: no
+        heading_level: 6
+
+### The main filter function of the sanitizer
+
+The filter function receives a single object of type `ProcessInfo`
+which has three members:
+
+ * `place`: read-only information about the place being processed.
+   See PlaceInfo below.
+ * `names`: The current list of names for the place. Each name is a
+   PlaceName object.
+ * `address`: The current list of address names for the place. Each name
+   is a PlaceName object.
+
+While the `place` member is provided for information only, the `names` and
+`address` lists are meant to be manipulated by the sanitizer. It may add and
+remove entries, change information within a single entry (for example by
+adding extra attributes) or completely replace the list with a different one.
+
+#### PlaceInfo - information about the place
+
+::: nominatim.data.place_info.PlaceInfo
+    rendering:
+        show_source: no
+        heading_level: 6
+
+
+#### PlaceName - extended naming information
+
+::: nominatim.data.place_name.PlaceName
+    rendering:
+        show_source: no
+        heading_level: 6
+
+
+### Example: Filter for US street prefixes
+
+The following sanitizer removes the directional prefixes from street names
+in the US:
+
+``` python
+import re
+
+def _filter_function(obj):
+    if obj.place.country_code == 'us' \
+       and obj.place.rank_address >= 26 and obj.place.rank_address <= 27:
+        for name in obj.names:
+            name.name = re.sub(r'^(north|south|west|east) ',
+                               '',
+                               name.name,
+                               flags=re.IGNORECASE)
+
+def create(config):
+    return _filter_function
+```
+
+This is the simplest form of a sanitizer module. It defines a single
+filter function and implements the required `create()` function by returning
+the filter.
+
+The filter function first checks if the object is interesting for the
+sanitizer. Namely it checks if the place is in the US (through `country_code`)
+and if the place is a street (a `rank_address` of 26 or 27). If the
+conditions are met, then it goes through all available names and
+removes any leading directional prefix using a simple regular expression.
+
+Save the source code in a file in your project directory, for example as
+`us_streets.py`. Then you can use the sanitizer in your `icu_tokenizer.yaml`:
+
+``` yaml
+...
+sanitizers:
+    - step: us_streets.py
+...
+```
+
+!!! warning
+    This example is just a simplified show case on how to create a sanitizer.
+    It is not really ready for real-world use: while the sanitizer would
+    correctly transform `West 5th Street` into `5th Street`, it would also
+    shorten a simple `North Street` to `Street`.
+
+For more sanitizer examples, have a look at the sanitizers provided by Nominatim.
+They can be found in the directory
+[`nominatim/tokenizer/sanitizers`](https://github.com/osm-search/Nominatim/tree/master/nominatim/tokenizer/sanitizers).
+
+
+## Custom token analysis module
+
+::: nominatim.tokenizer.token_analysis.base.AnalysisModule
+    rendering:
+        show_source: no
+        heading_level: 6
+
+
+::: nominatim.tokenizer.token_analysis.base.Analyzer
+    rendering:
+        show_source: no
+        heading_level: 6
+
+### Example: Creating acronym variants for long names
+
+The following example of a token analysis module creates acronyms from
+very long names and adds them as a variant:
+
+``` python
+class AcronymMaker:
+    """ This class is the actual analyzer.
+    """
+    def __init__(self, norm, trans):
+        self.norm = norm
+        self.trans = trans
+
+
+    def get_canonical_id(self, name):
+        # In simple cases, the normalized name can be used as a canonical id.
+        return self.norm.transliterate(name.name).strip()
+
+
+    def compute_variants(self, name):
+        # The transliterated form of the name always makes up a variant.
+        variants = [self.trans.transliterate(name)]
+
+        # Only create acronyms from very long words.
+        if len(name) > 20:
+            # Take the first letter from each word to form the acronym.
+            acronym = ''.join(w[0] for w in name.split())
+            # If that leads to an acronym with at least three letters,
+            # add the resulting acronym as a variant.
+            if len(acronym) > 2:
+                # Never forget to transliterate the variants before returning them.
+                variants.append(self.trans.transliterate(acronym))
+
+        return variants
+
+# The following two functions are the module interface.
+
+def configure(rules, normalizer, transliterator):
+    # There is no configuration to parse and no data to set up.
+    # Just return an empty configuration.
+    return None
+
+
+def create(normalizer, transliterator, config):
+    # Return a new instance of our token analysis class above.
+    return AcronymMaker(normalizer, transliterator)
+```
+
+Given the name `Trans-Siberian Railway`, the code above would return the full
+name `Trans-Siberian Railway` and the acronym `TSR` as variant, so that
+searching would work for both.
+
+## Sanitizers vs. Token analysis - what to use for variants?
+
+It is not always clear when to implement variations in the sanitizer and
+when to write a token analysis module. Just take the acronym example
+above: it would also have been possible to write a sanitizer which adds the
+acronym as an additional name to the name list. The result would have been
+similar. So which should be used when?
+
+The most important thing to keep in mind is that variants created by the
+token analysis are only saved in the word lookup table. They do not need
+extra space in the search index. If there are many spelling variations, this
+can mean quite a significant amount of space is saved.
+
+When creating additional names with a sanitizer, these names are completely
+independent. In particular, they can be fed into different token analysis
+modules. This gives a much greater flexibility but at the price that the
+additional names increase the size of the search index.
+
--- a/docs/develop/Import.md
+++ b/docs/develop/Import.md
@@ -1,170 +0,0 @@
-# OSM Data Import
-
-OSM data is initially imported using osm2pgsql. Nominatim uses its own data
-output style 'gazetteer', which differs from the output style created for
-map rendering.
-
-## Database Layout
-
-The gazetteer style produces a single table `place` with the following rows:
-
- * `osm_type` - kind of OSM object (**N** - node, **W** - way, **R** - relation)
- * `osm_id` - original OSM ID
- * `class` - key of principal tag defining the object type
- * `type` - value of principal tag defining the object type
- * `name` - collection of tags that contain a name or reference
- * `admin_level` - numerical value of the tagged administrative level
- * `address` - collection of tags defining the address of an object
- * `extratags` - collection of additional interesting tags that are not
-                 directly relevant for searching
- * `geometry` - geometry of the object (in WGS84)
-
-A single OSM object may appear multiple times in this table when it is tagged
-with multiple tags that may constitute a principal tag. Take for example a
-motorway bridge. In OSM, this would be a way which is tagged with
-`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
-once with `class` of `highway` and once with a `class` of `bridge`. Thus the
-*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
-
-## Configuring the Import
-
-How tags are interpreted and assigned to the different `place` columns can be
-configured via the import style configuration file (`CONST_Import_style`). This
-is a JSON file which contains a list of rules which are matched against every
-tag of every object and then assign the tag its specific role.
-
-### Configuration Rules
-
-A single rule looks like this:
-
-```json
-{
-    "keys" : ["key1", "key2", ...],
-    "values" : {
-        "value1" : "prop",
-        "value2" : "prop1,prop2"
-    }
-}
-```
-
-A rule first defines a list of keys to apply the rule to. This is always a list
-of strings. The string may have four forms. An empty string matches against
-any key. A string that ends in an asterisk `*` is a prefix match and accordingly
-matches against any key that starts with the given string (minus the `*`). A
-suffix match can be defined similarly with a string that starts with a `*`. Any
-other string constitutes an exact match.
-
-The second part of the rules defines a list of values and the properties that
-apply to a successful match. Value strings may be either empty, which
-means that they match any value, or describe an exact match. Prefix
-or suffix matching of values is not possible.
-
-For a rule to match, it has to find a valid combination of keys and values. The
-resulting property is that of the matched values.
-
-The rules in a configuration file are processed sequentially and the first
-match for each tag wins.
-
-A rule where key and value are the empty string is special. This defines the
-fallback when none of the rules match. The fallback is always used as a last
-resort when nothing else matches, no matter where the rule appears in the file.
-Defining multiple fallback rules is not allowed. What happens in this case,
-is undefined.
-
-### Tag Properties
-
-One or more of the following properties may be given for each tag:
-
-* `main`
-
-    A principal tag. A new row will be added for the object with key and value
-    as `class` and `type`.
-
-* `with_name`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, if there is any name tag found (a reference tag is not sufficient, see
-    below).
-
-* `with_name_key`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, if there is also a name tag that matches the key of the principal tag.
-    For example, if the main tag is `bridge=yes`, then it will only be added as
-    an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
-    If this property is set, all other names that are not domain-specific are
-    ignored.
-
-* `fallback`
-
-    When the tag is a principal tag (`main` property set): only really add a new
-    row, when no other principal tags for this object have been found. Only one
-    fallback tag can win for an object.
-
-* `operator`
-
-    When the tag is a principal tag (`main` property set): also include the
-    `operator` tag in the list of names. This is a special construct for an
-    out-dated tagging practise in OSM. Fuel stations and chain restaurants
-    in particular used to have the name of the chain tagged as `operator`.
-    These days the chain can be more commonly found in the `brand` tag but
-    there is still enough old data around to warrant this special case.
-
-* `name`
-
-    Add tag to the list of names.
-
-* `ref`
-
-    Add tag to the list of names as a reference. At the moment this only means
-    that the object is not considered to be named for `with_name`.
-
-* `address`
-
-    Add tag to the list of address tags. If the tag starts with `addr:` or
-    `is_in:`, then this prefix is cut off before adding it to the list.
-
-* `postcode`
-
-    Add the value as a postcode to the address tags. If multiple tags are
-    candidate for postcodes, one wins out and the others are dropped.
-
-* `country`
-
-    Add the value as a country code to the address tags. The value must be a
-    two letter country code, otherwise it is ignored. If there are multiple
-    tags that match, then one wins out and the others are dropped.
-
-* `house`
-
-    If no principle tags can be found for the object, still add the object with
-    `class`=`place` and `type`=`house`. Use this for address nodes that have no
-    other function.
-
-* `interpolation`
-
-    Add this object as an address interpolation (appears as `class`=`place` and
-    `type`=`houses` in the database).
-
-* `extra`
-
-    Add tag to the list of extra tags.
-
-* `skip`
-
-    Skip the tag completely. Useful when a custom default fallback is defined
-    or to define exceptions to rules.
-
-A rule can define as many of these properties for one match as it likes. For
-example, if the property is `"main,extra"` then the tag will open a new row
-but also have the tag appear in the list of extra tags.
-
-There are a number of pre-defined styles in the `settings/` directory. It is
-advisable to start from one of these styles when defining your own.
-
-### Changing the Style of Existing Databases
-
-There is normally no issue changing the style of a database that is already
-imported and now kept up-to-date with change files. Just be aware that any
-change in the style applies to updates only. If you want to change the data
-that is already in the database, then a reimport is necessary.
--- a/docs/develop/Indexing.md
+++ b/docs/develop/Indexing.md
@@ -0,0 +1,152 @@
+# Indexing Places
+
+In Nominatim, the word __indexing__ refers to the process that takes the raw
+OpenStreetMap data from the place table, enriches it with address information
+and creates the search indexes. This section explains the basic data flow.
+
+
+## Initial import
+
+After osm2pgsql has loaded the raw OSM data into the place table,
+the data is copied to the final search tables placex and location_property_osmline.
+While they are copied, some basic properties are added:
+
+ * country_code, geometry_sector and partition
+ * initial search and address rank
+
+In addition the column `indexed_status` is set to `1` marking the place as one
+that needs to be indexed.
+
+All this happens in the triggers `placex_insert` and `osmline_insert`.
+
+## Indexing
+
+The main work horse of the data import is the indexing step, where Nominatim
+takes every place from the placex and location_property_osmline tables where
+the indexed_status != 0 and computes the search terms and the address parts
+of the place.
+
+The indexing happens in three major steps:
+
+1. **Data preparation** - The indexer gets the data for the place to be indexed
+   from the database.
+
+2. **Search name processing** - The prepared data is given to the
+   tokenizer which computes the search terms from the names
+   and potentially other information.
+
+3. **Address processing** - The indexer then hands the prepared data and the
+   tokenizer information back to the database via an `UPDATE` statement which
+   also sets the indexed_status to `0`. This triggers the update triggers
+   `placex_update`/`osmline_update` which do the work of computing address
+   parts and filling all the search tables.
+
+When computing the address terms of a place, Nominatim relies on the processed
+search names of all the address parts. That is why places are processed in rank
+order, from smallest rank to largest. To ensure correct handling of linked
+place nodes, administrative boundaries are processed before all other places.
+
+Apart from these restrictions, each place can be indexed independently
+from the others. This allows a large degree of parallelization during the indexing.
+It also means that the indexing process can be interrupted at any time and
+will simply pick up where it left off when restarted.
+
+### Data preparation
+
+The data preparation step computes and retrieves all data for a place that
+might be needed for the next step of processing the search name. That includes
+
+* location information (country code)
+* place classification (class, type, ranks)
+* names (including names of linked places)
+* address information (`addr:*` tags)
+
+Data preparation is implemented in pl/PgSQL mostly in the functions
+`placex_indexing_prepare()` and `get_interpolation_address()`.
+
+#### `addr:*` tag inheritance
+
+Nominatim has limited support for inheriting address tags from a building
+to POIs inside the building. This only works when the address tags are on the
+building outline. Any rank 30 object inside such a building or on its outline
+inherits all address tags when it does not have any address tags of its own.
+
+The inheritance is computed in the data preparation step.
+
+### Search name processing
+
+The prepared place information is handed to the tokenizer next. This is a
+Python module responsible for processing the names from both name and address
+terms and building up the word index from them. The process is explained in
+more detail in the [Tokenizer chapter](Tokenizers.md).
+
+### Address processing
+
+Finally, the preprocessed place information and the results of the search name
+processing are written back to the database. At this point the update triggers
+of the placex/location_property_osmline tables take over and fill all the
+dependent tables. This makes up the most work-intensive part of the indexing.
+
+Nominatim distinguishes between dependent and independent places.
+**Dependent places** are all places on rank 30: house numbers, POIs etc. These
+places don't have a full address of their own. Instead they are attached to
+a parent street or place and use the information of the parent for searching
+and displaying information. Everything else are **independent places**: streets,
+parks, water bodies, suburbs, cities, states etc.  They receive a full address
+on their own.
+
+The address processing for both types of places is very different.
+
+#### Independent places
+
+To compute the address of an independent place Nominatim searches for all
+places that cover the place to compute the address for at least partially.
+For places with an area, that area is used to check for coverage. For place
+nodes an artificial square area is computed according to the rank of
+the place. The lower the rank the larger the area. The `location_area_large_X`
+tables are there to facilitate the lookup. All places that can function as
+the address of another place are saved in those tables.
+
+`addr:*` and `isin:*` tags are taken into account to compute the address, too.
+Nominatim will give preference to places with the same name as in these tags
+when looking for places in the vicinity. If there are no matching place names
+at all, then the tags are at least added to the search index. That means that
+the names will not be shown in the result as the 'address' of the place, but
+searching by them still works.
+
+Independent places are always added to the global search index `search_name`.
+
+#### Dependent places
+
+Dependent places skip the full address computation for performance reasons.
+Instead they just find a parent place to attach themselves to.
+
+![parenting of dependent places](parenting-flow.svg)
+
+By default a POI
+or house number will be attached to the closest street. That can be any major
+or minor street indexed by Nominatim. In the default configuration that means
+that it can attach itself to a footway but only when it has a name.
+
+When the dependent place has an `addr:street` tag, then Nominatim will first
+try to find a street with the same name before falling back to the closest
+street.
+
+There are also addresses in OSM, where the housenumber does not belong
+to a street at all. These have an `addr:place` tag. For these places, Nominatim
+tries to find a place with the given name in the indexed places with an
+address rank between 16 and 25. If none is found, then the dependent place
+is attached to the closest place in that category and the addr:place name is
+added as *unlisted* place, which indicates to Nominatim that it needs to add
+it to the address output, no matter what. This special case is necessary to
+cover addresses that don't really refer to an existing object.
+
+When an address has both the `addr:street` and `addr:place` tag, then Nominatim
+assumes that the `addr:place` tag in fact should be the city part of the address
+and gives the POI the usual street number address.
+
+Dependent places are only added to the global search index `search_name` when
+they have either a name themselves or when they have address tags that are not
+covered by the places that make up their address. The latter ensures that
+addresses are always searchable by those address tags.
+
--- a/docs/develop/Ranking.md
+++ b/docs/develop/Ranking.md
@@ -1,90 +0,0 @@
-# Place Ranking in Nominatim
-
-Nominatim uses two metrics to rank a place: search rank and address rank.
-Both can be assigned a value between 0 and 30. They serve slightly
-different purposes, which are explained in this chapter.
-
-## Search rank
-
-The search rank describes the extent and importance of a place. It is used
-when ranking search result. Simply put, if there are two results for a
-search query which are otherwise equal, then the result with the _lower_
-search rank will be appear higher in the result list.
-
-Search ranks are not so important these days because many well-known
-places use the Wikipedia importance ranking instead.
-
-## Address rank
-
-The address rank describes where a place shows up in an address hierarchy.
-Usually only administrative boundaries and place nodes and areas are
-eligible to be part of an address. All other objects have an address rank
-of 0.
-
-Note that the search rank of a place plays a role in the address computation
-as well. When collecting the places that should make up the address parts
-then only places are taken into account that have a lower address rank than
-the search rank of the base object.
-
-## Rank configuration
-
-Search and address ranks are assigned to a place when it is first imported
-into the database. There are a few hard-coded rules for the assignment:
-
- * postcodes follow special rules according to their length
- * boundaries that are not areas and railway=rail are dropped completely
- * the following are always search rank 30 and address rank 0:
-    * highway nodes
-    * landuse that is not an area
-
-Other than that, the ranks can be freely assigned via the JSON file
-defined with `CONST_Address_Level_Config` according to their type and
-the country they are in.
-
-The address level configuration must consist of an array of configuration
-entries, each containing a tag definition and an optional country array:
-
-```
-[ {
-    "tags" : {
-      "place" : {
-        "county" : 12,
-        "city" : 16,
-      },
-      "landuse" : {
-        "residential" : 22,
-        "" : 30
-      }
-    }
-  },
-  {
-    "countries" : [ "ca", "us" ],
-    "tags" : {
-      "boundary" : {
-        "administrative8" : 18,
-        "administrative9" : 20
-      },
-      "landuse" : {
-        "residential" : [22, 0]
-      }
-    }
-  }
-]
-```
-
-The `countries` field contains a list of countries (as ISO 3166-1 alpha 2 code)
-for which the definition applies. When the field is omitted, then the
-definition is used as a fallback, when nothing more specific for a given
-country exists.
-
-`tags` contains the ranks for key/value pairs. The ranks can be either a
-single number, in which case they are the search and address rank, or an array
-of search and address rank (in that order). The value may be left empty.
-Then the rank is used when no more specific value is found for the given
-key.
-
-Countries and key/value combination may appear in multiple definitions. Just
-make sure that each combination of counrty/key/value appears only once per
-file. Otherwise the import will fail with a UNIQUE INDEX constraint violation
-on import.
-
--- a/docs/develop/Testing.md
+++ b/docs/develop/Testing.md
@@ -0,0 +1,159 @@
+# Nominatim Test Suite
+
+This chapter describes the tests in the `/test` directory, how they are
+structured and how to extend them. For a quick introduction on how to run
+the tests, see the [Development setup chapter](Development-Environment.md).
+
+## Overall structure
+
+There are two kinds of tests in this test suite. There are functional tests
+which test the API interface using a BDD test framework and there are unit
+tests for specific PHP functions.
+
+This test directory is structured as follows:
+
+```
+ -+-   bdd         Functional API tests
+  | \
+  | +-  steps      Step implementations for test descriptions
+  | +-  osm2pgsql  Tests for data import via osm2pgsql
+  | +-  db         Tests for internal data processing on import and update
+  | +-  api        Tests for API endpoints (search, reverse, etc.)
+  |
+  +-   php         PHP unit tests
+  +-   python      Python unit tests
+  +-   testdb      Base data for generating API test database
+  +-   testdata    Additional test data used by unit tests
+```
+
+## PHP Unit Tests (`test/php`)
+
+Unit tests for PHP code can be found in the `php/` directory. They test selected
+PHP functions. Coverage is currently very low.
+
+To execute the test suite run
+
+    cd test/php
+    UNIT_TEST_DSN='pgsql:dbname=nominatim_unit_tests' phpunit ../
+
+It will read phpunit.xml which points to the library, test path, bootstrap
+script and sets other parameters.
+
+It will use (and destroy) a local database 'nominatim_unit_tests'. You can set
+a different connection string with e.g. UNIT_TEST_DSN='pgsql:dbname=foo_unit_tests'.
+
+## Python Unit Tests (`test/python`)
+
+Unit tests for Python code can be found in the `python/` directory. The goal is
+to have complete coverage of the Python library in `nominatim`.
+
+To execute the tests run
+
+    py.test-3 test/python
+
+or
+
+    pytest test/python
+
+The name of the pytest binary depends on your installation.
+
+## BDD Functional Tests (`test/bdd`)
+
+Functional tests are written as BDD instructions. For more information on
+the philosophy of BDD testing, see the
+[Behave manual](http://pythonhosted.org/behave/philosophy.html).
+
+The following explanation assumes that the reader is familiar with the BDD
+notations of features, scenarios and steps.
+
+All possible steps can be found in the `steps` directory and should ideally
+be documented.
+
+### General Usage
+
+To run the functional tests, do
+
+    cd test/bdd
+    behave
+
+The tests can be configured with a set of environment variables (`behave -D key=val`):
+
+ * `BUILDDIR` - build directory of Nominatim installation to test
+ * `TEMPLATE_DB` - name of template database used as a skeleton for
+                   the test databases (db tests)
+ * `TEST_DB` - name of test database (db tests)
+ * `API_TEST_DB` - name of the database containing the API test data (api tests)
+ * `API_TEST_FILE` - OSM file to be imported into the API test database (api tests)
+ * `DB_HOST` - (optional) hostname of database host
+ * `DB_PORT` - (optional) port of database on host
+ * `DB_USER` - (optional) username of database login
+ * `DB_PASS` - (optional) password for database login
+ * `SERVER_MODULE_PATH` - (optional) path on the Postgres server to Nominatim
+                          module shared library file
+ * `REMOVE_TEMPLATE` - if true, the template and API database will not be reused
+                       during the next run. Reusing the base templates speeds
+                       up tests considerably but might lead to outdated errors
+                       for some changes in the database layout.
+ * `KEEP_TEST_DB` - if true, the test database will not be dropped after a test
+                    is finished. Should only be used if one single scenario is
+                    run, otherwise the result is undefined.
+
+Logging can be defined through command line parameters of behave itself. Check
+out `behave --help` for details. Also have a look at the 'work-in-progress'
+feature of behave which comes in handy when writing new tests.
+
+### API Tests (`test/bdd/api`)
+
+These tests are meant to test the different API endpoints and their parameters.
+They require importing several datasets into a test database. This is normally
+done automatically during setup of the test. The API test database is then
+kept around and reused in subsequent runs of behave. Use `behave -DREMOVE_TEMPLATE`
+to force a reimport of the database.
+
+The official test dataset is saved in the file `test/testdb/apidb-test-data.pbf`
+and comprises the following data:
+
+ * Geofabrik extract of Liechtenstein
+ * extract of Autauga County, Alabama, US (for tests against Tiger data)
+ * additional data from `test/testdb/additional_api_test.data.osm`
+
+API tests should only be testing the functionality of the website PHP code.
+Most tests should be formulated as BDD DB creation tests (see below) instead.
+
+#### Code Coverage
+
+The API tests also support code coverage tests. You need to install
+[PHP_CodeCoverage](https://github.com/sebastianbergmann/php-code-coverage).
+On Debian/Ubuntu run:
+
+    apt-get install php-codecoverage php-xdebug
+
+Then run the API tests as follows:
+
+    behave api -DPHPCOV=<coverage output dir>
+
+The output directory must be an absolute path. To generate reports, you can use
+the [phpcov](https://github.com/sebastianbergmann/phpcov) tool:
+
+    phpcov merge --html=<report output dir> <coverage output dir>
+
+### DB Creation Tests (`test/bdd/db`)
+
+These tests check the import and update of the Nominatim database. They do not
+test the correctness of osm2pgsql. Each test will write some data into the `place`
+table (and optionally the `planet_osm_*` tables if required) and then run
+Nominatim's processing functions on that.
+
+These tests need to create their own test databases. By default they will be
+called `test_template_nominatim` and `test_nominatim`. Names can be changed with
+the environment variables `TEMPLATE_DB` and `TEST_DB`. The user running the tests
+needs superuser rights for postgres.
+
+### Import Tests (`test/bdd/osm2pgsql`)
+
+These tests check that data is imported correctly into the place table. They
+use the same template database as the DB Creation tests, so the same remarks apply.
+
+Note that most testing of the gazetteer output of osm2pgsql is done in the tests
+of osm2pgsql itself. The BDD tests are just there to ensure compatibility of
+the osm2pgsql and Nominatim code.
--- a/docs/develop/Tokenizers.md
+++ b/docs/develop/Tokenizers.md
@@ -0,0 +1,332 @@
+# Tokenizers
+
+The tokenizer is the component of Nominatim that is responsible for
+analysing names of OSM objects and queries. Nominatim provides different
+tokenizers that use different strategies for normalisation. This page describes
+how tokenizers are expected to work and the public API that needs to be
+implemented when creating a new tokenizer. For information on how to configure
+a specific tokenizer for a database see the
+[tokenizer chapter in the Customization Guide](../customize/Tokenizers.md).
+
+## Generic Architecture
+
+### About Search Tokens
+
+Search in Nominatim is organised around search tokens. Such a token represents
+a string that can be part of the search query. Tokens are used so that the search
+index does not need to be organised around strings. Instead the database saves
+for each place which tokens match this place's name, address, house number etc.
+To be able to distinguish between these different types of information stored
+with the place, a search token also always has a certain type: name, house number,
+postcode etc.
+
+During search an incoming query is transformed into an ordered list of such
+search tokens (or rather many lists, see below) and this list is then converted
+into a database query to find the right place.
+
+It is the core task of the tokenizer to create, manage and assign the search
+tokens. The tokenizer is involved in two distinct operations:
+
+* __at import time__: scanning names of OSM objects, normalizing them and
+  building up the list of search tokens.
+* __at query time__: scanning the query and returning the appropriate search
+  tokens.
+
+
+### Importing
+
+The indexer is responsible to enrich an OSM object (or place) with all data
+required for geocoding. It is split into two parts: the controller collects
+the places that require updating, enriches the place information as required
+and hands the place to Postgresql. The controller is part of the Nominatim
+library written in Python. Within Postgresql, the `placex_update`
+trigger is responsible to fill out all secondary tables with extra geocoding
+information. This part is written in PL/pgSQL.
+
+The tokenizer is involved in both parts. When the indexer prepares a place,
+it hands it over to the tokenizer to inspect the names and create all the
+search tokens applicable for the place. This usually involves updating the
+tokenizer's internal token lists and creating a list of all token IDs for
+the specific place. This list is later needed in the PL/pgSQL part where the
+indexer needs to add the token IDs to the appropriate search tables. To be
+able to communicate the list between the Python part and the PL/pgSQL trigger,
+the `placex` table contains a special JSONB column `token_info` which is there
+for the exclusive use of the tokenizer.
+
+The Python part of the tokenizer returns structured information about the
+tokens of a place to the indexer which converts it to JSON and inserts it into
+the `token_info` column. The content of the column is then handed to the PL/pgSQL
+callbacks of the tokenizer which extracts the required information. Usually
+the tokenizer then removes all information from the `token_info` structure,
+so that no information is ever persistently saved in the table. All information
+that went in should have been processed after all and put into secondary tables.
+This is however not a hard requirement. If the tokenizer needs to store
+additional information about a place permanently, it may do so in the
+`token_info` column. It just may never execute searches over it and
+consequently not create any special indexes on it.
+
+### Querying
+
+At query time, Nominatim builds up multiple _interpretations_ of the search
+query. Each of these interpretations is tried against the database in order
+of the likelihood with which they match to the search query. The first
+interpretation that yields results wins.
+
+The interpretations are encapsulated in the `SearchDescription` class. An
+instance of this class is created by applying a sequence of
+_search tokens_ to an initially empty SearchDescription. It is the
+responsibility of the tokenizer to parse the search query and derive all
+possible sequences of search tokens. To that end the tokenizer needs to parse
+the search query and look up matching words in its own data structures.
+
+## Tokenizer API
+
+The following section describes the functions that need to be implemented
+for a custom tokenizer implementation.
+
+!!! warning
+    This API is currently in early alpha status. While this API is meant to
+    be a public API on which other tokenizers may be implemented, the API is
+    far away from being stable at the moment.
+
+### Directory Structure
+
+Nominatim expects two files for a tokenizer:
+
+* `nominatim/tokenizer/<NAME>_tokenizer.py` containing the Python part of the
+  implementation
+* `lib-php/tokenizer/<NAME>_tokenizer.php` with the PHP part of the
+  implementation
+
+where `<NAME>` is a unique name for the tokenizer consisting of only lower-case
+letters, digits and underscore. A tokenizer also needs to install some SQL
+functions. By convention, these should be placed in `lib-sql/tokenizer`.
+
+If the tokenizer has a default configuration file, this should be saved in
+the `settings/<NAME>_tokenizer.<SUFFIX>`.
+
+### Configuration and Persistence
+
+Tokenizers may define custom settings for their configuration. All settings
+must be prefixed with `NOMINATIM_TOKENIZER_`. Settings may be transient or
+persistent. Transient settings are loaded from the configuration file when
+Nominatim is started and may thus be changed at any time. Persistent settings
+are tied to a database installation and must only be read during installation
+time. If they are needed for the runtime then they must be saved into the
+`nominatim_properties` table and later loaded from there.
+
+### The Python module
+
+The Python module is expected to export a single factory function:
+
+```python
+def create(dsn: str, data_dir: Path) -> AbstractTokenizer
+```
+
+The `dsn` parameter contains the DSN of the Nominatim database. The `data_dir`
+is a directory in the project directory that the tokenizer may use to save
+database-specific data. The function must return the instance of the tokenizer
+class as defined below.
+
+### Python Tokenizer Class
+
+All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
+and implement the abstract functions defined there.
+
+::: nominatim.tokenizer.base.AbstractTokenizer
+    rendering:
+        heading_level: 4
+
+### Python Analyzer Class
+
+::: nominatim.tokenizer.base.AbstractAnalyzer
+    rendering:
+        heading_level: 4
+
+### PL/pgSQL Functions
+
+The tokenizer must provide access functions for the `token_info` column
+to the indexer which extracts the necessary information for the global
+search tables. If the tokenizer needs additional SQL functions for private
+use, then these functions must be prefixed with `token_` in order to ensure
+that there are no naming conflicts with the SQL indexer code.
+
+The following functions are expected:
+
+```sql
+FUNCTION token_get_name_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of search terms that should match
+the name(s) for the given place. These tokens are used to look up the place
+by name and, where the place functions as part of an address for another place,
+by address. Must return NULL when the place has no name.
+
+```sql
+FUNCTION token_get_name_match_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of full names of the place that should be used
+to match addresses. The list of match tokens is usually more strict than
+search tokens as it is used to find a match between two OSM tag values which
+are expected to contain matching full names. Partial terms should not be
+used for match tokens. Must return NULL when the place has no name.
+
+```sql
+FUNCTION token_get_housenumber_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return an array of token IDs of house number tokens that apply to the place.
+Note that a place may have multiple house numbers, for example when apartments
+each have their own number. Must be NULL when the place has no house numbers.
+
+```sql
+FUNCTION token_normalized_housenumber(info JSONB) RETURNS TEXT
+```
+
+Return the house number(s) in the normalized form that can be matched against
+a house number token text. If a place has multiple house numbers they must
+be listed with a semicolon as delimiter. Must be NULL when the place has no
+house numbers.
+
+```sql
+FUNCTION token_matches_street(info JSONB, street_tokens INTEGER[]) RETURNS BOOLEAN
+```
+
+Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
+match against the `addr:street` tag name. Must return either NULL or FALSE
+when the place has no `addr:street` tag.
+
+```sql
+FUNCTION token_matches_place(info JSONB, place_tokens INTEGER[]) RETURNS BOOLEAN
+```
+
+Check if the given tokens (previously saved from `token_get_name_match_tokens()`)
+match against the `addr:place` tag name. Must return either NULL or FALSE
+when the place has no `addr:place` tag.
+
+
+```sql
+FUNCTION token_addr_place_search_tokens(info JSONB) RETURNS INTEGER[]
+```
+
+Return the search token IDs extracted from the `addr:place` tag. These tokens
+are used for searches by address when no matching place can be found in the
+database. Must be NULL when the place has no `addr:place` tag.
+
+```sql
+FUNCTION token_get_address_keys(info JSONB) RETURNS SETOF TEXT
+```
+
+Return the set of keys for which address information is provided. This
+should correspond to the list of (relevant) `addr:*` tags with the `addr:`
+prefix removed or the keys used in the `address` dictionary of the place info.
+
+```sql
+FUNCTION token_get_address_search_tokens(info JSONB, key TEXT) RETURNS INTEGER[]
+```
+
+Return the array of search tokens for the given address part. `key` can be
+expected to be one of those returned with `token_get_address_keys()`. The
+search tokens are added to the address search vector of the place, when no
+corresponding OSM object could be found for the given address part from which
+to copy the name information.
+
+```sql
+FUNCTION token_matches_address(info JSONB, key TEXT, tokens INTEGER[])
+```
+
+Check if the given tokens match against the address part `key`.
+
+__Warning:__ the tokens that are handed in are the lists previously saved
+from `token_get_name_search_tokens()`, _not_ from the match token list. This
+is an historical oddity which will be fixed at some point in the future.
+Currently, tokenizers are encouraged to make sure that matching works against
+both the search token list and the match token list.
+
+```sql
+FUNCTION token_get_postcode(info JSONB) RETURNS TEXT
+```
+
+Return the postcode for the object, if any exists. The postcode must be in
+the form that should also be presented to the end-user.
+
+```sql
+FUNCTION token_strip_info(info JSONB) RETURNS JSONB
+```
+
+Return the part of the `token_info` field that should be stored in the database
+permanently. The indexer calls this function when all processing is done and
+replaces the content of the `token_info` column with the returned value before
+the trigger stores the information in the database. May return NULL if no
+information should be stored permanently.
+
+### PHP Tokenizer class
+
+The PHP tokenizer class is instantiated once per request and responsible for
+analyzing the incoming query. Multiple requests may be in flight in
+parallel.
+
+The class is expected to be found under the
+name of `\Nominatim\Tokenizer`. To find the class the PHP code includes the file
+`tokenizer/tokenizer.php` in the project directory. This file must be created
+when the tokenizer is first set up on import. The file should initialize any
+configuration variables by setting PHP constants and then require the file
+with the actual implementation of the tokenizer.
+
+The tokenizer class must implement the following functions:
+
+```php
+public function __construct(object &$oDB)
+```
+
+The constructor of the class receives a database connection that can be used
+to query persistent data in the database.
+
+```php
+public function checkStatus()
+```
+
+Check that the tokenizer can access its persistent data structures. If there
+is an issue, throw an `\Exception`.
+
+```php
+public function normalizeString(string $sTerm) : string
+```
+
+Normalize string to a form to be used for comparisons when reordering results.
+Nominatim reweighs results by how well the final display string matches the actual
+query. Before comparing result and query, names and query are normalised against
+this function. The tokenizer can thus remove all properties that should not be
+taken into account for reweighing, e.g. special characters or case.
+
+```php
+public function tokensForSpecialTerm(string $sTerm) : array
+```
+
+Return the list of special term tokens that match the given term.
+
+```php
+public function extractTokensFromPhrases(array &$aPhrases) : TokenList
+```
+
+Parse the given phrases, splitting them into word lists and retrieve the
+matching tokens.
+
+The phrase array may take on two forms. In unstructured searches (using `q=`
+parameter) the search query is split at the commas and the elements are
+put into a sorted list. For structured searches the phrase array is an
+associative array where the key designates the type of the term (street, city,
+county etc.) The tokenizer may ignore the phrase type at this stage in parsing.
+Matching phrase type and appropriate search token type will be done later
+when the SearchDescription is built.
+
+For each phrase in the list of phrases, the function must analyse the phrase
+string and then call `setWordSets()` to communicate the result of the analysis.
+A word set is a list of strings, where each string refers to a search token.
+A phrase may have multiple interpretations. Therefore a list of word sets is
+usually attached to the phrase. The search tokens themselves are returned
+by the function in an associative array, where the key corresponds to the
+strings given in the word sets. The value is a list of search tokens. Thus
+a single string in the list of word sets may refer to multiple search tokens.
+
--- a/docs/develop/address-tables.plantuml
+++ b/docs/develop/address-tables.plantuml
@@ -0,0 +1,35 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+map search_name_X {
+  place_id => BIGINT
+  address_rank => SMALLINT
+  name_vector => INT[]
+  centroid => GEOMETRY
+}
+
+map location_area_large_X {
+  place_id => BIGINT
+  keywords => INT[]
+  partition => SMALLINT
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  country_code => VARCHAR(2)
+  isguess => BOOLEAN
+  postcode => TEXT
+  centroid => POINT
+  geometry => GEOMETRY
+}
+
+map location_road_X {
+  place_id => BIGINT
+  partition => SMALLINT
+  country_code => VARCHAR(2)
+  geometry => GEOMETRY
+}
+
+search_name_X -[hidden]> location_area_large_X
+location_area_large_X -[hidden]> location_road_X
+
+@enduml
--- a/docs/develop/address-tables.svg
+++ b/docs/develop/address-tables.svg
--- a/docs/develop/data-sources.md
+++ b/docs/develop/data-sources.md
@@ -0,0 +1,34 @@
+# Additional Data Sources
+
+This guide explains how data sources other than OpenStreetMap mentioned in
+the install instructions got obtained and converted.
+
+## Country grid
+
+Nominatim uses pre-generated country border data. It is needed when only
+a subset of a country is imported and to assign each place a partition. Nominatim
+database tables are split into partitions for performance.
+
+More details in [osm-search/country-grid-data](https://github.com/osm-search/country-grid-data).
+
+## US Census TIGER
+
+For the United States you can choose to import additional street-level data.
+The data isn't mixed into OSM data but queried as fallback when no OSM
+result can be found.
+
+More details in [osm-search/TIGER-data](https://github.com/osm-search/TIGER-data).
+
+## GB postcodes
+
+For Great Britain you can choose to import Royal Mail postcode centroids.
+
+More details in [osm-search/gb-postcode-data](https://github.com/osm-search/gb-postcode-data).
+
+
+## Wikipedia & Wikidata rankings
+
+Nominatim can import "importance" data of place names. This greatly
+improves ranking of results.
+
+More details in [osm-search/wikipedia-wikidata](https://github.com/osm-search/wikipedia-wikidata).
--- a/docs/develop/osm2pgsql-tables.plantuml
+++ b/docs/develop/osm2pgsql-tables.plantuml
@@ -0,0 +1,44 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+map planet_osm_nodes #eee {
+  id => BIGINT
+  lat => INT
+  lon => INT
+}
+
+map planet_osm_ways #eee {
+  id => BIGINT
+  nodes => BIGINT[]
+  tags => TEXT[]
+}
+
+map planet_osm_rels #eee {
+  id => BIGINT
+  parts => BIGINT[]
+  members => TEXT[]
+  tags => TEXT[]
+  way_off => SMALLINT
+  rel_off => SMALLINT
+}
+
+map place {
+  osm_type => CHAR(1)
+  osm_id => BIGINT
+  class => TEXT
+  type => TEXT
+  name => HSTORE
+  address => HSTORE
+  extratags => HSTORE
+  admin_level => SMALLINT
+  geometry => GEOMETRY
+}
+
+planet_osm_nodes  -[hidden]> planet_osm_ways
+planet_osm_ways  -[hidden]> planet_osm_rels
+planet_osm_ways -[hidden]-> place
+
+planet_osm_nodes::id <- planet_osm_ways::nodes
+
+@enduml
--- a/docs/develop/osm2pgsql-tables.svg
+++ b/docs/develop/osm2pgsql-tables.svg
--- a/docs/develop/overview.md
+++ b/docs/develop/overview.md
@@ -9,14 +9,14 @@ the address computation and the search frontend.
 The __data import__ stage reads the raw OSM data and extracts all information
 that is useful for geocoding. This part is done by osm2pgsql, the same tool
 that can also be used to import a rendering database. It uses the special
-gazetteer output plugin in `osm2pgsql/output-gazetter.[ch]pp`. The result of
+gazetteer output plugin in `osm2pgsql/src/output-gazetteer.[ch]pp`. The result of
 the import can be found in the database table `place`.

 The __address computation__ or __indexing__ stage takes the data from `place`
 and adds additional information needed for geocoding. It ranks the places by
 importance, links objects that belong together and computes addresses and
 the search index. Most of this work is done in PL/pgSQL via database triggers
-and can be found in the file `sql/functions.sql`.
+and can be found in the files in the `sql/functions/` directory.

 The __search frontend__ implements the actual API. It takes search
 and reverse geocoding queries from the user, looks up the data and
--- a/docs/develop/parenting-flow.plantuml
+++ b/docs/develop/parenting-flow.plantuml
@@ -0,0 +1,31 @@
+@startuml
+skinparam monochrome true
+
+start
+
+if (has 'addr:street'?) then (yes)
+  if (street with that name\n nearby?) then (yes)
+    :**Use closest street**
+     **with same name**;
+     kill
+  else (no)
+    :** Use closest**\n**street**;
+     kill
+  endif
+elseif (has 'addr:place'?) then (yes)
+  if (place with that name\n nearby?) then (yes)
+    :**Use closest place**
+     **with same name**;
+     kill
+  else (no)
+    :add addr:place to address;
+    :**Use closest place**\n**rank 16 to 25**;
+     kill
+  endif
+else (otherwise)
+ :**Use closest**\n**street**;
+ kill
+endif
+
+
+@enduml
--- a/docs/develop/parenting-flow.svg
+++ b/docs/develop/parenting-flow.svg
--- a/docs/develop/search-tables.plantuml
+++ b/docs/develop/search-tables.plantuml
@@ -0,0 +1,99 @@
+@startuml
+skinparam monochrome true
+skinparam ObjectFontStyle bold
+
+left to right direction
+
+map placex {
+  place_id => BIGINT
+  osm_type => CHAR(1)
+  osm_id => BIGINT
+  class => TEXT
+  type => TEXT
+  name => HSTORE
+  address => HSTORE
+  extratags => HSTORE
+  admin_level => SMALLINT
+  partition => SMALLINT
+  geometry_sector => INT
+  parent_place_id => BIGINT
+  linked_place_id => BIGINT
+  importance => DOUBLE
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  wikipedia => TEXT
+  country_code => VARCHAR(2)
+  housenumber => TEXT
+  postcode => TEXT
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  centroid => GEOMETRY
+  geometry => GEOMETRY
+}
+
+map search_name {
+  place_id => BIGINT
+  importance => DOUBLE
+  search_rank => SMALLINT
+  address_rank => SMALLINT
+  name_vector => INT[]
+  nameaddress_vector => INT[]
+  country_code => VARCHAR(2)
+  centroid => GEOMETRY
+}
+
+map word {
+  word_id => INT
+  word_token => TEXT
+  ... =>
+}
+
+map location_property_osmline {
+  place_id => BIGINT
+  osm_id => BIGINT
+  startnumber => INT
+  endnumber => INT
+  interpolationtype => TEXT
+  address => HSTORE
+  partition => SMALLINT
+  geometry_sector => INT
+  parent_place_id => BIGINT
+  country_code => VARCHAR(2)
+  postcode => text
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  linegeo => GEOMETRY
+}
+
+map place_addressline {
+  place_id => BIGINT
+  address_place_id => BIGINT
+  distance => DOUBLE
+  cached_rank_address => SMALLINT
+  fromarea => BOOLEAN
+  isaddress => BOOLEAN
+}
+
+map location_postcode {
+  place_id => BIGINT
+  postcode => TEXT
+  parent_place_id => BIGINT
+  rank_search => SMALLINT
+  rank_address => SMALLINT
+  indexed_status => SMALLINT
+  indexed_date => TIMESTAMP
+  geometry => GEOMETRY
+}
+
+placex::place_id <-- search_name::place_id
+placex::place_id <-- place_addressline::place_id
+placex::place_id <-- place_addressline::address_place_id
+
+search_name::name_vector --> word::word_id
+search_name::nameaddress_vector --> word::word_id
+
+place_addressline -[hidden]> location_property_osmline
+search_name -[hidden]> place_addressline
+location_property_osmline -[hidden]-> location_postcode
+
+@enduml
--- a/docs/develop/search-tables.svg
+++ b/docs/develop/search-tables.svg
--- a/docs/extra.css
+++ b/docs/extra.css
@@ -1,3 +1,24 @@
 .toctree-l3 {
    display: none!important
 }
+
+table {
+    margin-bottom: 12pt
+}
+
+th, td {
+    padding: 1pt 12pt;
+}
+
+th {
+    background-color: #eee;
+}
+
+.doc-object h6 {
+    margin-bottom: 0.8em;
+    font-size: 120%;
+}
+
+.doc-object {
+    margin-bottom: 1.3em;
+}
--- a/docs/index.md
+++ b/docs/index.md
@@ -1,8 +1,10 @@
 Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).

-This guide comes in three parts:
+This guide comes in four parts:

 * __[API reference](api/Overview.md)__ for users of Nominatim
 * __[Administration Guide](admin/Installation.md)__ for those who want
   to install their own Nominatim server
+ * __[Customization Guide](customize/Overview.md)__ for those who want to
+   adapt their own installation to their special requirements
 * __[Developer's Guide](develop/overview.md)__ for developers of the software
--- a/docs/mkdocs.yml
+++ b/docs/mkdocs.yml
@@ -1,9 +1,9 @@
 site_name: Nominatim Documentation
 theme: readthedocs
 docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
-site_url: http://nominatim.org
+site_url: https://nominatim.org
 repo_url: https://github.com/openstreetmap/Nominatim
-pages:
+nav:
    - 'Introduction' : 'index.md'
    - 'API Reference':
        - 'Overview': 'api/Overview.md'
@@ -11,31 +11,54 @@ pages:
        - 'Reverse': 'api/Reverse.md'
        - 'Address Lookup': 'api/Lookup.md'
        - 'Details' : 'api/Details.md'
+        - 'Status' : 'api/Status.md'
        - 'Place Output Formats': 'api/Output.md'
        - 'FAQ': 'api/Faq.md'
    - 'Administration Guide':
        - 'Basic Installation': 'admin/Installation.md'
-        - 'Importing and Updating' : 'admin/Import-and-Update.md'
+        - 'Import' : 'admin/Import.md'
+        - 'Update' : 'admin/Update.md'
+        - 'Deploy' : 'admin/Deployment.md'
+        - 'Nominatim UI'  : 'admin/Setup-Nominatim-UI.md'
+        - 'Advanced Installations' : 'admin/Advanced-Installations.md'
+        - 'Maintenance' : 'admin/Maintenance.md'
        - 'Migration from older Versions' : 'admin/Migration.md'
        - 'Troubleshooting' : 'admin/Faq.md'
+    - 'Customization Guide':
+        - 'Overview': 'customize/Overview.md'
+        - 'Import Styles': 'customize/Import-Styles.md'
+        - 'Configuration Settings': 'customize/Settings.md'
+        - 'Per-Country Data': 'customize/Country-Settings.md'
+        - 'Place Ranking' : 'customize/Ranking.md'
+        - 'Tokenizers' : 'customize/Tokenizers.md'
+        - 'Special Phrases': 'customize/Special-Phrases.md'
+        - 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
+        - 'External data: Postcodes': 'customize/Postcodes.md'
    - 'Developers Guide':
-        - 'Overview' : 'develop/overview.md'
-        - 'OSM Data Import' : 'develop/Import.md'
-        - 'Place Ranking' : 'develop/Ranking.md'
-        - 'Documentation' : 'develop/Documentation.md'
-    - 'External Data Sources':
-        - 'Overview' : 'data-sources/overview.md'
-        - 'US Census (Tiger)': 'data-sources/US-Tiger.md'
-        - 'GB Postcodes': 'data-sources/GB-Postcodes.md'
-        - 'Country Grid': 'data-sources/Country-Grid.md'
-        - 'Wikipedia & Wikidata': 'data-sources/Wikipedia-Wikidata.md'
+        - 'Architecture Overview' : 'develop/overview.md'
+        - 'Database Layout' : 'develop/Database-Layout.md'
+        - 'Indexing' : 'develop/Indexing.md'
+        - 'Tokenizers' : 'develop/Tokenizers.md'
+        - 'Custom modules for ICU tokenizer': 'develop/ICU-Tokenizer-Modules.md'
+        - 'Setup for Development' : 'develop/Development-Environment.md'
+        - 'Testing' : 'develop/Testing.md'
+        - 'External Data Sources': 'develop/data-sources.md'
    - 'Appendix':
-        - 'Installation on CentOS 7' : 'appendix/Install-on-Centos-7.md'
-        - 'Installation on Ubuntu 16' : 'appendix/Install-on-Ubuntu-16.md'
        - 'Installation on Ubuntu 18' : 'appendix/Install-on-Ubuntu-18.md'
+        - 'Installation on Ubuntu 20' : 'appendix/Install-on-Ubuntu-20.md'
+        - 'Installation on Ubuntu 22' : 'appendix/Install-on-Ubuntu-22.md'
 markdown_extensions:
-    - codehilite:
-        use_pygments: False
+    - codehilite
+    - admonition
+    - def_list
    - toc:
        permalink: 
-extra_css: [extra.css]
+extra_css: [extra.css, styles.css]
+plugins:
+    - search
+    - mkdocstrings:
+        handlers:
+          python-legacy:
+            rendering:
+              show_source: false
+              show_signature_annotations: false
--- a/docs/styles.css
+++ b/docs/styles.css
@@ -0,0 +1,69 @@
+.codehilite .hll { background-color: #ffffcc } /* Highlighted line */
+.codehilite  { background: #f0f0f0; } /* Background of the whole code block */
+.codehilite .c { color: #60a0b0; font-style: italic } /* Comment */
+.codehilite .err { /* border: 1px solid #FF0000 */ } /* Error */
+.codehilite .k { color: #007020; font-weight: bold } /* Keyword */
+.codehilite .o { color: #666666 } /* Operator */
+.codehilite .ch { color: #60a0b0; font-style: italic } /* Comment.Hashbang */
+.codehilite .cm { color: #60a0b0; font-style: italic } /* Comment.Multiline */
+.codehilite .cp { color: #007020 } /* Comment.Preproc */
+.codehilite .cpf { color: #60a0b0; font-style: italic } /* Comment.PreprocFile */
+.codehilite .c1 { color: #60a0b0; font-style: italic } /* Comment.Single */
+.codehilite .cs { color: #60a0b0; background-color: #fff0f0 } /* Comment.Special */
+.codehilite .gd { color: #A00000 } /* Generic.Deleted */
+.codehilite .ge { font-style: italic } /* Generic.Emph */
+.codehilite .gr { color: #FF0000 } /* Generic.Error */
+.codehilite .gh { color: #000080; font-weight: bold } /* Generic.Heading */
+.codehilite .gi { color: #00A000 } /* Generic.Inserted */
+.codehilite .go { color: #888888 } /* Generic.Output */
+.codehilite .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
+.codehilite .gs { font-weight: bold } /* Generic.Strong */
+.codehilite .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
+.codehilite .gt { color: #0044DD } /* Generic.Traceback */
+.codehilite .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
+.codehilite .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
+.codehilite .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
+.codehilite .kp { color: #007020 } /* Keyword.Pseudo */
+.codehilite .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
+.codehilite .kt { color: #902000 } /* Keyword.Type */
+.codehilite .m { color: #40a070 } /* Literal.Number */
+.codehilite .s { color: #4070a0 } /* Literal.String */
+.codehilite .na { color: #4070a0 } /* Name.Attribute */
+.codehilite .nb { color: #007020 } /* Name.Builtin */
+.codehilite .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
+.codehilite .no { color: #60add5 } /* Name.Constant */
+.codehilite .nd { color: #555555; font-weight: bold } /* Name.Decorator */
+.codehilite .ni { color: #d55537; font-weight: bold } /* Name.Entity */
+.codehilite .ne { color: #007020 } /* Name.Exception */
+.codehilite .nf { color: #06287e } /* Name.Function */
+.codehilite .nl { color: #002070; font-weight: bold } /* Name.Label */
+.codehilite .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
+.codehilite .nt { color: #062873; font-weight: bold } /* Name.Tag */
+.codehilite .nv { color: #bb60d5 } /* Name.Variable */
+.codehilite .ow { color: #007020; font-weight: bold } /* Operator.Word */
+.codehilite .w { color: #bbbbbb } /* Text.Whitespace */
+.codehilite .mb { color: #40a070 } /* Literal.Number.Bin */
+.codehilite .mf { color: #40a070 } /* Literal.Number.Float */
+.codehilite .mh { color: #40a070 } /* Literal.Number.Hex */
+.codehilite .mi { color: #40a070 } /* Literal.Number.Integer */
+.codehilite .mo { color: #40a070 } /* Literal.Number.Oct */
+.codehilite .sa { color: #4070a0 } /* Literal.String.Affix */
+.codehilite .sb { color: #4070a0 } /* Literal.String.Backtick */
+.codehilite .sc { color: #4070a0 } /* Literal.String.Char */
+.codehilite .dl { color: #4070a0 } /* Literal.String.Delimiter */
+.codehilite .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
+.codehilite .s2 { color: #4070a0 } /* Literal.String.Double */
+.codehilite .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
+.codehilite .sh { color: #4070a0 } /* Literal.String.Heredoc */
+.codehilite .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
+.codehilite .sx { color: #c65d09 } /* Literal.String.Other */
+.codehilite .sr { color: #235388 } /* Literal.String.Regex */
+.codehilite .s1 { color: #4070a0 } /* Literal.String.Single */
+.codehilite .ss { color: #517918 } /* Literal.String.Symbol */
+.codehilite .bp { color: #007020 } /* Name.Builtin.Pseudo */
+.codehilite .fm { color: #06287e } /* Name.Function.Magic */
+.codehilite .vc { color: #bb60d5 } /* Name.Variable.Class */
+.codehilite .vg { color: #bb60d5 } /* Name.Variable.Global */
+.codehilite .vi { color: #bb60d5 } /* Name.Variable.Instance */
+.codehilite .vm { color: #bb60d5 } /* Name.Variable.Magic */
+.codehilite .il { color: #40a070 } /* Literal.Number.Integer.Long */
--- a/lib-php/AddressDetails.php
+++ b/lib-php/AddressDetails.php
@@ -0,0 +1,272 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim;
+
+require_once(CONST_LibDir.'/ClassTypes.php');
+
+/**
+ * Detailed list of address parts for a single result
+ */
+class AddressDetails
+{
+    /** Place ID handed to the constructor. */
+    private $iPlaceID;
+    /** Address rows as returned by the database query. */
+    private $aAddressLines;
+
+    /**
+     * Load the address rows for a place from the database.
+     *
+     * @param object       $oDB           Database connection. Must provide
+     *                                    getArraySQL(), getDBQuotedList()
+     *                                    and getAll().
+     * @param int          $iPlaceID      ID of the place to get the address for.
+     * @param int|null     $sHousenumber  Housenumber argument for the SQL function
+     *                                    get_addressdata(). -1 is used when
+     *                                    it is not set.
+     * @param array|string $mLangPref     Language preference. An array is
+     *                                    converted with getArraySQL(), any
+     *                                    other value is used in the SQL as is.
+     */
+    public function __construct(&$oDB, $iPlaceID, $sHousenumber, $mLangPref)
+    {
+        $this->iPlaceID = $iPlaceID;
+
+        if (is_array($mLangPref)) {
+            $mLangPref = $oDB->getArraySQL($oDB->getDBQuotedList($mLangPref));
+        }
+
+        if (!isset($sHousenumber)) {
+            $sHousenumber = -1;
+        }
+
+        // Both IDs are spliced into the SQL text unquoted, so force them to
+        // integers to make sure nothing but a number reaches the query.
+        // NOTE(review): assumes get_addressdata() takes integer arguments, as
+        // the -1 default suggests - confirm against the SQL definition.
+        $sSQL = 'SELECT *,';
+        $sSQL .= ' get_name_by_language(name,'.$mLangPref.') as localname';
+        $sSQL .= ' FROM get_addressdata('.((int) $iPlaceID).','.((int) $sHousenumber).')';
+        $sSQL .= ' ORDER BY rank_address DESC, isaddress DESC';
+
+        $this->aAddressLines = $oDB->getAll($sSQL);
+    }
+
+    /**
+     * Check if a row belongs to the address proper.
+     *
+     * @param array $aLine  Single address row.
+     *
+     * @return bool  True when the row is flagged 'isaddress' or has the
+     *               type 'country_code'.
+     */
+    private static function isAddress($aLine)
+    {
+        return $aLine['isaddress'] || $aLine['type'] == 'country_code';
+    }
+
+    /**
+     * Return the address rows.
+     *
+     * @param bool $bAll  When true, return every row. Otherwise return only
+     *                    rows accepted by isAddress(); the original array
+     *                    keys are kept, so the result may have gaps.
+     *
+     * @return array[]
+     */
+    public function getAddressDetails($bAll = false)
+    {
+        if ($bAll) {
+            return $this->aAddressLines;
+        }
+
+        return array_filter($this->aAddressLines, array(__CLASS__, 'isAddress'));
+    }
+
+    /**
+     * Build a single-line address from the localised names.
+     *
+     * Only rows flagged 'isaddress' are used. A name that repeats the name
+     * added directly before it is left out.
+     *
+     * @return string  The names joined with ', '.
+     */
+    public function getLocaleAddress()
+    {
+        $aParts = array();
+        $sPrevResult = '';
+
+        foreach ($this->aAddressLines as $aLine) {
+            if ($aLine['isaddress'] && $sPrevResult != $aLine['localname']) {
+                $sPrevResult = $aLine['localname'];
+                $aParts[] = $sPrevResult;
+            }
+        }
+
+        return join(', ', $aParts);
+    }
+
+    /**
+     * Return the address as a map from label tag to name.
+     *
+     * Rows not accepted by isAddress() are skipped. The name is the
+     * 'localname' of the row or, when that is empty, its 'housenumber'.
+     * The first row seen for a label wins, except that rows of class
+     * 'place' overwrite. A non-empty 'name' may add an ISO3166-2 code.
+     *
+     * @return array  Names indexed by label tag.
+     */
+    public function getAddressNames()
+    {
+        $aAddress = array();
+
+        foreach ($this->aAddressLines as $aLine) {
+            if (!self::isAddress($aLine)) {
+                continue;
+            }
+
+            $sTypeLabel = ClassTypes\getLabelTag($aLine);
+
+            $sName = null;
+            if (isset($aLine['localname']) && $aLine['localname']!=='') {
+                $sName = $aLine['localname'];
+            } elseif (isset($aLine['housenumber']) && $aLine['housenumber']!=='') {
+                $sName = $aLine['housenumber'];
+            }
+
+            if (isset($sName)
+                && (!isset($aAddress[$sTypeLabel])
+                    || $aLine['class'] == 'place')
+            ) {
+                $aAddress[$sTypeLabel] = $sName;
+
+                if (!empty($aLine['name'])) {
+                    $this->addSubdivisionCode($aAddress, $aLine['admin_level'], $aLine['name']);
+                }
+            }
+        }
+
+        return $aAddress;
+    }
+
+    /**
+     * Annotates the given json with geocodejson address information fields.
+     *
+     * @param array  $aJson  Json hash to add the fields to.
+     *
+     * The following geocodejson fields may be set:
+     *  housenumber, street, locality, postcode, city, district,
+     *  county, state, country
+     *
+     * Postcode and housenumber are added by type. All other fields are
+     * set according to address rank. The row of the place itself is skipped
+     * for the rank-based fields. Rows are visited in reverse query order;
+     * a later row overwrites a field set by an earlier one.
+     */
+    public function addGeocodeJsonAddressParts(&$aJson)
+    {
+        foreach (array_reverse($this->aAddressLines) as $aLine) {
+            if (!$aLine['isaddress']) {
+                continue;
+            }
+
+            if (!isset($aLine['localname']) || $aLine['localname'] == '') {
+                continue;
+            }
+
+            if ($aLine['type'] == 'postcode' || $aLine['type'] == 'postal_code') {
+                $aJson['postcode'] = $aLine['localname'];
+                continue;
+            }
+
+            if ($aLine['type'] == 'house_number') {
+                $aJson['housenumber'] = $aLine['localname'];
+                continue;
+            }
+
+            if ($this->iPlaceID == $aLine['place_id']) {
+                continue;
+            }
+
+            $iRank = (int)$aLine['rank_address'];
+
+            if ($iRank > 25 && $iRank < 28) {
+                $aJson['street'] = $aLine['localname'];
+            } elseif ($iRank >= 22 && $iRank <= 25) {
+                $aJson['locality'] = $aLine['localname'];
+            } elseif ($iRank >= 17 && $iRank <= 21) {
+                $aJson['district'] = $aLine['localname'];
+            } elseif ($iRank >= 13 && $iRank <= 16) {
+                $aJson['city'] = $aLine['localname'];
+            } elseif ($iRank >= 10 && $iRank <= 12) {
+                $aJson['county'] = $aLine['localname'];
+            } elseif ($iRank >= 5 && $iRank <= 9) {
+                $aJson['state'] = $aLine['localname'];
+            } elseif ($iRank == 4) {
+                $aJson['country'] = $aLine['localname'];
+            }
+        }
+    }
+
+    /**
+     * Return the localised names indexed by administrative level.
+     *
+     * Only rows accepted by isAddress() with an 'admin_level' below 15 are
+     * used. Rows are visited in reverse query order and the first name
+     * found for a level is kept.
+     *
+     * @return array  Names with keys of the form 'level<admin_level>'.
+     */
+    public function getAdminLevels()
+    {
+        $aAddress = array();
+        foreach (array_reverse($this->aAddressLines) as $aLine) {
+            if (self::isAddress($aLine)
+                && isset($aLine['admin_level'])
+                && $aLine['admin_level'] < 15
+                && !isset($aAddress['level'.$aLine['admin_level']])
+            ) {
+                $aAddress['level'.$aLine['admin_level']] = $aLine['localname'];
+            }
+        }
+        return $aAddress;
+    }
+
+    /**
+     * Return the raw address rows for debugging.
+     *
+     * @return array[]
+     */
+    public function debugInfo()
+    {
+        return $this->aAddressLines;
+    }
+
+    /**
+     * Add the ISO3166-2 code from a row's names to the address, if present.
+     *
+     * @param array        $aAddress     Address to add the code to. The key
+     *                                   used is 'ISO3166-2-lvl<admin level>'.
+     * @param mixed        $iAdminLevel  Administrative level of the row.
+     * @param array|string $nameDetails  Names of the row. A string is rewritten
+     *                                   from '"key"=>"value"' pairs to JSON
+     *                                   and decoded first.
+     */
+    private function addSubdivisionCode(&$aAddress, $iAdminLevel, $nameDetails)
+    {
+        if (is_string($nameDetails)) {
+            $nameDetails = json_decode('{' . str_replace('"=>"', '":"', $nameDetails) . '}', true);
+        }
+        if (!empty($nameDetails['ISO3166-2'])) {
+            $aAddress["ISO3166-2-lvl$iAdminLevel"] = $nameDetails['ISO3166-2'];
+        }
+    }
+}
--- a/lib-php/ClassTypes.php
+++ b/lib-php/ClassTypes.php
@@ -0,0 +1,576 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim\ClassTypes;
+
+/**
+ * Create a label tag for the given place that can be used as an XML name.
+ * A label tag groups various object types together under a common label.
+ *
+ * @param array[] $aPlace    Information about the place to label.
+ * @param string  $sCountry  Country code passed on to getBoundaryLabel().
+ * @return string  The label in lower case with spaces replaced by '_'.
+ */
+function getLabelTag($aPlace, $sCountry = null)
+{
+    $iRank = (int) ($aPlace['rank_address'] ?? 30);
+    // First matching rule wins. intdiv() avoids a float level for odd ranks.
+    if (isset($aPlace['place_type'])) {
+        $sLabel = $aPlace['place_type'];
+    } elseif ($aPlace['class'] == 'boundary' && $aPlace['type'] == 'administrative') {
+        $sLabel = getBoundaryLabel(intdiv($iRank, 2), $sCountry);
+    } elseif ($aPlace['type'] == 'postal_code') {
+        $sLabel = 'postcode';
+    } elseif ($iRank < 26) {
+        $sLabel = $aPlace['type'];
+    } elseif ($iRank < 28) {
+        $sLabel = 'road';
+    } elseif ($aPlace['class'] == 'place'
+            && ($aPlace['type'] == 'house_number' ||
+                $aPlace['type'] == 'house_name' ||
+                $aPlace['type'] == 'country_code')
+    ) {
+        $sLabel = $aPlace['type'];
+    } else {
+        $sLabel = $aPlace['class'];
+    }
+
+    return strtolower(str_replace(' ', '_', $sLabel));
+}
+
+/**
+ * Create a human-readable label for the given place.
+ *
+ * @param array[] $aPlace    Information about the place to label.
+ * @param string  $sCountry  Country code passed on to getBoundaryLabel().
+ * @return string|null  The label or null when none of the rules applies.
+ */
+function getLabel($aPlace, $sCountry = null)
+{
+    if (isset($aPlace['place_type'])) {
+        return ucwords(str_replace('_', ' ', $aPlace['place_type']));
+    }
+
+    if ($aPlace['class'] == 'boundary' && $aPlace['type'] == 'administrative') {
+        return getBoundaryLabel(intdiv((int) ($aPlace['rank_address'] ?? 30), 2), $sCountry);
+    }
+
+    // Return a label only for 'important' class/type combinations
+    return (getImportance($aPlace) !== null)
+        ? ucwords(str_replace('_', ' ', $aPlace['type']))
+        : null;
+}
+
+
+/**
+ * Return a simple label for an administrative boundary for the given country.
+ *
+ * @param int|float $iAdminLevel  Administrative level to label. Truncated
+ *                                to an integer before the lookup.
+ * @param string $sCountry   Country code of the country where the object is
+ *                           in. May be null, in which case a world-wide
+ *                           fallback is used.
+ * @param string $sFallback  String to return if no explicit string is listed.
+ * @return string
+ */
+function getBoundaryLabel($iAdminLevel, $sCountry, $sFallback = 'Administrative')
+{
+    static $aBoundaryList = array (
+                             'default' => array (
+                                           1 => 'Continent',
+                                           2 => 'Country',
+                                           3 => 'Region',
+                                           4 => 'State',
+                                           5 => 'State District',
+                                           6 => 'County',
+                                           7 => 'Municipality',
+                                           8 => 'City',
+                                           9 => 'City District',
+                                           10 => 'Suburb',
+                                           11 => 'Neighbourhood',
+                                           12 => 'City Block'
+                                          ),
+                             'no' => array (
+                                      3 => 'State',
+                                      4 => 'County'
+                                     ),
+                             'se' => array (
+                                      3 => 'State',
+                                      4 => 'County'
+                                     )
+            );
+
+    // Array offsets must be integers: a fractional float offset is truncated
+    // implicitly and raises a deprecation notice since PHP 8.1.
+    $iLevel = (int) $iAdminLevel;
+
+    return $aBoundaryList[$sCountry][$iLevel]
+        ?? $aBoundaryList['default'][$iLevel]
+        ?? $sFallback;
+}
+
+/**
+ * Estimate how far a feature mapped as a node extends around its position.
+ *
+ * @param array[] $aPlace  Information about the place, which must be a
+ *                         node feature. Its 'class' and 'type' are used.
+ *
+ * @return float|int  Radius in degrees; 0.00005 for unlisted class/types.
+ */
+function getDefRadius($aPlace)
+{
+    $sKey = $aPlace['class'].':'.$aPlace['type'];
+
+    $aRadiusByKey = array(
+        'place:continent' => 25,
+        'place:country' => 7,
+        'place:state' => 2.6,
+        'place:province' => 2.6,
+        'place:region' => 1.0,
+        'place:county' => 0.7,
+        'place:city' => 0.16,
+        'place:municipality' => 0.16,
+        'place:island' => 0.32,
+        'place:postcode' => 0.16,
+        'place:town' => 0.04,
+        'place:village' => 0.02,
+        'place:hamlet' => 0.02,
+        'place:district' => 0.02,
+        'place:borough' => 0.02,
+        'place:suburb' => 0.02,
+        'place:locality' => 0.01,
+        'place:neighbourhood' => 0.01,
+        'place:quarter' => 0.01,
+        'place:city_block' => 0.01,
+        'landuse:farm' => 0.01,
+        'place:farm' => 0.01,
+        'place:airport' => 0.015,
+        'aeroway:aerodrome' => 0.015,
+        'railway:station' => 0.005
+    );
+
+    return isset($aRadiusByKey[$sKey]) ? $aRadiusByKey[$sKey] : 0.00005;
+}
+
+/**
+ * Look up the name of the map icon for the class and type of a place.
+ *
+ * @param array[] $aPlace  Information about the place.
+ * @return string|null  Name of the icon or null when there is none.
+ */
+function getIcon($aPlace)
+{
+    $aIconNames = array(
+        'boundary:administrative' => 'poi_boundary_administrative',
+        'place:city' => 'poi_place_city',
+        'place:town' => 'poi_place_town',
+        'place:village' => 'poi_place_village',
+        'place:hamlet' => 'poi_place_village',
+        'place:suburb' => 'poi_place_village',
+        'place:locality' => 'poi_place_village',
+        'place:airport' => 'transport_airport2',
+        'aeroway:aerodrome' => 'transport_airport2',
+        'railway:station' => 'transport_train_station2',
+        'amenity:place_of_worship' => 'place_of_worship_unknown3',
+        'amenity:pub' => 'food_pub',
+        'amenity:bar' => 'food_bar',
+        'amenity:university' => 'education_university',
+        'tourism:museum' => 'tourist_museum',
+        'amenity:arts_centre' => 'tourist_art_gallery2',
+        'tourism:zoo' => 'tourist_zoo',
+        'tourism:theme_park' => 'poi_point_of_interest',
+        'tourism:attraction' => 'poi_point_of_interest',
+        'leisure:golf_course' => 'sport_golf',
+        'historic:castle' => 'tourist_castle',
+        'amenity:hospital' => 'health_hospital',
+        'amenity:school' => 'education_school',
+        'amenity:theatre' => 'tourist_theatre',
+        'amenity:library' => 'amenity_library',
+        'amenity:fire_station' => 'amenity_firestation3',
+        'amenity:police' => 'amenity_police2',
+        'amenity:bank' => 'money_bank2',
+        'amenity:post_office' => 'amenity_post_office',
+        'tourism:hotel' => 'accommodation_hotel2',
+        'amenity:cinema' => 'tourist_cinema',
+        'tourism:artwork' => 'tourist_art_gallery2',
+        'historic:archaeological_site' => 'tourist_archaeological2',
+        'amenity:doctors' => 'health_doctors',
+        'leisure:sports_centre' => 'sport_leisure_centre',
+        'leisure:swimming_pool' => 'sport_swimming_outdoor',
+        'shop:supermarket' => 'shopping_supermarket',
+        'shop:convenience' => 'shopping_convenience',
+        'amenity:restaurant' => 'food_restaurant',
+        'amenity:fast_food' => 'food_fastfood',
+        'amenity:cafe' => 'food_cafe',
+        'tourism:guest_house' => 'accommodation_bed_and_breakfast',
+        'amenity:pharmacy' => 'health_pharmacy_dispensing',
+        'amenity:fuel' => 'transport_fuel',
+        'natural:peak' => 'poi_peak',
+        'natural:wood' => 'landuse_coniferous_and_deciduous',
+        'shop:bicycle' => 'shopping_bicycle',
+        'shop:clothes' => 'shopping_clothes',
+        'shop:hairdresser' => 'shopping_hairdresser',
+        'shop:doityourself' => 'shopping_diy',
+        'shop:estate_agent' => 'shopping_estateagent2',
+        'shop:car' => 'shopping_car',
+        'shop:garden_centre' => 'shopping_garden_centre',
+        'shop:car_repair' => 'shopping_car_repair',
+        'shop:bakery' => 'shopping_bakery',
+        'shop:butcher' => 'shopping_butcher',
+        'shop:apparel' => 'shopping_clothes',
+        'shop:laundry' => 'shopping_laundrette',
+        'shop:beverages' => 'shopping_alcohol',
+        'shop:alcohol' => 'shopping_alcohol',
+        'shop:optician' => 'health_opticians',
+        'shop:chemist' => 'health_pharmacy',
+        'shop:gallery' => 'tourist_art_gallery2',
+        'shop:jewelry' => 'shopping_jewelry',
+        'tourism:information' => 'amenity_information',
+        'historic:ruins' => 'tourist_ruin',
+        'amenity:college' => 'education_school',
+        'historic:monument' => 'tourist_monument',
+        'historic:memorial' => 'tourist_monument',
+        'historic:mine' => 'poi_mine',
+        'tourism:caravan_site' => 'accommodation_caravan_park',
+        'amenity:bus_station' => 'transport_bus_station',
+        'amenity:atm' => 'money_atm2',
+        'tourism:viewpoint' => 'tourist_view_point',
+        'tourism:guesthouse' => 'accommodation_bed_and_breakfast',
+        'railway:tram' => 'transport_tram_stop',
+        'amenity:courthouse' => 'amenity_court',
+        'amenity:recycling' => 'amenity_recycling',
+        'amenity:dentist' => 'health_dentist',
+        'natural:beach' => 'tourist_beach',
+        'railway:tram_stop' => 'transport_tram_stop',
+        'amenity:prison' => 'amenity_prison',
+        'highway:bus_stop' => 'transport_bus_stop2'
+    );
+    return $aIconNames[$aPlace['class'].':'.$aPlace['type']] ?? null;
+}
+
+/**
+ * Build the full URL of the icon image for the given place.
+ *
+ * @param array[] $aPlace  Information about the place.
+ * @return string|null  Icon URL below CONST_MapIcon_URL, or null when that
+ *                      constant is false or the place has no icon.
+ */
+function getIconFile($aPlace)
+{
+    // The icon is only looked up when CONST_MapIcon_URL is not false.
+    $sIconName = (CONST_MapIcon_URL === false) ? null : getIcon($aPlace);
+    if ($sIconName === null) {
+        return null;
+    }
+
+    return CONST_MapIcon_URL.'/'.$sIconName.'.p.20.png';
+}
+
+/**
+ * Return a class importance value for the given place.
+ *
+ * @param array[] $aPlace  Information about the place.
+ *
+ * @return int  An importance value. The lower the value, the more
+ *              important the class.
+ */
+function getImportance($aPlace)
+{
+    static $aWithImportance = null;
+
+    if ($aWithImportance === null) {
+        $aWithImportance = array_flip(array(
+                                           'boundary:administrative',
+                                           'place:country',
+                                           'place:state',
+                                           'place:province',
+                                           'place:county',
+                                           'place:city',
+                                           'place:region',
+                                           'place:island',
+                                           'place:town',
+                                           'place:village',
+                                           'place:hamlet',
+                                           'place:suburb',
+                                           'place:locality',
+                                           'landuse:farm',
+                                           'place:farm',
+                                           'highway:motorway_junction',
+                                           'highway:motorway',
+                                           'highway:trunk',
+                                           'highway:primary',
+                                           'highway:secondary',
+                                           'highway:tertiary',
+                                           'highway:residential',
+                                           'highway:unclassified',
+                                           'highway:living_street',
+                                           'highway:service',
+                                           'highway:track',
+                                           'highway:road',
+                                           'highway:byway',
+                                           'highway:bridleway',
+                                           'highway:cycleway',
+                                           'highway:pedestrian',
+                                           'highway:footway',
+                                           'highway:steps',
+                                           'highway:motorway_link',
+                                           'highway:trunk_link',
+                                           'highway:primary_link',
+                                           'landuse:industrial',
+                                           'landuse:residential',
+                                           'landuse:retail',
+                                           'landuse:commercial',
+                                           'place:airport',
+                                           'aeroway:aerodrome',
+                                           'railway:station',
+                                           'amenity:place_of_worship',
+                                           'amenity:pub',
+                                           'amenity:bar',
+                                           'amenity:university',
+                                           'tourism:museum',
+                                           'amenity:arts_centre',
+                                           'tourism:zoo',
+                                           'tourism:theme_park',
+                                           'tourism:attraction',
+                                           'leisure:golf_course',
+                                           'historic:castle',
+                                           'amenity:hospital',
+                                           'amenity:school',
+                                           'amenity:theatre',
+                                           'amenity:public_building',
+                                           'amenity:library',
+                                           'amenity:townhall',
+                                           'amenity:community_centre',
+                                           'amenity:fire_station',
+                                           'amenity:police',
+                                           'amenity:bank',
+                                           'amenity:post_office',
+                                           'leisure:park',
+                                           'amenity:park',
+                                           'landuse:park',
+                                           'landuse:recreation_ground',
+                                           'tourism:hotel',
+                                           'tourism:motel',
+                                           'amenity:cinema',
+                                           'tourism:artwork',
+                                           'historic:archaeological_site',
+                                           'amenity:doctors',
+                                           'leisure:sports_centre',
+                                           'leisure:swimming_pool',
+                                           'shop:supermarket',
+                                           'shop:convenience',
+                                           'amenity:restaurant',
+                                           'amenity:fast_food',
+                                           'amenity:cafe',
+                                           'tourism:guest_house',
+                                           'amenity:pharmacy',
+                                           'amenity:fuel',
+                                           'natural:peak',
+                                           'waterway:waterfall',
+                                           'natural:wood',
+                                           'natural:water',
+                                           'landuse:forest',
+                                           'landuse:cemetery',
+                                           'landuse:allotments',
+                                           'landuse:farmyard',
+                                           'railway:rail',
+                                           'waterway:canal',
+                                           'waterway:river',
+                                           'waterway:stream',
+                                           'shop:bicycle',
+                                           'shop:clothes',
+                                           'shop:hairdresser',
+                                           'shop:doityourself',
+                                           'shop:estate_agent',
+                                           'shop:car',
+                                           'shop:garden_centre',
+                                           'shop:car_repair',
+                                           'shop:newsagent',
+                                           'shop:bakery',
+                                           'shop:furniture',
+                                           'shop:butcher',
+                                           'shop:apparel',
+                                           'shop:electronics',
+                                           'shop:department_store',
+                                           'shop:books',
+                                           'shop:yes',
+                                           'shop:outdoor',
+                                           'shop:mall',
+                                           'shop:florist',
+                                           'shop:charity',
+                                           'shop:hardware',
+                                           'shop:laundry',
+                                           'shop:shoes',
+                                           'shop:beverages',
+                                           'shop:dry_cleaning',
+                                           'shop:carpet',
+                                           'shop:computer',
+                                           'shop:alcohol',
+                                           'shop:optician',
+                                           'shop:chemist',
+                                           'shop:gallery',
+                                           'shop:mobile_phone',
+                                           'shop:sports',
+                                           'shop:jewelry',
+                                           'shop:pet',
+                                           'shop:beauty',
+                                           'shop:stationery',
+                                           'shop:shopping_centre',
+                                           'shop:general',
+                                           'shop:electrical',
+                                           'shop:toys',
+                                           'shop:jeweller',
+                                           'shop:betting',
+                                           'shop:household',
+                                           'shop:travel_agency',
+                                           'shop:hifi',
+                                           'amenity:shop',
+                                           'tourism:information',
+                                           'place:house',
+                                           'place:house_name',
+                                           'place:house_number',
+                                           'place:country_code',
+                                           'leisure:pitch',
+                                           'highway:unsurfaced',
+                                           'historic:ruins',
+                                           'amenity:college',
+                                           'historic:monument',
+                                           'railway:subway',
+                                           'historic:memorial',
+                                           'leisure:nature_reserve',
+                                           'leisure:common',
+                                           'waterway:lock_gate',
+                                           'natural:fell',
+                                           'amenity:nightclub',
+                                           'highway:path',
+                                           'leisure:garden',
+                                           'landuse:reservoir',
+                                           'leisure:playground',
+                                           'leisure:stadium',
+                                           'historic:mine',
+                                           'natural:cliff',
+                                           'tourism:caravan_site',
+                                           'amenity:bus_station',
+                                           'amenity:kindergarten',
+                                           'highway:construction',
+                                           'amenity:atm',
+                                           'amenity:emergency_phone',
+                                           'waterway:lock',
+                                           'waterway:riverbank',
+                                           'natural:coastline',
+                                           'tourism:viewpoint',
+                                           'tourism:hostel',
+                                           'tourism:bed_and_breakfast',
+                                           'railway:halt',
+                                           'railway:platform',
+                                           'railway:tram',
+                                           'amenity:courthouse',
+                                           'amenity:recycling',
+                                           'amenity:dentist',
+                                           'natural:beach',
+                                           'place:moor',
+                                           'amenity:grave_yard',
+                                           'waterway:drain',
+                                           'landuse:grass',
+                                           'landuse:village_green',
+                                           'natural:bay',
+                                           'railway:tram_stop',
+                                           'leisure:marina',
+                                           'highway:stile',
+                                           'natural:moor',
+                                           'railway:light_rail',
+                                           'railway:narrow_gauge',
+                                           'natural:land',
+                                           'amenity:village_hall',
+                                           'waterway:dock',
+                                           'amenity:veterinary',
+                                           'landuse:brownfield',
+                                           'leisure:track',
+                                           'railway:historic_station',
+                                           'landuse:construction',
+                                           'amenity:prison',
+                                           'landuse:quarry',
+                                           'amenity:telephone',
+                                           'highway:traffic_signals',
+                                           'natural:heath',
+                                           'historic:house',
+                                           'amenity:social_club',
+                                           'landuse:military',
+                                           'amenity:health_centre',
+                                           'historic:building',
+                                           'amenity:clinic',
+                                           'highway:services',
+                                           'amenity:ferry_terminal',
+                                           'natural:marsh',
+                                           'natural:hill',
+                                           'highway:raceway',
+                                           'amenity:taxi',
+                                           'amenity:take_away',
+                                           'amenity:car_rental',
+                                           'place:islet',
+                                           'amenity:nursery',
+                                           'amenity:nursing_home',
+                                           'amenity:toilets',
+                                           'amenity:hall',
+                                           'waterway:boatyard',
+                                           'highway:mini_roundabout',
+                                           'historic:manor',
+                                           'tourism:chalet',
+                                           'amenity:bicycle_parking',
+                                           'amenity:hotel',
+                                           'waterway:weir',
+                                           'natural:wetland',
+                                           'natural:cave_entrance',
+                                           'amenity:crematorium',
+                                           'tourism:picnic_site',
+                                           'landuse:wood',
+                                           'landuse:basin',
+                                           'natural:tree',
+                                           'leisure:slipway',
+                                           'landuse:meadow',
+                                           'landuse:piste',
+                                           'amenity:care_home',
+                                           'amenity:club',
+                                           'amenity:medical_centre',
+                                           'historic:roman_road',
+                                           'historic:fort',
+                                           'railway:subway_entrance',
+                                           'historic:yes',
+                                           'highway:gate',
+                                           'leisure:fishing',
+                                           'historic:museum',
+                                           'amenity:car_wash',
+                                           'railway:level_crossing',
+                                           'leisure:bird_hide',
+                                           'natural:headland',
+                                           'tourism:apartments',
+                                           'amenity:shopping',
+                                           'natural:scrub',
+                                           'natural:fen',
+                                           'building:yes',
+                                           'mountain_pass:yes',
+                                           'amenity:parking',
+                                           'highway:bus_stop',
+                                           'place:postcode',
+                                           'amenity:post_box',
+                                           'place:houses',
+                                           'railway:preserved',
+                                           'waterway:derelict_canal',
+                                           'amenity:dead_pub',
+                                           'railway:disused_station',
+                                           'railway:abandoned',
+                                           'railway:disused'
+                ));
+    }
+
+    $sClassPlace = $aPlace['class'].':'.$aPlace['type'];
+
+    return $aWithImportance[$sClassPlace] ?? null;
+}
--- a/lib-php/DB.php
+++ b/lib-php/DB.php
@@ -0,0 +1,360 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim;
+
+require_once(CONST_LibDir.'/DatabaseError.php');
+
+/**
+ * Uses PDO to access the database identified by the DSN passed to the
+ * constructor or, when none is given, by the DATABASE_DSN setting.
+ */
+class DB
+{
+    /** @var \PDO|null PDO handle; only set once connect() has succeeded. */
+    protected $connection;
+
+    /**
+     * @var string PDO data source name used by connect().
+     *
+     * Declared explicitly because creating it dynamically in the constructor
+     * is deprecated as of PHP 8.2. It stays public since the dynamically
+     * created property was publicly accessible as well.
+     */
+    public $sDSN;
+
+    /**
+     * @param string|null  $sDSN  PDO DSN to use. When null, the value of the
+     *                            DATABASE_DSN setting is used instead.
+     */
+    public function __construct($sDSN = null)
+    {
+        $this->sDSN = $sDSN ?? getSetting('DATABASE_DSN');
+    }
+
+    /**
+     * Opens the PDO connection for $this->sDSN and applies the session
+     * settings (date style, encoding, JIT/parallel worker limits).
+     *
+     * @param boolean  $bNew         Connect again even if a connection is already set.
+     * @param boolean  $bPersistent  Value for the PDO::ATTR_PERSISTENT option.
+     *
+     * @return boolean  Always true; a failed connection attempt raises an exception.
+     *
+     * @throws \Nominatim\DatabaseError  When the PDO connection cannot be created.
+     */
+    public function connect($bNew = false, $bPersistent = true)
+    {
+        if (isset($this->connection) && !$bNew) {
+            return true;
+        }
+        $aConnOptions = array(
+                         \PDO::ATTR_ERRMODE            => \PDO::ERRMODE_EXCEPTION,
+                         \PDO::ATTR_DEFAULT_FETCH_MODE => \PDO::FETCH_ASSOC,
+                         \PDO::ATTR_PERSISTENT         => $bPersistent
+        );
+
+        // https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
+        try {
+            $conn = new \PDO($this->sDSN, null, null, $aConnOptions);
+        } catch (\PDOException $e) {
+            $sMsg = 'Failed to establish database connection:' . $e->getMessage();
+            // Pass the exception object, not its message: DatabaseError::getSqlError()
+            // calls getMessage() on this argument and would fail on a plain string.
+            throw new \Nominatim\DatabaseError($sMsg, 500, null, $e);
+        }
+
+        $conn->exec("SET DateStyle TO 'sql,european'");
+        $conn->exec("SET client_encoding TO 'utf-8'");
+        // Disable JIT and parallel workers. They interfere badly with search SQL.
+        $conn->exec("UPDATE pg_settings SET setting = -1 WHERE name = 'jit_above_cost'");
+        $conn->exec("UPDATE pg_settings SET setting = 0 WHERE name = 'max_parallel_workers_per_gather'");
+        $iMaxExecution = ini_get('max_execution_time');
+        if ($iMaxExecution > 0) {
+            $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds
+        }
+
+        // Only store the handle once it is fully configured.
+        $this->connection = $conn;
+        return true;
+    }
+
+    /**
+     * Executes a statement that does not produce a result set.
+     *
+     * @param string      $sSQL         Statement to execute.
+     * @param array|null  $aInputVars   Values to bind. When set, the statement
+     *                                  is prepared and executed with them.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return integer  Number of rows that were modified or deleted by the
+     *                  statement. If no rows were affected returns 0.
+     *
+     * @throws \Nominatim\DatabaseError  When PDO reports an error.
+     */
+    public function exec($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            if (isset($aInputVars)) {
+                $stmt = $this->connection->prepare($sSQL);
+                $stmt->execute($aInputVars);
+                // The prepared path used to return null; report the affected
+                // rows here as well so both paths honour the same contract.
+                return $stmt->rowCount();
+            }
+            return $this->connection->exec($sSQL);
+        } catch (\PDOException $e) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
+        }
+    }
+
+    /**
+     * Runs the query and hands back its first result row.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return array|false  The first row, or false if the query has no result.
+     */
+    public function getRow($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            return $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage)->fetch();
+        } catch (\PDOException $oError) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $oError, $sSQL);
+        }
+    }
+
+    /**
+     * Runs the query and hands back the first column of its first row.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return mixed  The value, or false if the query has no result.
+     */
+    public function getOne($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            $oResult = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
+            $aFirst = $oResult->fetch(\PDO::FETCH_NUM);
+        } catch (\PDOException $oError) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $oError, $sSQL);
+        }
+
+        return ($aFirst === false) ? false : $aFirst[0];
+    }
+
+    /**
+     * Runs the query and hands back every result row.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return array[]  List of rows; empty if the query has no result.
+     */
+    public function getAll($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            return $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage)->fetchAll();
+        } catch (\PDOException $oError) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $oError, $sSQL);
+        }
+    }
+
+    /**
+     * Executes query. Returns array of the first value of each result.
+     * Returns empty array if no results found.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return array  First column of every result row, in result order.
+     *
+     * @throws \Nominatim\DatabaseError  When PDO reports an error.
+     */
+    public function getCol($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            $stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
+
+            // Fetch the column in one go. Looping over fetchColumn() cannot
+            // tell a boolean false value apart from "no more rows" and would
+            // silently truncate the result at the first false.
+            return $stmt->fetchAll(\PDO::FETCH_COLUMN, 0);
+        } catch (\PDOException $e) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL);
+        }
+    }
+
+    /**
+     * Runs the query and builds a lookup table from its rows: the first
+     * column of each row becomes the key, the second column the value.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return array  The mapping; empty if the query has no result.
+     */
+    public function getAssoc($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        $aMapping = array();
+
+        try {
+            $oResult = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage);
+
+            while (true) {
+                $aPair = $oResult->fetch(\PDO::FETCH_NUM);
+                if (!$aPair) {
+                    break;
+                }
+                $aMapping[$aPair[0]] = $aPair[1];
+            }
+        } catch (\PDOException $oError) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $oError, $sSQL);
+        }
+
+        return $aMapping;
+    }
+
+    /**
+     * Runs the query and hands back the statement for the caller to iterate.
+     *
+     * Without input variables the SQL is sent directly; with input variables
+     * it is prepared first and executed with the given values.
+     *
+     * @param string      $sSQL         Query to run.
+     * @param array|null  $aInputVars   Values to bind to the query, if any.
+     * @param string      $sErrMessage  Message of the exception raised on failure.
+     *
+     * @return PDOStatement
+     */
+    public function getQueryStatement($sSQL, $aInputVars = null, $sErrMessage = 'Database query failed')
+    {
+        try {
+            if (!isset($aInputVars)) {
+                return $this->connection->query($sSQL);
+            }
+
+            $oPrepared = $this->connection->prepare($sSQL);
+            $oPrepared->execute($aInputVars);
+            return $oPrepared;
+        } catch (\PDOException $oError) {
+            throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $oError, $sSQL);
+        }
+    }
+
+    /**
+     * Quotes a string for use as a literal in SQL, using the quoting of
+     * the current connection.
+     *
+     * Example: St. John's Way => 'St. John\'s Way'
+     *
+     * @param string  $sVal  Text to be quoted.
+     *
+     * @return string
+     */
+    public function getDBQuoted($sVal)
+    {
+        $sQuoted = $this->connection->quote($sVal);
+
+        return $sQuoted;
+    }
+
+    /**
+     * Applies getDBQuoted() to every element of a list.
+     *
+     * @param array  $aVals  List of text to be quoted.
+     *
+     * @return array  The quoted values, under the same keys as the input.
+     */
+    public function getDBQuotedList($aVals)
+    {
+        return array_map(array($this, 'getDBQuoted'), $aVals);
+    }
+
+    /**
+     * Builds an SQL array constructor from a list of SQL fragments.
+     *
+     * Example: [1,2,"'b'"] => "ARRAY[1,2,'b']"
+     *
+     * The elements are joined as they are; no quoting is applied here.
+     *
+     * @param array  $a  List of already SQL-ready elements.
+     *
+     * @return string
+     */
+    public function getArraySQL($a)
+    {
+        $sElements = implode(',', $a);
+
+        return 'ARRAY['.$sElements.']';
+    }
+
+    /**
+     * Tells whether pg_tables lists exactly one table of the given name.
+     *
+     * @param string  $sTableName
+     *
+     * @return boolean  True if the table exists.
+     */
+    public function tableExists($sTableName)
+    {
+        $mCount = $this->getOne(
+            'SELECT count(*) FROM pg_tables WHERE tablename = :tablename',
+            array(':tablename' => $sTableName)
+        );
+
+        return $mCount == 1;
+    }
+
+    /**
+     * Drops a table including dependent objects (CASCADE). A table that
+     * does not exist is not an error.
+     *
+     * The table name is inserted into the statement as is, so it must come
+     * from a trusted source.
+     *
+     * @param string  $sTableName
+     *
+     * @return boolean  True if the table was deleted or didn't exist.
+     */
+    public function deleteTable($sTableName)
+    {
+        $sDropSQL = 'DROP TABLE IF EXISTS '.$sTableName.' CASCADE';
+
+        return $this->exec($sDropSQL) == 0;
+    }
+
+    /**
+     * Attempts a fresh connection to the database and reports the outcome
+     * as a boolean instead of letting the DatabaseError escape.
+     *
+     * @return boolean  True if connecting worked.
+     */
+    public function checkConnection()
+    {
+        try {
+            $this->connect(true);
+            return true;
+        } catch (\Nominatim\DatabaseError $oError) {
+            return false;
+        }
+    }
+
+    /**
+     * Returns the PostgreSQL server version as major.minor, derived from
+     * the leading digits of server_version_num.
+     *
+     * e.g. '90603' => 9.6, '100006' => 10.0
+     *
+     * @return float  The version, or 0.0 if the version number could not be parsed.
+     */
+    public function getPostgresVersion()
+    {
+        $sVersionString = (string) $this->getOne('SHOW server_version_num');
+        if (!preg_match('#([0-9]?[0-9])([0-9][0-9])[0-9][0-9]#', $sVersionString, $aMatches)) {
+            // Nothing usable came back; avoid touching undefined match groups.
+            return 0.0;
+        }
+
+        // The minor part is zero-padded ('06'). Drop the padding, otherwise
+        // 9.6 would be reported as 9.06.
+        return (float) ($aMatches[1].'.'.(int) $aMatches[2]);
+    }
+
+    /**
+     * Returns the PostGIS library version as major.minor, taken from the
+     * first two dot-separated numbers of postgis_lib_version().
+     *
+     * e.g. '2.5.3' => 2.5
+     *
+     * @return float
+     */
+    public function getPostgisVersion()
+    {
+        $aParts = array();
+        preg_match(
+            '#^([0-9]+)[.]([0-9]+)[.]#',
+            $this->getOne('select postgis_lib_version()'),
+            $aParts
+        );
+        $sMajorMinor = $aParts[1].'.'.$aParts[2];
+
+        return (float) $sMajorMinor;
+    }
+
+    /**
+     * Returns an associate array of postgresql database connection settings. Keys can
+     * be 'database', 'hostspec', 'port', 'username', 'password'.
+     * Returns empty array on failure, thus check if at least 'database' is set.
+     *
+     * The DSN keys 'host', 'dbname' and 'user' are renamed to 'hostspec',
+     * 'database' and 'username'; any other key is passed through unchanged.
+     * Segments that are not of the form key=value (for example the empty
+     * segment after a trailing ';') are ignored.
+     *
+     * @param string  $sDSN  DSN of the form 'pgsql:key=value;key=value'.
+     *
+     * @return array
+     */
+    public static function parseDSN($sDSN)
+    {
+        // https://secure.php.net/manual/en/ref.pdo-pgsql.connection.php
+        $aKeyAliases = array(
+                        'host'   => 'hostspec',
+                        'dbname' => 'database',
+                        'user'   => 'username'
+        );
+
+        $aInfo = array();
+        if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) {
+            foreach (explode(';', $aMatches[1]) as $sKeyVal) {
+                // Without a '=' there is no value to unpack; unpacking anyway
+                // would raise a warning and register a bogus key.
+                if (strpos($sKeyVal, '=') === false) {
+                    continue;
+                }
+                list($sKey, $sVal) = explode('=', $sKeyVal, 2);
+                $aInfo[$aKeyAliases[$sKey] ?? $sKey] = $sVal;
+            }
+        }
+        return $aInfo;
+    }
+
+    /**
+     * Takes an array of settings and return the DSN string. Key names can be
+     * 'database', 'hostspec', 'port', 'username', 'password' but aliases
+     * 'dbname', 'host' and 'user' are also supported. When both an alias and
+     * the long name are present, the alias wins.
+     *
+     * Settings that are missing or empty are left out of the DSN.
+     *
+     * @param array  $aInfo  Connection settings, e.g. as returned by parseDSN().
+     *
+     * @return string
+     *
+     */
+    public static function generateDSN($aInfo)
+    {
+        $aSettings = array(
+                      'host'     => $aInfo['host'] ?? $aInfo['hostspec'] ?? '',
+                      'port'     => $aInfo['port'] ?? '',
+                      'dbname'   => $aInfo['dbname'] ?? $aInfo['database'] ?? '',
+                      // 'username' is what parseDSN() produces; it used to be ignored here.
+                      'user'     => $aInfo['user'] ?? $aInfo['username'] ?? '',
+                      'password' => $aInfo['password'] ?? ''
+        );
+
+        // Assemble only the non-empty settings. Stripping empty 'key=;' parts
+        // from a finished string by pattern would also cut into values that
+        // end in '=' (e.g. a password 'abc=').
+        $aKeyVals = array();
+        foreach ($aSettings as $sKey => $mVal) {
+            $sVal = (string) $mVal;
+            if ($sVal !== '') {
+                $aKeyVals[] = $sKey.'='.$sVal;
+            }
+        }
+
+        return 'pgsql:'.implode(';', $aKeyVals);
+    }
+}
--- a/lib-php/DatabaseError.php
+++ b/lib-php/DatabaseError.php
@@ -0,0 +1,42 @@
+<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
+
+namespace Nominatim;
+
+/**
+ * Exception for failed database operations. Carries the underlying PDO
+ * error and, optionally, the SQL that was being executed.
+ */
+class DatabaseError extends \Exception
+{
+    /**
+     * @var \PDOException|string|null Underlying error as handed in by the
+     *      caller: the PDO exception or only its message text.
+     *
+     * Declared explicitly because dynamic properties are deprecated as of
+     * PHP 8.2; public because the dynamic property was public.
+     */
+    public $oPDOErr;
+
+    /** @var string|null SQL that caused the error, if the caller supplied it. */
+    public $sSql;
+
+    /**
+     * @param string                 $message   Error message.
+     * @param integer                $code      Error code.
+     * @param \Throwable|null        $previous  Previous exception for chaining.
+     * @param \PDOException|string   $oPDOErr   Underlying PDO error.
+     * @param string|null            $sSql      SQL that was being executed.
+     */
+    public function __construct($message, $code, $previous, $oPDOErr, $sSql = null)
+    {
+        parent::__construct($message, $code, $previous);
+        // https://secure.php.net/manual/en/class.pdoexception.php
+        $this->oPDOErr = $oPDOErr;
+        $this->sSql = $sSql;
+    }
+
+    /**
+     * @return string  Class name, code and message on a single line.
+     */
+    public function __toString()
+    {
+        return __CLASS__ . ": [{$this->code}]: {$this->message}\n";
+    }
+
+    /**
+     * Returns the message of the underlying PDO error.
+     *
+     * @return string
+     */
+    public function getSqlError()
+    {
+        if ($this->oPDOErr instanceof \Throwable) {
+            return $this->oPDOErr->getMessage();
+        }
+
+        // The error may have been handed in as plain message text; calling
+        // getMessage() on it would be a fatal error.
+        return (string) $this->oPDOErr;
+    }
+
+    /**
+     * Returns details for debugging: a dump of the underlying PDO error when
+     * CONST_Debug is enabled, otherwise only the SQL.
+     *
+     * @return string|null
+     */
+    public function getSqlDebugDump()
+    {
+        if (CONST_Debug) {
+            return var_export($this->oPDOErr, true);
+        } else {
+            return $this->sSql;
+        }
+    }
+}
--- a/lib-php/DebugHtml.php
+++ b/lib-php/DebugHtml.php
@@ -1,4 +1,12 @@
 <?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */

 namespace Nominatim;

@@ -78,7 +86,7 @@ class Debug
        echo '<th>Address Tokens</th><th>Address Not</th>';
        echo '<th>country</th><th>operator</th>';
        echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
-        foreach ($aSearches as $iRank => $aRankedSet) {
+        foreach ($aSearches as $aRankedSet) {
            foreach ($aRankedSet as $aRow) {
                $aRow->dumpAsHtmlTableRow($aWordsIDs);
            }
--- a/Show More
+++ b/Show More