Compare commits

..

108 Commits

Author SHA1 Message Date
Sarah Hoffmann
6c00169666 Merge pull request #3997 from lonvia/fix-postcode-index
Reenable index on centroid column for location_postcodes
2026-02-22 17:20:15 +01:00
Sarah Hoffmann
f0d32501e4 location_postcodes does geometry lookups on centroid 2026-02-22 15:51:38 +01:00
Sarah Hoffmann
3e35d7fe26 Merge pull request #3996 from lonvia/improved-postcode-import
Avoid updates on initial filling of postcode table
2026-02-22 13:12:49 +01:00
Sarah Hoffmann
fff5858b53 add option to force a postcode reimport 2026-02-21 13:03:04 +01:00
Sarah Hoffmann
2507d5a298 avoid updates on initial filling of postcode table 2026-02-20 18:53:48 +01:00
Sarah Hoffmann
af9458a601 Merge pull request #3981 from Itz-Agasta/test
Implement Lazy Loading Search Endpoint
2026-02-18 19:38:05 +01:00
Itz-Agasta
855f451a5f Adds lazy loading for search endpoint availability
Introduces a mechanism to defer the search endpoint's availability check until the first request, improving startup robustness. If the search table is unavailable due to DB issues, the endpoint now responds with a 503 or 404 as appropriate, and retries the check on subsequent requests. This ensures that downtime or partial DB failures no longer prevent the API from initializing or serving reverse-only mode.
2026-02-18 21:46:55 +05:30
Sarah Hoffmann
bf17f1d01a Merge pull request #3991 from lonvia/interpolation-on-addresses
Add support for addr:interpolation on housenumbers
2026-02-18 14:25:38 +01:00
Sarah Hoffmann
9ac56c2078 add support for expanding interpolations on housenumbers 2026-02-18 11:52:21 +01:00
Sarah Hoffmann
fbe0be9301 Merge pull request #3923 from kad-link/ci/windows-smoke
CI: add dev-only Windows smoke workflow
2026-02-16 09:27:23 +01:00
Sarah Hoffmann
0249cd54da Merge pull request #3989 from lonvia/rework-misc-tests
More production SQL use in unit tests
2026-02-16 09:20:37 +01:00
Sarah Hoffmann
52b5337f36 Merge pull request #3988 from jayaddison/pr-3957-followup/empty-name-field
Indexing: fixup: add presence check for hstore 'name' field
2026-02-16 09:17:36 +01:00
James Addison
53e8334206 Indexing: fixup: add presence check for hstore name field 2026-02-16 00:01:57 +00:00
Sarah Hoffmann
c31abf58d0 make database import unit tests against real SQL 2026-02-15 21:43:17 +01:00
Sarah Hoffmann
d0bd42298e use original tables for database check tests 2026-02-15 21:43:17 +01:00
Sarah Hoffmann
d1b0bcaea7 Merge pull request #3987 from lonvia/rework-postcode-tests
Rework postcode unit tests
2026-02-15 21:42:54 +01:00
Sarah Hoffmann
c3e8fa8c43 replace postcode mock with standard postcode table fixtures 2026-02-15 16:48:31 +01:00
Sri CHaRan
24ba9651ba ci/windows: install osm2pgsql binary and enable full unit tests suite 2026-02-13 22:01:39 +05:30
Sri CHaRan
bf5ef0140a ci/windows: enable full python unit test setup for windows 2026-02-13 21:47:14 +05:30
Sri CHaRan
238f3dd1d9 ci/windows: add Postgresql setup action to tests 2026-02-13 21:47:14 +05:30
Sri Charan Chittineni
abd7c302f8 implement stage 1 : python unit tests 2026-02-13 21:47:14 +05:30
Sri CHaRan
2197236872 Add experimental Windows CI workflow 2026-02-13 21:47:14 +05:30
Sarah Hoffmann
2ddb19c0b0 Merge pull request #3986 from lonvia/rework-tiger-tests
Rework unit tests for import of tiger data
2026-02-13 14:11:04 +01:00
Sarah Hoffmann
3f14f89bdf Merge pull request #3985 from lonvia/rework-indexing-test
Rework unit tests for indexing
2026-02-13 12:06:51 +01:00
Sarah Hoffmann
8ed7a3875a rework tiger unit tests to use production pgsql functions 2026-02-13 11:16:40 +01:00
Sarah Hoffmann
70b9140f13 pass reverse-only as a separate parameter
... instead of hacking the internal structures of SQLPreprocessor
2026-02-13 11:12:23 +01:00
Sarah Hoffmann
3285948130 rewrite indexing tests to use standard table fixtures 2026-02-13 11:03:18 +01:00
Sarah Hoffmann
9d0732a941 add fixtures for postcode, interpolation table creation and filling 2026-02-13 10:57:59 +01:00
Sarah Hoffmann
5314e6c881 Merge pull request #3984 from lonvia/avoid-custom-table-definition-in-tests
Reuse table creation SQL in unittest
2026-02-13 09:16:44 +01:00
Sarah Hoffmann
2750d66470 use load_sql fixture instead of explicit SQLPreprocessor 2026-02-12 22:42:58 +01:00
Sarah Hoffmann
0d423ad7a7 reorganise fixtures for placex table
Now follows the same pattern as fixtures for other tables and
uses the production SQL for table creation.
2026-02-12 22:14:15 +01:00
Sarah Hoffmann
dd332caa4d simplify property test table implementation 2026-02-12 21:15:03 +01:00
Sarah Hoffmann
d691cfc35d switch table definitions in conftest to use production SQL 2026-02-12 21:12:10 +01:00
Sarah Hoffmann
d274a5aecc add fixtures for country table 2026-02-12 20:55:59 +01:00
Sarah Hoffmann
35a023d133 add function for inserting data to testing cursor 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
79682a94ce use better SQL quoting in test cursor implementation 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
aa42dc8a93 fix potential cancelling race with full queue 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
29fcd0b763 Merge pull request #3982 from lonvia/split-table-creation
Split up table creation SQL
2026-02-12 17:42:45 +01:00
Sarah Hoffmann
2237ce7124 split up table creation SQL into separate files 2026-02-12 16:36:10 +01:00
Sarah Hoffmann
58295e0643 remove unused indexes and sequences 2026-02-12 16:33:45 +01:00
Sarah Hoffmann
fed64cda5a Merge pull request #3957 from jayaddison/issue-2714/linked-places-default-language
Indexing: add default-language placename from linked places
2026-02-11 15:08:18 +01:00
Sarah Hoffmann
b995803c66 Merge pull request #3979 from jayaddison/issue-2714-prep/extract-rank-zero-specialcasing
Indexer: relocate zero-ranked-address indexing
2026-02-11 15:05:28 +01:00
Sarah Hoffmann
986d303c95 Merge pull request #3980 from lonvia/security-smells
Improve SQL query assembly
2026-02-10 15:26:34 +01:00
James Addison
310d6e3c92 Indexer: relocate zero-ranked-address indexing 2026-02-10 11:51:18 +00:00
Sarah Hoffmann
7a3ea55f3d ignore tables with odd names in SQLPreprocessor 2026-02-10 11:40:52 +01:00
Sarah Hoffmann
d10d70944d avoid f-strings in SQL creation in tests 2026-02-10 11:39:19 +01:00
Sarah Hoffmann
73590baf15 use psycopg.sql for SQL building in tokenizer 2026-02-10 11:39:19 +01:00
Sarah Hoffmann
e17d0cb5cf only allow alphanumeric and dash in DATABASE_WEBUSER
This variable is used a lot in raw SQL. Avoid injection issues.
2026-02-10 11:39:17 +01:00
Sarah Hoffmann
7a62c7d812 sanity check class names before inserting into classtype tables
The subsequent INSERT is done on an unquoted table name, making in
theory an SQL injection through an OSM value possible. In practice
this cannot happen because we check for the existence of the table.
During the creation of the classtype tables there is a sanity
check in place to disallow any table names that consist of anything
other than alphanumeric characters.
2026-02-10 11:38:26 +01:00
Sarah Hoffmann
615804b1b3 Merge pull request #3978 from jayaddison/issue-2714-prep/index-boundaries-method-signature-nitpick
Refactor: add default params to Indexer.index_boundaries
2026-02-10 09:45:29 +01:00
Sarah Hoffmann
79bbdfd55c Merge pull request #3975 from kad-link/fix/utf8-encoding-clean
Fix: Enforce explicit UTF-8 encoding in file I/O
2026-02-10 09:32:06 +01:00
James Addison
509f59b193 Refactor: add default params to index_boundaries 2026-02-09 21:36:30 +00:00
Sri CHaRan
f84b279540 fix: add utf-8 encoding in read-write files 2026-02-10 00:38:40 +05:30
James Addison
e62811cf97 Indexing: invert boolean logic to factor-out empty ELSE clause
Relates-to commit fa2a789e27.
2026-02-09 18:33:02 +00:00
Sarah Hoffmann
cd2f6e458b Merge pull request #3970 from lonvia/improve-dev-docs
Some minor improvement to developer docs
2026-02-05 21:57:54 +01:00
James Addison
fa2a789e27 Indexing: manage the case where no default-language exists
Relates-to commit 6fee784c9f.
2026-02-05 20:48:01 +00:00
Sarah Hoffmann
fc49a77e70 Merge pull request #3960 from jayaddison/tests/has-pending-monkeypatch-robustness
Tests: parameter-agnostic 'Indexer.has_pending' monkeypatching
2026-02-05 21:05:57 +01:00
Sarah Hoffmann
28baa34bdc point to developer docs from CONTRIBUTING.md 2026-02-05 20:51:41 +01:00
Sarah Hoffmann
151a5b64a8 docs: fix list of packages for development install 2026-02-05 20:45:18 +01:00
James Addison
6fee784c9f Indexing: add default-language placename from linked places 2026-02-05 15:19:48 +00:00
James Addison
3db7c6d804 Tests: parameter-agnostic has_pending monkeypatching
Instead of relying on runtime parameter compatibility between
the patched `has_pending` method and `list.pop`, use a proxy
lambda function that accepts arbitrary keyword params.
2026-02-05 15:09:09 +00:00
Sarah Hoffmann
b2f868d2fc Merge pull request #3966 from remo-lab/fix/sql-injection-truncate
Fix SQL injection in truncate_data_tables
2026-02-05 14:44:55 +01:00
remo-lab
ae7301921a Fix SQL injection in truncate_data_tables
Signed-off-by: remo-lab <remopanda7@gmail.com>
2026-02-05 17:04:10 +05:30
Sarah Hoffmann
8188689765 Merge pull request #3962 from lonvia/docs-deploy
Docs: switch deployment to use gunicorn's asgi/uwsgi support
2026-02-03 11:45:57 +01:00
Sarah Hoffmann
135453e463 docs: switch deployment to use gunicorn's asgi/uwsgi support 2026-02-03 09:08:06 +01:00
Sarah Hoffmann
cc9c8963f3 Merge pull request #3949 from Itz-Agasta/try
Feat: Add admin function for granting access to read-only user
2026-02-02 09:53:24 +01:00
Sarah Hoffmann
c882718355 Merge pull request #3959 from Aditya30ag/fix/readme-nominatim-api-module-path
Fix README: update Nominatim API server module path
2026-02-02 09:12:24 +01:00
Aditya30ag
3f02a4e33b Fix README: update Nominatim API server module path 2026-02-02 11:43:03 +05:30
Sarah Hoffmann
1cf5464d3a Merge pull request #3955 from AmmarYasser455/fix/typos
docs: fix multiple typos in documentation and source code
2026-02-01 10:05:34 +01:00
Sarah Hoffmann
dcbfa2a3d0 Merge pull request #3952 from jayaddison/pr-3687-followup/boundary-admin-level-for-linkage
Tests: resolve an issue in the place-linkage name expansion test case
2026-02-01 10:05:16 +01:00
James Addison
5cdc6724de Tests: set boundary admin level to enable linking 2026-01-31 22:00:23 +00:00
Itz-Agasta
45972811e3 Preserve import error tables during freeze
- Remove 'import_polygon_%' from UPDATE_TABLES to keep import_polygon_error
and import_polygon_delete tables in frozen databases.

- These tables contain permanent import error tracking data and should not
be deleted during freeze. The ro-access grant system expects them to exist
in all database states.
2026-01-31 22:50:18 +05:30
Itz-Agasta
e021f558bf Restore grants for dynamic tables in tokenizer, migration, and tiger import 2026-01-30 20:43:57 +05:30
AmmarYasser455
fcc5ce3f92 docs: fix multiple typos in documentation and source code 2026-01-30 12:13:23 +02:00
Sarah Hoffmann
9a979b7429 Merge pull request #3951 from Itz-Agasta/cli
Feat: Adds layer filtering option to search cli command
2026-01-29 09:58:06 +01:00
Itz-Agasta
6ad87db1eb Updates layer selection to allow optional default
- Modifies layer argument handling to permit having no default layers where appropriate.
- Update the help text for the layer parameter in the reverse command
2026-01-29 11:33:21 +05:30
Sarah Hoffmann
f4820bed0e Merge pull request #3950 from jayaddison/fixup/sql-debug-output-escaping
Fixup: add single-quote escaping within debug message
2026-01-28 20:30:11 +01:00
Itz-Agasta
bf6eb01d68 Adds layer filtering option to search command
Introduces a cli argument to restrict search results
to specified data layers, enabling more targeted queries.
2026-01-28 12:16:43 +05:30
James Addison
f07676a376 Fixup: add single-quote escaping within debug message 2026-01-28 01:27:53 +00:00
Itz-Agasta
5e2ce10fe0 Adds mock grants SQL file for import test 2026-01-27 17:55:51 +05:30
Itz-Agasta
58cae70596 Adds option to grant web user read-only DB access
Introduces a command-line flag to grant read-only access to the web user for all tables, improving ease of permissions management during refresh operations.
2026-01-27 17:54:10 +05:30
Itz-Agasta
bf0ee6685b Grants read-only access after import
Adds execution of grant statements to provide read-only privileges
for the web user following table creation or via a dedicated function.
Facilitates easier post-import permission management.
2026-01-27 17:53:25 +05:30
Itz-Agasta
ff1f1b06d9 Moves db grant statements to dedicated script
Centralizes all read-only access grants into a single SQL script, ensuring permissions are managed in one place.
2026-01-27 17:49:51 +05:30
Sarah Hoffmann
67ecf5f6a0 Merge pull request #3943 from Itz-Agasta/test_fix
Tests: Replace eval() with ast.literal_eval() for safer parsing
2026-01-25 10:10:15 +01:00
Itz-Agasta
e77a4c2f35 Switch to ast.literal_eval for dict parsing
Because some test data in the BDD feature files includes Python raw strings and escape sequences that standard json.loads() cannot parse, switch to the safer Python literal evaluation
for converting string representations of dictionaries.
2026-01-24 15:32:47 +05:30
Itz-Agasta
9fa980bca2 Replaces eval with json.loads for safer dict parsing
Switches from eval to json.loads when parsing string representations
of dictionaries to prevent arbitrary code
execution.
2026-01-24 15:32:47 +05:30
Sarah Hoffmann
fe773c12b2 Merge pull request #3946 from lonvia/enable-entrances-for-reverse
Enable entrance lookup for reverse and lookup
2026-01-23 22:10:43 +01:00
Sarah Hoffmann
cc96912580 Merge pull request #3906 from AyushDharDubey/fix/issue_2463-Use-search_name-table-for-TIGER-data-imports-on-'dropped'-databases
Use `search_name` as fallback for TIGER imports when update tables are dropped
2026-01-23 20:52:40 +01:00
Sarah Hoffmann
77a3ecd72d Merge pull request #3945 from lonvia/fix-starlette-tests
Update Starlette tests to using their TestClient
2026-01-23 20:45:15 +01:00
Sarah Hoffmann
6a6a064ef7 enable entrances for reverse and lookup 2026-01-23 17:38:47 +01:00
Sarah Hoffmann
35b42ad9ce update Starlette tests to using their TestClient 2026-01-23 16:28:13 +01:00
Sri Charan Chittineni
c4dc2c862e fix mypy typing for Starlette state object (#3944) 2026-01-22 13:21:34 +01:00
Sarah Hoffmann
7e44256f4a Merge pull request #3939 from lonvia/more-table-constraints
Add NOT NULL and UNIQUE constraints on tables
2026-01-14 15:04:45 +01:00
Ayush Dhar Dubey
eefd0efa59 update test frozen db: new tiger import mechanism 2026-01-09 17:47:07 +05:30
Ayush Dhar Dubey
2698382552 permit import of tiger after freeze 2026-01-09 17:35:01 +05:30
Ayush Dhar Dubey
954771a42d Add fallback search mechanism for dropped databases lookup 2026-01-09 17:35:01 +05:30
Sarah Hoffmann
e47601754a do not attempt to delete old data for newly created placex entries 2026-01-07 17:08:28 +01:00
Sarah Hoffmann
2cdf2db184 add NOT NULL and UNIQUE constraints where possible 2026-01-07 15:46:05 +01:00
Sarah Hoffmann
5200e11f33 ignore countries without geometry or country code for location_area 2026-01-07 11:43:32 +01:00
Sarah Hoffmann
ba1fc5a5b8 do not insert entries with empty name into search name 2026-01-07 11:27:55 +01:00
Sarah Hoffmann
d35a71c123 ensure correct indexed_status transitions 2026-01-07 11:12:35 +01:00
Sarah Hoffmann
e31862b7b5 make sure that importance is always set to a non-null value
Secondary importance might return invalid values in some cases.
2026-01-07 10:29:45 +01:00
Sarah Hoffmann
9ac5e0256d make sure array_merge() never returns null 2026-01-07 10:22:03 +01:00
Sarah Hoffmann
a4a2176ded immediately terminate indexing when a task catches an exception 2026-01-07 09:58:40 +01:00
Sarah Hoffmann
f30fcdcd9d BDD: make sure randomly generated names always contain a letter 2026-01-07 09:58:40 +01:00
otbutz
77b8e76be6 Add PR template (#3934) 2026-01-05 17:42:35 +01:00
Sarah Hoffmann
20a333dd9b Merge pull request #3930 from lonvia/remove-new-query-log-table
Remove unused new_query_log table
2026-01-02 09:58:05 +01:00
Sarah Hoffmann
084e1b8177 remove unused new_query_log table 2026-01-01 20:30:37 +01:00
89 changed files with 2175 additions and 1472 deletions

12
.github/PULL_REQUEST_TEMPLATE.md vendored Normal file
View File

@@ -0,0 +1,12 @@
## Summary
<!-- Describe the purpose of your pull request and, if present, link to existing issues. -->
## AI usage
<!-- Please list where and to what extent AI was used. -->
## Contributor guidelines (mandatory)
<!-- We only accept pull requests that follow our guidelines. A deliberate violation may result in a ban. -->
- [ ] I have adhered to the [coding style](https://github.com/osm-search/Nominatim/blob/master/CONTRIBUTING.md#coding-style)
- [ ] I have [tested](https://github.com/osm-search/Nominatim/blob/master/CONTRIBUTING.md#testing) the proposed changes
- [ ] I have [disclosed](https://github.com/osm-search/Nominatim/blob/master/CONTRIBUTING.md#using-ai-assisted-code-generators) above any use of AI to generate code, documentation, or the pull request description

View File

@@ -0,0 +1,95 @@
name: 'Setup Postgresql and Postgis on Windows'
description: 'Installs PostgreSQL and PostGIS for Windows and configures it for CI tests'
inputs:
postgresql-version:
description: 'Version of PostgreSQL to install'
required: true
runs:
using: "composite"
steps:
- name: Set up PostgreSQL variables
shell: pwsh
run: |
$version = "${{ inputs.postgresql-version }}"
$root = "C:\Program Files\PostgreSQL\$version"
$bin = "$root\bin"
echo "PGROOT=$root" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "PGBIN=$bin" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
echo "$bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Decide Postgis version (Windows)
id: postgis-ver
shell: pwsh
run: |
echo "PowerShell version: ${PSVersionTable.PSVersion}"
$PG_VERSION = Split-Path $env:PGROOT -Leaf
$postgis_page = "https://download.osgeo.org/postgis/windows/pg$PG_VERSION"
echo "Detecting PostGIS version from $postgis_page for PostgreSQL $PG_VERSION"
$pgis_bundle = (Invoke-WebRequest -Uri $postgis_page -ErrorAction Stop).Links.Where({$_.href -match "^postgis.*zip$"}).href
if (!$pgis_bundle) {
Write-Error "Could not find latest PostGIS version in $postgis_page that would match ^postgis.*zip$ pattern"
exit 1
}
$pgis_bundle = [IO.Path]::ChangeExtension($pgis_bundle, [NullString]::Value)
$pgis_bundle_url = "$postgis_page/$pgis_bundle.zip"
Add-Content $env:GITHUB_OUTPUT "postgis_file=$pgis_bundle"
Add-Content $env:GITHUB_OUTPUT "postgis_bundle_url=$pgis_bundle_url"
- uses: actions/cache@v4
with:
path: |
C:/postgis.zip
key: postgis-cache-${{ steps.postgis-ver.outputs.postgis_file }}
- name: Download postgis
shell: pwsh
run: |
if (!(Test-Path "C:\postgis.zip")){(new-object net.webclient).DownloadFile($env:PGIS_BUNDLE_URL, "c:\postgis.zip")}
if (Test-path "c:\postgis_archive"){Remove-Item "c:\postgis_archive" -Recurse -Force}
7z x c:\postgis.zip -oc:\postgis_archive
env:
PGIS_BUNDLE_URL: ${{ steps.postgis-ver.outputs.postgis_bundle_url }}
- name: Install postgis
shell: bash
run: |
echo "Root: $PGROOT, Bin: $PGBIN"
cp -r c:/postgis_archive/postgis-bundle-*/* "$PGROOT"
- name: Start PostgreSQL on Windows
run: |
$pgService = Get-Service -Name postgresql*
Set-Service -InputObject $pgService -Status running -StartupType automatic
Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
shell: pwsh
- name: Adapt postgresql configuration
shell: pwsh
env:
PGPASSWORD: root
run: |
& "$env:PGBIN\psql" -U postgres -d postgres -c "ALTER SYSTEM SET fsync = 'off';"
& "$env:PGBIN\psql" -U postgres -d postgres -c "ALTER SYSTEM SET synchronous_commit = 'off';"
& "$env:PGBIN\psql" -U postgres -d postgres -c "ALTER SYSTEM SET full_page_writes = 'off';"
& "$env:PGBIN\psql" -U postgres -d postgres -c "ALTER SYSTEM SET shared_buffers = '1GB';"
& "$env:PGBIN\psql" -U postgres -d postgres -c "ALTER SYSTEM SET port = 5432;"
Restart-Service -Name postgresql*
Start-Process -FilePath "$env:PGBIN\pg_isready" -Wait -PassThru
- name: Setup database users
shell: pwsh
env:
PGPASSWORD: root
run: |
& "$env:PGBIN\createuser" -U postgres -S www-data
& "$env:PGBIN\createuser" -U postgres -s runner

View File

@@ -1,5 +1,7 @@
name: 'Setup Postgresql and Postgis' name: 'Setup Postgresql and Postgis'
description: 'Installs PostgreSQL and PostGIS and configures it for CI tests'
inputs: inputs:
postgresql-version: postgresql-version:
description: 'Version of PostgreSQL to install' description: 'Version of PostgreSQL to install'

View File

@@ -140,6 +140,65 @@ jobs:
../venv/bin/python -m pytest test/bdd --nominatim-purge ../venv/bin/python -m pytest test/bdd --nominatim-purge
working-directory: Nominatim working-directory: Nominatim
tests-windows:
needs: create-archive
runs-on: windows-latest
steps:
- uses: actions/download-artifact@v4
with:
name: full-source
- name: Unpack Nominatim
run: tar xf nominatim-src.tar.bz2
- uses: ./Nominatim/.github/actions/setup-postgresql-windows
with:
postgresql-version: 17
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.14'
- name: Install Spatialite
run: |
Invoke-WebRequest -Uri "https://www.gaia-gis.it/gaia-sins/windows-bin-amd64/mod_spatialite-5.1.0-win-amd64.7z" -OutFile "spatialite.7z"
7z x spatialite.7z -o"C:\spatialite"
echo "C:\spatialite\mod_spatialite-5.1.0-win-amd64" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
- name: Install osm2pgsql
run: |
Invoke-WebRequest -Uri "https://osm2pgsql.org/download/windows/osm2pgsql-latest-x64.zip" -OutFile "osm2pgsql.zip"
Expand-Archive -Path "osm2pgsql.zip" -DestinationPath "C:\osm2pgsql"
$BinDir = Get-ChildItem -Path "C:\osm2pgsql" -Recurse -Filter "osm2pgsql.exe" | Select-Object -ExpandProperty DirectoryName | Select-Object -First 1
if (-not $BinDir) {
Write-Error "Could not find osm2pgsql.exe"
exit 1
}
echo "$BinDir" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
$FullExePath = Join-Path $BinDir "osm2pgsql.exe"
echo "NOMINATIM_OSM2PGSQL_BINARY=$FullExePath" | Out-File -FilePath $env:GITHUB_ENV -Encoding utf8 -Append
- name: Set UTF-8 encoding
run: |
echo "PYTHONUTF8=1" >> $env:GITHUB_ENV
[System.Console]::OutputEncoding = [System.Text.Encoding]::UTF8
- name: Install PyICU from wheel
run: |
python -m pip install https://github.com/cgohlke/pyicu-build/releases/download/v2.16.0/pyicu-2.16-cp314-cp314-win_amd64.whl
- name: Install test prerequisites
run: |
python -m pip install -U pip
python -m pip install pytest pytest-asyncio "psycopg[binary]!=3.3.0" python-dotenv pyyaml jinja2 psutil sqlalchemy pytest-bdd falcon starlette uvicorn asgi_lifespan aiosqlite osmium mwparserfromhell
- name: Python unit tests
run: |
python -m pytest test/python -k "not (import_osm or run_osm2pgsql)"
working-directory: Nominatim
install: install:
runs-on: ubuntu-latest runs-on: ubuntu-latest
needs: create-archive needs: create-archive

View File

@@ -4,7 +4,7 @@
Bugs can be reported at https://github.com/openstreetmap/Nominatim/issues. Bugs can be reported at https://github.com/openstreetmap/Nominatim/issues.
Please always open a separate issue for each problem. In particular, do Please always open a separate issue for each problem. In particular, do
not add your bugs to closed issues. They may looks similar to you but not add your bugs to closed issues. They may look similar to you but
often are completely different from the maintainer's point of view. often are completely different from the maintainer's point of view.
## Workflow for Pull Requests ## Workflow for Pull Requests
@@ -21,7 +21,7 @@ that you are responsible for your pull requests. You should be prepared
to get change requests because as the maintainers we have to make sure to get change requests because as the maintainers we have to make sure
that your contribution fits well with the rest of the code. Please make that your contribution fits well with the rest of the code. Please make
sure that you have time to react to these comments and amend the code or sure that you have time to react to these comments and amend the code or
engage in a conversion. Do not expect that others will pick up your code, engage in a conversation. Do not expect that others will pick up your code,
it will almost never happen. it will almost never happen.
Please open a separate pull request for each issue you want to address. Please open a separate pull request for each issue you want to address.
@@ -38,10 +38,19 @@ description or in documentation need to
1. clearly mark the AI-generated sections as such, for example, by 1. clearly mark the AI-generated sections as such, for example, by
mentioning all use of AI in the PR description, and mentioning all use of AI in the PR description, and
2. include proof that you have run the generated code on an actual 2. include proof that you have run the generated code on an actual
installation of Nominatim. Adding and excuting tests will not be installation of Nominatim. Adding and executing tests will not be
sufficient. You need to show that the code actually solves the problem sufficient. You need to show that the code actually solves the problem
the PR claims to solve. the PR claims to solve.
## Getting Started with Development
Please see the development section of the Nominatim documentation for
* [an architecture overview](https://nominatim.org/release-docs/develop/develop/overview/)
and backgrounds on some of the algorithms
* [how to set up a development environment](https://nominatim.org/release-docs/develop/develop/Development-Environment/)
* and background on [how tests are organised](https://nominatim.org/release-docs/develop/develop/Testing/)
## Coding style ## Coding style

View File

@@ -10,14 +10,14 @@ Nominatim. Please refer to the documentation of
[Nginx](https://nginx.org/en/docs/) for background information on how [Nginx](https://nginx.org/en/docs/) for background information on how
to configure it. to configure it.
!!! Note
Throughout this page, we assume your Nominatim project directory is
located in `/srv/nominatim-project`. If you have put it somewhere else,
you need to adjust the commands and configuration accordingly.
### Installing the required packages ### Installing the required packages
!!! warning
ASGI support in gunicorn requires at least version 25.0. If you need
to work with an older version of gunicorn, please refer to
[older Nominatim deployment documentation](https://nominatim.org/release-docs/5.2/admin/Deployment-Python/)
to learn how to run gunicorn with uvicorn.
The Nominatim frontend is best run from its own virtual environment. If The Nominatim frontend is best run from its own virtual environment. If
you have already created one for the database backend during the you have already created one for the database backend during the
[installation](Installation.md#building-nominatim), you can use that. Otherwise [installation](Installation.md#building-nominatim), you can use that. Otherwise
@@ -37,23 +37,27 @@ cd Nominatim
``` ```
The recommended way to deploy a Python ASGI application is to run The recommended way to deploy a Python ASGI application is to run
the ASGI runner [uvicorn](https://www.uvicorn.org/) the [gunicorn](https://gunicorn.org/) HTTP server. We use
together with [gunicorn](https://gunicorn.org/) HTTP server. We use
Falcon here as the web framework. Falcon here as the web framework.
Add the necessary packages to your virtual environment: Add the necessary packages to your virtual environment:
``` sh ``` sh
/srv/nominatim-venv/bin/pip install falcon uvicorn gunicorn /srv/nominatim-venv/bin/pip install falcon gunicorn
``` ```
### Setting up Nominatim as a systemd job ### Setting up Nominatim as a systemd job
!!! Note
These instructions assume your Nominatim project directory is
located in `/srv/nominatim-project`. If you have put it somewhere else,
you need to adjust the commands and configuration accordingly.
Next you need to set up the service that runs the Nominatim frontend. This is Next you need to set up the service that runs the Nominatim frontend. This is
easiest done with a systemd job. easiest done with a systemd job.
First you need to tell systemd to create a socket file to be used by First you need to tell systemd to create a socket file to be used by
hunicorn. Create the following file `/etc/systemd/system/nominatim.socket`: gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
``` systemd ``` systemd
[Unit] [Unit]
@@ -81,10 +85,8 @@ Type=simple
User=www-data User=www-data
Group=www-data Group=www-data
WorkingDirectory=/srv/nominatim-project WorkingDirectory=/srv/nominatim-project
ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker "nominatim_api.server.falcon.server:run_wsgi()" ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 --worker-class asgi --protocol uwsgi --worker-connections 1000 "nominatim_api.server.falcon.server:run_wsgi()"
ExecReload=/bin/kill -s HUP $MAINPID ExecReload=/bin/kill -s HUP $MAINPID
StandardOutput=append:/var/log/gunicorn-nominatim.log
StandardError=inherit
PrivateTmp=true PrivateTmp=true
TimeoutStopSec=5 TimeoutStopSec=5
KillMode=mixed KillMode=mixed
@@ -96,7 +98,10 @@ WantedBy=multi-user.target
This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
its own Python process using its own Python process using
[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size) [`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
connections to the database to serve requests in parallel. connections to the database to serve requests in parallel. The parameter
`--worker-connections` restricts how many requests gunicorn will queue for
each worker. This can help distribute work better when the server is under
high load.
Make the new services known to systemd and start it: Make the new services known to systemd and start it:
@@ -108,13 +113,15 @@ sudo systemctl enable nominatim.service
sudo systemctl start nominatim.service sudo systemctl start nominatim.service
``` ```
This sets the service up, so that Nominatim is automatically started This sets the service up so that Nominatim is automatically started
on reboot. on reboot.
### Configuring nginx ### Configuring nginx
To make the service available to the world, you need to proxy it through To make the service available to the world, you need to proxy it through
nginx. Add the following definition to the default configuration: nginx. We use the binary uwsgi protocol to speed up communication
between nginx and gunicorn. Add the following definition to the default
configuration:
``` nginx ``` nginx
upstream nominatim_service { upstream nominatim_service {
@@ -129,11 +136,8 @@ server {
index /search; index /search;
location / { location / {
proxy_set_header Host $http_host; uwsgi_pass nominatim_service;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; include uwsgi_params;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_redirect off;
proxy_pass http://nominatim_service;
} }
} }
``` ```

View File

@@ -56,7 +56,7 @@ The easiest way, to handle these Python dependencies is to run your
development from within a virtual environment. development from within a virtual environment.
```sh ```sh
sudo apt install libsqlite3-mod-spatialite osm2pgsql \ sudo apt install build-essential libsqlite3-mod-spatialite osm2pgsql \
postgresql-postgis postgresql-postgis-scripts \ postgresql-postgis postgresql-postgis-scripts \
pkg-config libicu-dev virtualenv pkg-config libicu-dev virtualenv
``` ```
@@ -68,11 +68,11 @@ virtualenv ~/nominatim-dev-venv
~/nominatim-dev-venv/bin/pip install\ ~/nominatim-dev-venv/bin/pip install\
psutil 'psycopg[binary]' PyICU SQLAlchemy \ psutil 'psycopg[binary]' PyICU SQLAlchemy \
python-dotenv jinja2 pyYAML \ python-dotenv jinja2 pyYAML \
mkdocs 'mkdocstrings[python]' mkdocs-gen-files \ mkdocs 'mkdocstrings[python]' mkdocs-gen-files mkdocs-material \
pytest pytest-asyncio pytest-bdd flake8 \ pytest pytest-asyncio pytest-bdd flake8 \
types-jinja2 types-markupsafe types-psutil types-psycopg2 \ types-jinja2 types-markupsafe types-psutil types-psycopg2 \
types-pygments types-pyyaml types-requests types-ujson \ types-pygments types-pyyaml types-requests types-ujson \
types-urllib3 typing-extensions unicorn falcon starlette \ types-urllib3 typing-extensions gunicorn falcon starlette \
uvicorn mypy osmium aiosqlite mwparserfromhell uvicorn mypy osmium aiosqlite mwparserfromhell
``` ```

View File

@@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'):
outfile = f"admin/{infile.stem}.md" outfile = f"admin/{infile.stem}.md"
title = infile.stem.replace('-', ' ') title = infile.stem.replace('-', ' ')
with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd: with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \
infile.open(encoding='utf-8') as infd:
print("#", title, file=outfd) print("#", title, file=outfd)
has_empty = False has_empty = False
for line in infd: for line in infd:

View File

@@ -2,7 +2,7 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2022 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
-- Functions for interpreting wkipedia/wikidata tags and computing importance. -- Functions for interpreting wkipedia/wikidata tags and computing importance.
@@ -166,7 +166,7 @@ BEGIN
END LOOP; END LOOP;
-- Nothing? Then try with the wikidata tag. -- Nothing? Then try with the wikidata tag.
IF result.importance is null AND extratags ? 'wikidata' THEN IF extratags ? 'wikidata' THEN
FOR match IN FOR match IN
{% if 'wikimedia_importance' in db.tables %} {% if 'wikimedia_importance' in db.tables %}
SELECT * FROM wikimedia_importance SELECT * FROM wikimedia_importance
@@ -185,18 +185,18 @@ BEGIN
END IF; END IF;
-- Still nothing? Fall back to a default. -- Still nothing? Fall back to a default.
IF result.importance is null THEN result.importance := 0.40001 - (rank_search::float / 75);
result.importance := 0.40001 - (rank_search::float / 75);
END IF;
{% if 'secondary_importance' in db.tables %} {% if 'secondary_importance' in db.tables %}
FOR match IN FOR match IN
SELECT ST_Value(rast, centroid) as importance SELECT ST_Value(rast, centroid) as importance
FROM secondary_importance FROM secondary_importance
WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1 WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1
LOOP LOOP
-- Secondary importance as tie breaker with 0.0001 weight. IF match.importance is not NULL THEN
result.importance := result.importance + match.importance::float / 655350000; -- Secondary importance as tie breaker with 0.0001 weight.
result.importance := result.importance + match.importance::float / 655350000;
END IF;
END LOOP; END LOOP;
{% endif %} {% endif %}

View File

@@ -2,7 +2,7 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2022 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
DROP TYPE IF EXISTS nearfeaturecentr CASCADE; DROP TYPE IF EXISTS nearfeaturecentr CASCADE;
@@ -123,10 +123,12 @@ BEGIN
RETURN TRUE; RETURN TRUE;
END IF; END IF;
IF in_rank_search <= 4 and not in_estimate THEN IF in_rank_search <= 4 THEN
INSERT INTO location_area_country (place_id, country_code, geometry) IF not in_estimate and in_country_code is not NULL THEN
(SELECT in_place_id, in_country_code, geom INSERT INTO location_area_country (place_id, country_code, geometry)
FROM split_geometry(in_geometry) as geom); (SELECT in_place_id, in_country_code, geom
FROM split_geometry(in_geometry) as geom);
END IF;
RETURN TRUE; RETURN TRUE;
END IF; END IF;
@@ -212,7 +214,6 @@ DECLARE
BEGIN BEGIN
{% for partition in db.partitions %} {% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN IF in_partition = {{ partition }} THEN
DELETE FROM search_name_{{ partition }} values WHERE place_id = in_place_id;
IF in_rank_address > 0 THEN IF in_rank_address > 0 THEN
INSERT INTO search_name_{{ partition }} (place_id, address_rank, name_vector, centroid) INSERT INTO search_name_{{ partition }} (place_id, address_rank, name_vector, centroid)
values (in_place_id, in_rank_address, in_name_vector, in_geometry); values (in_place_id, in_rank_address, in_name_vector, in_geometry);
@@ -251,7 +252,6 @@ BEGIN
{% for partition in db.partitions %} {% for partition in db.partitions %}
IF in_partition = {{ partition }} THEN IF in_partition = {{ partition }} THEN
DELETE FROM location_road_{{ partition }} where place_id = in_place_id;
INSERT INTO location_road_{{ partition }} (partition, place_id, country_code, geometry) INSERT INTO location_road_{{ partition }} (partition, place_id, country_code, geometry)
values (in_partition, in_place_id, in_country_code, in_geometry); values (in_partition, in_place_id, in_country_code, in_geometry);
RETURN TRUE; RETURN TRUE;

View File

@@ -2,7 +2,7 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2025 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
CREATE OR REPLACE FUNCTION place_insert() CREATE OR REPLACE FUNCTION place_insert()
@@ -66,7 +66,8 @@ BEGIN
-- They get their parent from the interpolation. -- They get their parent from the interpolation.
UPDATE placex p SET indexed_status = 2 UPDATE placex p SET indexed_status = 2
FROM planet_osm_ways w FROM planet_osm_ways w
WHERE w.id = NEW.osm_id and p.osm_type = 'N' and p.osm_id = any(w.nodes); WHERE w.id = NEW.osm_id and p.osm_type = 'N' and p.osm_id = any(w.nodes)
and indexed_status = 0;
-- If there is already an entry in place, just update that, if necessary. -- If there is already an entry in place, just update that, if necessary.
IF existing.osm_type is not null THEN IF existing.osm_type is not null THEN

View File

@@ -2,7 +2,7 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2025 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
-- Trigger functions for the placex table. -- Trigger functions for the placex table.
@@ -29,6 +29,7 @@ DECLARE
location RECORD; location RECORD;
result prepare_update_info; result prepare_update_info;
extra_names HSTORE; extra_names HSTORE;
default_language VARCHAR(10);
BEGIN BEGIN
IF not p.address ? '_inherited' THEN IF not p.address ? '_inherited' THEN
result.address := p.address; result.address := p.address;
@@ -85,6 +86,13 @@ BEGIN
IF location.name is not NULL THEN IF location.name is not NULL THEN
{% if debug %}RAISE WARNING 'Names original: %, location: %', result.name, location.name;{% endif %} {% if debug %}RAISE WARNING 'Names original: %, location: %', result.name, location.name;{% endif %}
-- Add the linked-place (e.g. city) name as a searchable placename in the default language (if any)
default_language := get_country_language_code(location.country_code);
IF default_language is not NULL AND location.name ? 'name' AND NOT location.name ? ('name:' || default_language) THEN
location.name := location.name || hstore('name:' || default_language, location.name->'name');
END IF;
-- Add all names from the place nodes that deviate from the name -- Add all names from the place nodes that deviate from the name
-- in the relation with the prefix '_place_'. Deviation means that -- in the relation with the prefix '_place_'. Deviation means that
-- either the value is different or a given key is missing completely -- either the value is different or a given key is missing completely
@@ -465,7 +473,7 @@ BEGIN
END IF; END IF;
END LOOP; END LOOP;
name_vector := token_get_name_search_tokens(token_info); name_vector := COALESCE(token_get_name_search_tokens(token_info), '{}'::INTEGER[]);
-- Check if the parent covers all address terms. -- Check if the parent covers all address terms.
-- If not, create a search name entry with the house number as the name. -- If not, create a search name entry with the house number as the name.
@@ -672,7 +680,7 @@ CREATE OR REPLACE FUNCTION placex_insert()
AS $$ AS $$
DECLARE DECLARE
postcode TEXT; postcode TEXT;
result BOOLEAN; result INT;
is_area BOOLEAN; is_area BOOLEAN;
country_code VARCHAR(2); country_code VARCHAR(2);
diameter FLOAT; diameter FLOAT;
@@ -777,11 +785,12 @@ BEGIN
-- add to tables for special search -- add to tables for special search
-- Note: won't work on initial import because the classtype tables
-- do not yet exist. It won't hurt either.
classtable := 'place_classtype_' || NEW.class || '_' || NEW.type; classtable := 'place_classtype_' || NEW.class || '_' || NEW.type;
SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO result; SELECT count(*) INTO result
IF result THEN FROM pg_tables
WHERE classtable NOT SIMILAR TO '%\W%'
AND tablename = classtable and schemaname = current_schema();
IF result > 0 THEN
EXECUTE 'INSERT INTO ' || classtable::regclass || ' (place_id, centroid) VALUES ($1,$2)' EXECUTE 'INSERT INTO ' || classtable::regclass || ' (place_id, centroid) VALUES ($1,$2)'
USING NEW.place_id, NEW.centroid; USING NEW.place_id, NEW.centroid;
END IF; END IF;
@@ -840,13 +849,15 @@ BEGIN
NEW.indexed_date = now(); NEW.indexed_date = now();
{% if 'search_name' in db.tables %} IF OLD.indexed_status > 1 THEN
DELETE from search_name WHERE place_id = NEW.place_id; {% if 'search_name' in db.tables %}
{% endif %} DELETE from search_name WHERE place_id = NEW.place_id;
result := deleteSearchName(NEW.partition, NEW.place_id); {% endif %}
DELETE FROM place_addressline WHERE place_id = NEW.place_id; result := deleteSearchName(NEW.partition, NEW.place_id);
result := deleteRoad(NEW.partition, NEW.place_id); DELETE FROM place_addressline WHERE place_id = NEW.place_id;
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search); result := deleteRoad(NEW.partition, NEW.place_id);
result := deleteLocationArea(NEW.partition, NEW.place_id, NEW.rank_search);
END IF;
NEW.extratags := NEW.extratags - 'linked_place'::TEXT; NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
IF NEW.extratags = ''::hstore THEN IF NEW.extratags = ''::hstore THEN
@@ -859,10 +870,13 @@ BEGIN
NEW.linked_place_id := OLD.linked_place_id; NEW.linked_place_id := OLD.linked_place_id;
-- Remove linkage, if we have computed a different new linkee. -- Remove linkage, if we have computed a different new linkee.
UPDATE placex SET linked_place_id = null, indexed_status = 2 IF OLD.indexed_status > 1 THEN
WHERE linked_place_id = NEW.place_id UPDATE placex
and (linked_place is null or place_id != linked_place); SET linked_place_id = null,
-- update not necessary for osmline, cause linked_place_id does not exist indexed_status = CASE WHEN indexed_status = 0 THEN 2 ELSE indexed_status END
WHERE linked_place_id = NEW.place_id
and (linked_place is null or place_id != linked_place);
END IF;
-- Compute a preliminary centroid. -- Compute a preliminary centroid.
NEW.centroid := get_center_point(NEW.geometry); NEW.centroid := get_center_point(NEW.geometry);
@@ -1032,7 +1046,9 @@ BEGIN
LOOP LOOP
UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id; UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id;
{% if 'search_name' in db.tables %} {% if 'search_name' in db.tables %}
DELETE FROM search_name WHERE place_id = linked_node_id; IF OLD.indexed_status > 1 THEN
DELETE FROM search_name WHERE place_id = linked_node_id;
END IF;
{% endif %} {% endif %}
END LOOP; END LOOP;
END IF; END IF;
@@ -1181,11 +1197,6 @@ BEGIN
-- reset the address rank if necessary. -- reset the address rank if necessary.
UPDATE placex set linked_place_id = NEW.place_id, indexed_status = 2 UPDATE placex set linked_place_id = NEW.place_id, indexed_status = 2
WHERE place_id = location.place_id; WHERE place_id = location.place_id;
-- ensure that those places are not found anymore
{% if 'search_name' in db.tables %}
DELETE FROM search_name WHERE place_id = location.place_id;
{% endif %}
PERFORM deleteLocationArea(NEW.partition, location.place_id, NEW.rank_search);
SELECT wikipedia, importance SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code, FROM compute_importance(location.extratags, NEW.country_code,
@@ -1196,7 +1207,7 @@ BEGIN
IF linked_importance is not null AND IF linked_importance is not null AND
(NEW.importance is null or NEW.importance < linked_importance) (NEW.importance is null or NEW.importance < linked_importance)
THEN THEN
NEW.importance = linked_importance; NEW.importance := linked_importance;
END IF; END IF;
ELSE ELSE
-- No linked place? As a last resort check if the boundary is tagged with -- No linked place? As a last resort check if the boundary is tagged with
@@ -1238,7 +1249,7 @@ BEGIN
LIMIT 1 LIMIT 1
LOOP LOOP
IF location.osm_id = NEW.osm_id THEN IF location.osm_id = NEW.osm_id THEN
{% if debug %}RAISE WARNING 'Updating names for country '%' with: %', NEW.country_code, NEW.name;{% endif %} {% if debug %}RAISE WARNING 'Updating names for country ''%'' with: %', NEW.country_code, NEW.name;{% endif %}
UPDATE country_name SET derived_name = NEW.name WHERE country_code = NEW.country_code; UPDATE country_name SET derived_name = NEW.name WHERE country_code = NEW.country_code;
END IF; END IF;
END LOOP; END LOOP;
@@ -1277,10 +1288,10 @@ BEGIN
NEW.postcode := coalesce(token_get_postcode(NEW.token_info), NEW.postcode); NEW.postcode := coalesce(token_get_postcode(NEW.token_info), NEW.postcode);
-- if we have a name add this to the name search table -- if we have a name add this to the name search table
IF NEW.name IS NOT NULL THEN name_vector := token_get_name_search_tokens(NEW.token_info);
IF array_length(name_vector, 1) is not NULL THEN
-- Initialise the name vector using our name -- Initialise the name vector using our name
NEW.name := add_default_place_name(NEW.country_code, NEW.name); NEW.name := add_default_place_name(NEW.country_code, NEW.name);
name_vector := token_get_name_search_tokens(NEW.token_info);
IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN
result := add_location(NEW.place_id, NEW.country_code, NEW.partition, result := add_location(NEW.place_id, NEW.country_code, NEW.partition,
@@ -1335,15 +1346,16 @@ CREATE OR REPLACE FUNCTION placex_delete()
AS $$ AS $$
DECLARE DECLARE
b BOOLEAN; b BOOLEAN;
result INT;
classtable TEXT; classtable TEXT;
BEGIN BEGIN
-- RAISE WARNING 'placex_delete % %',OLD.osm_type,OLD.osm_id; -- RAISE WARNING 'placex_delete % %',OLD.osm_type,OLD.osm_id;
IF OLD.linked_place_id is null THEN IF OLD.linked_place_id is null THEN
update placex set linked_place_id = null, indexed_status = 2 where linked_place_id = OLD.place_id and indexed_status = 0; UPDATE placex
{% if debug %}RAISE WARNING 'placex_delete:01 % %',OLD.osm_type,OLD.osm_id;{% endif %} SET linked_place_id = NULL,
update placex set linked_place_id = null where linked_place_id = OLD.place_id; indexed_status = CASE WHEN indexed_status = 0 THEN 2 ELSE indexed_status END
{% if debug %}RAISE WARNING 'placex_delete:02 % %',OLD.osm_type,OLD.osm_id;{% endif %} WHERE linked_place_id = OLD.place_id;
ELSE ELSE
update placex set indexed_status = 2 where place_id = OLD.linked_place_id and indexed_status = 0; update placex set indexed_status = 2 where place_id = OLD.linked_place_id and indexed_status = 0;
END IF; END IF;
@@ -1367,6 +1379,7 @@ BEGIN
-- reparenting also for OSM Interpolation Lines (and for Tiger?) -- reparenting also for OSM Interpolation Lines (and for Tiger?)
update location_property_osmline set indexed_status = 2 where indexed_status = 0 and parent_place_id = OLD.place_id; update location_property_osmline set indexed_status = 2 where indexed_status = 0 and parent_place_id = OLD.place_id;
UPDATE location_postcodes SET indexed_status = 2 WHERE parent_place_id = OLD.place_id;
END IF; END IF;
{% if debug %}RAISE WARNING 'placex_delete:08 % %',OLD.osm_type,OLD.osm_id;{% endif %} {% if debug %}RAISE WARNING 'placex_delete:08 % %',OLD.osm_type,OLD.osm_id;{% endif %}
@@ -1392,15 +1405,16 @@ BEGIN
-- remove from tables for special search -- remove from tables for special search
classtable := 'place_classtype_' || OLD.class || '_' || OLD.type; classtable := 'place_classtype_' || OLD.class || '_' || OLD.type;
SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO b; SELECT count(*) INTO result
IF b THEN FROM pg_tables
WHERE classtable NOT SIMILAR TO '%\W%'
AND tablename = classtable and schemaname = current_schema();
IF result > 0 THEN
EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id; EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id;
END IF; END IF;
{% if debug %}RAISE WARNING 'placex_delete:12 % %',OLD.osm_type,OLD.osm_id;{% endif %} {% if debug %}RAISE WARNING 'placex_delete:12 % %',OLD.osm_type,OLD.osm_id;{% endif %}
UPDATE location_postcodes SET indexed_status = 2 WHERE parent_place_id = OLD.place_id;
RETURN OLD; RETURN OLD;
END; END;

View File

@@ -2,7 +2,7 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2025 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
-- Assorted helper functions for the triggers. -- Assorted helper functions for the triggers.
@@ -46,13 +46,13 @@ DECLARE
r INTEGER[]; r INTEGER[];
BEGIN BEGIN
IF array_upper(a, 1) IS NULL THEN IF array_upper(a, 1) IS NULL THEN
RETURN b; RETURN COALESCE(b, '{}'::INTEGER[]);
END IF; END IF;
IF array_upper(b, 1) IS NULL THEN IF array_upper(b, 1) IS NULL THEN
RETURN a; RETURN COALESCE(a, '{}'::INTEGER[]);
END IF; END IF;
r := a; r := a;
FOR i IN 1..array_upper(b, 1) LOOP FOR i IN 1..array_upper(b, 1) LOOP
IF NOT (ARRAY[b[i]] <@ r) THEN IF NOT (ARRAY[b[i]] <@ r) THEN
r := r || b[i]; r := r || b[i];
END IF; END IF;
@@ -153,8 +153,7 @@ BEGIN
IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN IF ST_GeometryType(geom) in ('ST_Polygon','ST_MultiPolygon') THEN
SELECT min(postcode), count(*) FROM SELECT min(postcode), count(*) FROM
(SELECT postcode FROM location_postcodes (SELECT postcode FROM location_postcodes
WHERE geom && location_postcodes.geometry -- want to use the index WHERE ST_Contains(geom, location_postcodes.centroid)
AND ST_Contains(geom, location_postcodes.centroid)
AND country_code = country AND country_code = country
LIMIT 2) sub LIMIT 2) sub
INTO outcode, cnt; INTO outcode, cnt;
@@ -368,8 +367,6 @@ CREATE OR REPLACE FUNCTION add_location(place_id BIGINT, country_code varchar(2)
DECLARE DECLARE
postcode TEXT; postcode TEXT;
BEGIN BEGIN
PERFORM deleteLocationArea(partition, place_id, rank_search);
-- add postcode only if it contains a single entry, i.e. ignore postcode lists -- add postcode only if it contains a single entry, i.e. ignore postcode lists
postcode := NULL; postcode := NULL;
IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN IF in_postcode is not null AND in_postcode not similar to '%(,|;)%' THEN

47
lib-sql/grants.sql Normal file
View File

@@ -0,0 +1,47 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
--
-- Grant read-only access to the web user for all Nominatim tables.
-- Core tables
GRANT SELECT ON import_status TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON country_name TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
-- Location tables
GRANT SELECT ON location_property_tiger TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON location_property_osmline TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}";
-- Search tables
{% if not db.reverse_only %}
GRANT SELECT ON search_name TO "{{config.DATABASE_WEBUSER}}";
{% endif %}
-- Main place tables
GRANT SELECT ON placex TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON place_addressline TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON placex_entrance TO "{{config.DATABASE_WEBUSER}}";
-- Error/delete tracking tables
GRANT SELECT ON import_polygon_error TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON import_polygon_delete TO "{{config.DATABASE_WEBUSER}}";
-- Country grid
GRANT SELECT ON country_osm_grid TO "{{config.DATABASE_WEBUSER}}";
-- Tokenizer tables (word table)
{% if 'word' in db.tables %}
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
{% endif %}
-- Special phrase tables
{% for table in db.tables %}
{% if table.startswith('place_classtype_') %}
GRANT SELECT ON {{ table }} TO "{{config.DATABASE_WEBUSER}}";
{% endif %}
{% endfor %}

View File

@@ -2,36 +2,48 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2022 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
drop table IF EXISTS search_name_blank CASCADE; drop table IF EXISTS search_name_blank CASCADE;
CREATE TABLE search_name_blank ( CREATE TABLE search_name_blank (
place_id BIGINT, place_id BIGINT NOT NULL,
address_rank smallint, address_rank smallint NOT NULL,
name_vector integer[], name_vector integer[] NOT NULL,
centroid GEOMETRY(Geometry, 4326) centroid GEOMETRY(Geometry, 4326) NOT NULL
); );
{% for partition in db.partitions %} {% for partition in db.partitions %}
CREATE TABLE location_area_large_{{ partition }} () INHERITS (location_area_large) {{db.tablespace.address_data}}; CREATE TABLE location_area_large_{{ partition }} () INHERITS (location_area_large) {{db.tablespace.address_data}};
CREATE INDEX idx_location_area_large_{{ partition }}_place_id ON location_area_large_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; CREATE INDEX idx_location_area_large_{{ partition }}_place_id
CREATE INDEX idx_location_area_large_{{ partition }}_geometry ON location_area_large_{{ partition }} USING GIST (geometry) {{db.tablespace.address_index}}; ON location_area_large_{{ partition }}
USING BTREE (place_id) {{db.tablespace.address_index}};
CREATE INDEX idx_location_area_large_{{ partition }}_geometry
ON location_area_large_{{ partition }}
USING GIST (geometry) {{db.tablespace.address_index}};
CREATE TABLE search_name_{{ partition }} () INHERITS (search_name_blank) {{db.tablespace.address_data}}; CREATE TABLE search_name_{{ partition }} () INHERITS (search_name_blank) {{db.tablespace.address_data}};
CREATE INDEX idx_search_name_{{ partition }}_place_id ON search_name_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; CREATE UNIQUE INDEX idx_search_name_{{ partition }}_place_id
CREATE INDEX idx_search_name_{{ partition }}_centroid_street ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}} where address_rank between 26 and 27; ON search_name_{{ partition }}
CREATE INDEX idx_search_name_{{ partition }}_centroid_place ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}} where address_rank between 2 and 25; USING BTREE (place_id) {{db.tablespace.address_index}};
CREATE INDEX idx_search_name_{{ partition }}_centroid_street
ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}}
WHERE address_rank between 26 and 27;
CREATE INDEX idx_search_name_{{ partition }}_centroid_place
ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}}
WHERE address_rank between 2 and 25;
DROP TABLE IF EXISTS location_road_{{ partition }}; DROP TABLE IF EXISTS location_road_{{ partition }};
CREATE TABLE location_road_{{ partition }} ( CREATE TABLE location_road_{{ partition }} (
place_id BIGINT, place_id BIGINT NOT NULL,
partition SMALLINT, partition SMALLINT NOT NULL,
country_code VARCHAR(2), country_code VARCHAR(2),
geometry GEOMETRY(Geometry, 4326) geometry GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.address_data}}; ) {{db.tablespace.address_data}};
CREATE INDEX idx_location_road_{{ partition }}_geometry ON location_road_{{ partition }} USING GIST (geometry) {{db.tablespace.address_index}}; CREATE INDEX idx_location_road_{{ partition }}_geometry
CREATE INDEX idx_location_road_{{ partition }}_place_id ON location_road_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; ON location_road_{{ partition }}
USING GIST (geometry) {{db.tablespace.address_index}};
CREATE UNIQUE INDEX idx_location_road_{{ partition }}_place_id
ON location_road_{{ partition }}
USING BTREE (place_id) {{db.tablespace.address_index}};
{% endfor %} {% endfor %}

View File

@@ -2,312 +2,24 @@
-- --
-- This file is part of Nominatim. (https://nominatim.org) -- This file is part of Nominatim. (https://nominatim.org)
-- --
-- Copyright (C) 2025 by the Nominatim developer community. -- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log. -- For a full list of authors see the git log.
drop table if exists import_status;
CREATE TABLE import_status (
lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
);
GRANT SELECT ON import_status TO "{{config.DATABASE_WEBUSER}}" ;
drop table if exists import_osmosis_log;
CREATE TABLE import_osmosis_log (
batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text
);
CREATE TABLE new_query_log (
type text,
starttime timestamp,
ipaddress text,
useragent text,
language text,
query text,
searchterm text,
endtime timestamp,
results integer,
format text,
secret text
);
CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime);
GRANT INSERT ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ;
GRANT UPDATE ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON TABLE country_name TO "{{config.DATABASE_WEBUSER}}";
DROP TABLE IF EXISTS nominatim_properties;
CREATE TABLE nominatim_properties (
property TEXT NOT NULL,
value TEXT
);
GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
drop table IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
place_id BIGINT,
keywords INTEGER[],
partition SMALLINT,
rank_search SMALLINT NOT NULL,
rank_address SMALLINT NOT NULL,
country_code VARCHAR(2),
isguess BOOL,
postcode TEXT,
centroid GEOMETRY(Point, 4326),
geometry GEOMETRY(Geometry, 4326)
);
CREATE TABLE location_area_large () INHERITS (location_area);
DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
place_id BIGINT,
country_code varchar(2),
geometry GEOMETRY(Geometry, 4326)
) {{db.tablespace.address_data}};
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
CREATE TABLE location_property_tiger (
place_id BIGINT,
parent_place_id BIGINT,
startnumber INTEGER,
endnumber INTEGER,
step SMALLINT,
partition SMALLINT,
linegeo GEOMETRY,
postcode TEXT);
GRANT SELECT ON location_property_tiger TO "{{config.DATABASE_WEBUSER}}";
drop table if exists location_property_osmline;
CREATE TABLE location_property_osmline (
place_id BIGINT NOT NULL,
osm_id BIGINT,
parent_place_id BIGINT,
geometry_sector INTEGER,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
step SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
linegeo GEOMETRY,
address HSTORE,
token_info JSONB, -- custom column for tokenizer use only
postcode TEXT,
country_code VARCHAR(2)
){{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {{db.tablespace.search_index}}
WHERE startnumber is not null;
GRANT SELECT ON location_property_osmline TO "{{config.DATABASE_WEBUSER}}";
drop table IF EXISTS search_name;
{% if not db.reverse_only %}
CREATE TABLE search_name (
place_id BIGINT,
importance FLOAT,
search_rank SMALLINT,
address_rank SMALLINT,
name_vector integer[],
nameaddress_vector integer[],
country_code varchar(2),
centroid GEOMETRY(Geometry, 4326)
) {{db.tablespace.search_data}};
CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};
GRANT SELECT ON search_name to "{{config.DATABASE_WEBUSER}}" ;
{% endif %}
drop table IF EXISTS place_addressline;
CREATE TABLE place_addressline (
place_id BIGINT,
address_place_id BIGINT,
distance FLOAT,
cached_rank_address SMALLINT,
fromarea boolean,
isaddress boolean
) {{db.tablespace.search_data}};
CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}};
--------- PLACEX - storage for all indexed places -----------------
DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER,
rank_address SMALLINT,
rank_search SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
LIKE place INCLUDING CONSTRAINTS,
wikipedia TEXT, -- calculated wikipedia article name (language:title)
token_info JSONB, -- custom column for tokenizer use only
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326)
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
{% for osm_type in ('N', 'W', 'R') %}
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
USING BTREE (osm_id) {{db.tablespace.search_index}}
WHERE osm_type = '{{osm_type}}';
{% endfor %}
-- Usage: - removing linkage status on update
-- - lookup linked places for /details
CREATE INDEX idx_placex_linked_place_id ON placex
USING BTREE (linked_place_id) {{db.tablespace.address_index}}
WHERE linked_place_id IS NOT NULL;
-- Usage: - check that admin boundaries do not overtake each other rank-wise
-- - check that place node in a admin boundary with the same address level
-- - boundary is not completely contained in a place area
-- - parenting of large-area or unparentable features
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
USING gist (geometry) {{db.tablespace.address_index}}
WHERE rank_address between 1 and 25
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26 and class = 'place';
-- Usage: - is node part of a way?
-- - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;
-- Usage: - linking place nodes by wikidata tag to boundaries
CREATE INDEX idx_placex_wikidata on placex
USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
WHERE extratags ? 'wikidata' and class = 'place'
and osm_type = 'N' and rank_search < 26;
-- The following two indexes function as a todo list for indexing.
CREATE INDEX idx_placex_rank_address_sector ON placex
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
WHERE class = 'boundary' and type = 'administrative'
and indexed_status > 0;
DROP SEQUENCE IF EXISTS seq_place; DROP SEQUENCE IF EXISTS seq_place;
CREATE SEQUENCE seq_place start 1; CREATE SEQUENCE seq_place start 1;
GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
-- Table for synthetic postcodes. {% include('tables/status.sql') %}
DROP TABLE IF EXISTS location_postcodes; {% include('tables/nominatim_properties.sql') %}
CREATE TABLE location_postcodes ( {% include('tables/location_area.sql') %}
place_id BIGINT NOT NULL, {% include('tables/tiger.sql') %}
parent_place_id BIGINT, {% include('tables/interpolation.sql') %}
osm_id BIGINT, {% include('tables/search_name.sql') %}
rank_search SMALLINT, {% include('tables/addressline.sql') %}
indexed_status SMALLINT, {% include('tables/placex.sql') %}
indexed_date TIMESTAMP, {% include('tables/postcodes.sql') %}
country_code varchar(2), {% include('tables/entrance.sql') %}
postcode TEXT NOT NULL, {% include('tables/import_reports.sql') %}
centroid GEOMETRY(Geometry, 4326) NOT NULL, {% include('tables/importance_tables.sql') %}
geometry GEOMETRY(Geometry, 4326) NOT NULL
);
-- Primary-key style lookup for postcode places.
CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
USING BTREE (place_id) {{db.tablespace.search_index}};
-- Spatial lookups against the full postcode area geometry.
CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
USING GIST (geometry) {{db.tablespace.search_index}};
-- Lookup by postcode string, optionally narrowed by country.
CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode
ON location_postcodes USING BTREE (postcode, country_code)
{{db.tablespace.search_index}};
-- Lookup by the originating OSM object id.
CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid
ON location_postcodes USING BTREE (osm_id) {{db.tablespace.search_index}};
GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}" ;
-- Table to store location of entrance nodes
-- (nodes tagged entrance/routing:entrance, linked to their parent place).
DROP TABLE IF EXISTS placex_entrance;
CREATE TABLE placex_entrance (
  place_id BIGINT NOT NULL,   -- place the entrance belongs to
  osm_id BIGINT NOT NULL,     -- OSM node id of the entrance
  type TEXT NOT NULL,         -- presumably the entrance tag value; confirm against importer
  location GEOMETRY(Point, 4326) NOT NULL,
  extratags HSTORE
);
CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};
GRANT SELECT ON placex_entrance TO "{{config.DATABASE_WEBUSER}}" ;
-- Create an index on the place table for lookups to populate the entrance
-- table (partial index only over entrance-like rows).
CREATE INDEX IF NOT EXISTS idx_placex_entrance_lookup ON place
USING BTREE (osm_id)
WHERE class IN ('routing:entrance', 'entrance');
-- Records polygons whose import failed, together with the error message.
DROP TABLE IF EXISTS import_polygon_error;
CREATE TABLE import_polygon_error (
  osm_id BIGINT,
  osm_type CHAR(1),           -- N/W/R
  class TEXT NOT NULL,
  type TEXT NOT NULL,
  name HSTORE,
  country_code varchar(2),
  updated timestamp,
  errormessage text,
  prevgeometry GEOMETRY(Geometry, 4326),  -- presumably geometry before the failing update; confirm
  newgeometry GEOMETRY(Geometry, 4326)    -- presumably the rejected geometry; confirm
);
CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
GRANT SELECT ON import_polygon_error TO "{{config.DATABASE_WEBUSER}}";
-- Records objects slated for deletion during updates.
DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
  osm_id BIGINT,
  osm_type CHAR(1),
  class TEXT NOT NULL,
  type TEXT NOT NULL
);
CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
GRANT SELECT ON import_polygon_delete TO "{{config.DATABASE_WEBUSER}}";
-- Sequence used by the (Tiger) data import.
DROP SEQUENCE IF EXISTS file;
CREATE SEQUENCE file start 1;
{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
-- create dummy tables here, if nothing was imported
-- (keeps importance lookups working when no wikipedia data is loaded)
CREATE TABLE wikimedia_importance (
  language TEXT NOT NULL,
  title TEXT NOT NULL,
  importance double precision NOT NULL,
  wikidata TEXT
) {{db.tablespace.address_data}};
{% endif %}
-- osm2pgsql does not create indexes on the middle tables for Nominatim -- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations. -- Add one for lookup of associated street relations.
@@ -325,5 +37,3 @@ CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(p
CREATE INDEX IF NOT EXISTS idx_place_interpolations CREATE INDEX IF NOT EXISTS idx_place_interpolations
ON place USING gist(geometry) {{db.tablespace.address_index}} ON place USING gist(geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and address ? 'interpolation'; WHERE osm_type = 'W' and address ? 'interpolation';
GRANT SELECT ON table country_osm_grid to "{{config.DATABASE_WEBUSER}}";

View File

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Maps each place to the places that make up its address.
DROP TABLE IF EXISTS place_addressline;
CREATE TABLE place_addressline (
  place_id BIGINT NOT NULL,              -- place the address entry belongs to
  address_place_id BIGINT NOT NULL,      -- place that is part of the address
  distance FLOAT NOT NULL,
  cached_rank_address SMALLINT NOT NULL, -- presumably a copy of the address place's rank_address; confirm
  fromarea boolean NOT NULL,
  isaddress boolean NOT NULL
) {{db.tablespace.search_data}};
CREATE INDEX idx_place_addressline_place_id ON place_addressline
USING BTREE (place_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Table to store location of entrance nodes
-- linked to the place they give access to.
DROP TABLE IF EXISTS placex_entrance;
CREATE TABLE placex_entrance (
  place_id BIGINT NOT NULL,   -- place the entrance belongs to
  osm_id BIGINT NOT NULL,     -- OSM node id of the entrance
  type TEXT NOT NULL,
  location GEOMETRY(Point, 4326) NOT NULL,
  extratags HSTORE
);
-- One row per (place, entrance node) pair.
CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,35 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Records polygons whose import failed, together with the error message.
DROP TABLE IF EXISTS import_polygon_error;
CREATE TABLE import_polygon_error (
  osm_id BIGINT,
  osm_type CHAR(1),           -- N/W/R
  class TEXT NOT NULL,
  type TEXT NOT NULL,
  name HSTORE,
  country_code varchar(2),
  updated timestamp,
  errormessage text,
  prevgeometry GEOMETRY(Geometry, 4326),  -- presumably geometry before the failing update; confirm
  newgeometry GEOMETRY(Geometry, 4326)    -- presumably the rejected geometry; confirm
);
CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error
USING BTREE (osm_type, osm_id);
-- Records objects slated for deletion during updates.
DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
  osm_id BIGINT,
  osm_type CHAR(1),
  class TEXT NOT NULL,
  type TEXT NOT NULL
);
CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete
USING BTREE (osm_type, osm_id);

View File

@@ -0,0 +1,16 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
-- create dummy tables here if nothing was imported
-- (keeps importance lookups working when no wikipedia data is loaded)
CREATE TABLE wikimedia_importance (
  language TEXT NOT NULL,
  title TEXT NOT NULL,
  importance double precision NOT NULL,
  wikidata TEXT
) {{db.tablespace.address_data}};
{% endif %}

View File

@@ -0,0 +1,34 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Address interpolation lines (addr:interpolation ways).
DROP TABLE IF EXISTS location_property_osmline;
CREATE TABLE location_property_osmline (
  place_id BIGINT NOT NULL,
  osm_id BIGINT NOT NULL,
  parent_place_id BIGINT,          -- street the interpolation belongs to
  geometry_sector INTEGER NOT NULL,
  indexed_date TIMESTAMP,
  startnumber INTEGER,             -- first house number on the line
  endnumber INTEGER,               -- last house number on the line
  step SMALLINT,                   -- house number increment along the line
  partition SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,
  linegeo GEOMETRY NOT NULL,
  address HSTORE,
  token_info JSONB, -- custom column for tokenizer use only
  postcode TEXT,
  country_code VARCHAR(2)
){{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline
USING BTREE (place_id) {{db.tablespace.search_index}};
-- Used to process interpolations in spatial batches during indexing.
CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline
USING BTREE (geometry_sector) {{db.tablespace.address_index}};
-- Spatial lookup; only lines that actually carry house numbers.
CREATE INDEX idx_osmline_linegeo ON location_property_osmline
USING GIST (linegeo) {{db.tablespace.search_index}}
WHERE startnumber is not null;

View File

@@ -0,0 +1,32 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Base table for address area lookups. CASCADE also drops inheriting tables.
DROP TABLE IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
  place_id BIGINT NOT NULL,
  keywords INTEGER[] NOT NULL,       -- name token ids
  partition SMALLINT NOT NULL,
  rank_search SMALLINT NOT NULL,
  rank_address SMALLINT NOT NULL,
  country_code VARCHAR(2),
  isguess BOOL NOT NULL,             -- geometry is estimated, not from an OSM area
  postcode TEXT,
  centroid GEOMETRY(Point, 4326) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
);
-- Inherits all columns from location_area.
CREATE TABLE location_area_large () INHERITS (location_area);
-- Country outlines for country-level lookups.
DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
  place_id BIGINT NOT NULL,
  country_code varchar(2) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.address_data}};
CREATE INDEX idx_location_area_country_geometry ON location_area_country
USING GIST (geometry) {{db.tablespace.address_index}};

View File

@@ -0,0 +1,12 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Simple key/value store for database-wide properties.
DROP TABLE IF EXISTS nominatim_properties;
CREATE TABLE nominatim_properties (
  property TEXT NOT NULL,
  value TEXT
);

87
lib-sql/tables/placex.sql Normal file
View File

@@ -0,0 +1,87 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- placex - main table for searchable places
DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  linked_place_id BIGINT,            -- set when this row was merged into another place
  importance FLOAT,
  indexed_date TIMESTAMP,
  geometry_sector INTEGER NOT NULL,
  rank_address SMALLINT NOT NULL,
  rank_search SMALLINT NOT NULL,
  partition SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,  -- > 0 means the row still needs indexing
  LIKE place INCLUDING CONSTRAINTS,  -- inherit all columns of the place table
  wikipedia TEXT, -- calculated wikipedia article name (language:title)
  token_info JSONB, -- custom column for tokenizer use only
  country_code varchar(2),
  housenumber TEXT,
  postcode TEXT,
  centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
-- One partial osm_id index per OSM object type (node/way/relation).
{% for osm_type in ('N', 'W', 'R') %}
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
USING BTREE (osm_id) {{db.tablespace.search_index}}
WHERE osm_type = '{{osm_type}}';
{% endfor %}
-- Usage: - removing linkage status on update
--        - lookup linked places for /details
CREATE INDEX idx_placex_linked_place_id ON placex
USING BTREE (linked_place_id) {{db.tablespace.address_index}}
WHERE linked_place_id IS NOT NULL;
-- Usage: - check that admin boundaries do not overtake each other rank-wise
--        - check that place node in a admin boundary with the same address level
--        - boundary is not completely contained in a place area
--        - parenting of large-area or unparentable features
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
USING gist (geometry) {{db.tablespace.address_index}}
WHERE rank_address between 1 and 25
  and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
  and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - linking of similar named places to boundaries
--        - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26 and class = 'place';
-- Usage: - is node part of a way?
--        - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;
-- Usage: - linking place nodes by wikidata tag to boundaries
CREATE INDEX idx_placex_wikidata on placex
USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
WHERE extratags ? 'wikidata' and class = 'place'
  and osm_type = 'N' and rank_search < 26;
-- The following two indexes function as a todo list for indexing.
-- (Both are partial over rows that still await indexing.)
CREATE INDEX idx_placex_rank_address_sector ON placex
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
WHERE class = 'boundary' and type = 'administrative'
  and indexed_status > 0;

View File

@@ -0,0 +1,32 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Table for synthetic postcode places.
DROP TABLE IF EXISTS location_postcodes;
CREATE TABLE location_postcodes (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  osm_id BIGINT,
  rank_search SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,
  indexed_date TIMESTAMP,
  country_code varchar(2) NOT NULL,
  postcode TEXT NOT NULL,
  centroid GEOMETRY(Geometry, 4326) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
);
CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
USING BTREE (place_id) {{db.tablespace.search_index}};
-- Spatial lookups on the full postcode area.
CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
USING GIST (geometry) {{db.tablespace.search_index}};
-- Geometry lookups are also done on the centroid column.
CREATE INDEX idx_location_postcodes_centroid ON location_postcodes
USING GIST (centroid) {{db.tablespace.search_index}};
-- Lookup by postcode string, optionally narrowed by country.
CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode ON location_postcodes
USING BTREE (postcode, country_code) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid ON location_postcodes
USING BTREE (osm_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,26 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Search index table. Not created at all for reverse-only installations.
DROP TABLE IF EXISTS search_name;
{% if not create_reverse_only %}
CREATE TABLE search_name (
  place_id BIGINT NOT NULL,
  importance FLOAT NOT NULL,
  search_rank SMALLINT NOT NULL,
  address_rank SMALLINT NOT NULL,
  name_vector integer[] NOT NULL,        -- token ids of the place's own names
  nameaddress_vector integer[] NOT NULL, -- token ids of the address terms
  country_code varchar(2),
  centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_search_name_place_id
ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};
{% endif %}

23
lib-sql/tables/status.sql Normal file
View File

@@ -0,0 +1,23 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Tracks the state of replication updates.
DROP TABLE IF EXISTS import_status;
CREATE TABLE import_status (
  lastimportdate TIMESTAMP WITH TIME ZONE NOT NULL,
  sequence_id INTEGER,     -- replication sequence number
  indexed BOOLEAN          -- true when indexing after the import has finished
);
-- Log of individual update batches.
DROP TABLE IF EXISTS import_osmosis_log;
CREATE TABLE import_osmosis_log (
  batchend TIMESTAMP,
  batchseq INTEGER,
  batchsize BIGINT,
  starttime TIMESTAMP,
  endtime TIMESTAMP,
  event TEXT
);

17
lib-sql/tables/tiger.sql Normal file
View File

@@ -0,0 +1,17 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- House number interpolation lines from the US TIGER data set.
DROP TABLE IF EXISTS location_property_tiger;
CREATE TABLE location_property_tiger (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,        -- street the line is attached to
  startnumber INTEGER NOT NULL,  -- first house number on the line
  endnumber INTEGER NOT NULL,    -- last house number on the line
  step SMALLINT NOT NULL,        -- house number increment
  partition SMALLINT NOT NULL,
  linegeo GEOMETRY NOT NULL,
  postcode TEXT);

View File

@@ -15,6 +15,99 @@ CREATE TABLE location_property_tiger_import (
step SMALLINT, step SMALLINT,
postcode TEXT); postcode TEXT);
-- Lookup functions for tiger import when update
-- tables are dropped (see gh-issue #2463)

-- Return the place_id of the closest road (address rank 26-27) whose name
-- matches the given street tokens. Slow fallback: queries search_name
-- directly instead of the partitioned lookup tables. Returns NULL when
-- nothing matches within the search box.
CREATE OR REPLACE FUNCTION getNearestNamedRoadPlaceIdSlow(in_centroid GEOMETRY,
                                                          in_token_info JSONB)
  RETURNS BIGINT
  AS $$
DECLARE
  out_place_id BIGINT;
BEGIN
  SELECT place_id INTO out_place_id
    FROM search_name
    WHERE
      -- finds rows where name_vector shares elements with search tokens.
      token_matches_street(in_token_info, name_vector)
      -- limits search area (bounding-box overlap, roughly 1.5km in degrees)
      AND centroid && ST_Expand(in_centroid, 0.015)
      AND address_rank BETWEEN 26 AND 27
    ORDER BY ST_Distance(centroid, in_centroid) ASC
    LIMIT 1;
  RETURN out_place_id;
END
$$
LANGUAGE plpgsql;
-- Return the place_id of the road (address rank 26-27) that runs closest
-- alongside the given line. Slow fallback variant that scans placex directly.
-- Closeness is judged by the summed distance to three sample points on the
-- line (start, middle, end). Returns NULL for non-linestring input or when
-- no road is found within the maximum search diameter.
CREATE OR REPLACE FUNCTION getNearestParallelRoadFeatureSlow(line GEOMETRY)
  RETURNS BIGINT
  AS $$
DECLARE
  r RECORD;
  search_diameter FLOAT;
  p1 GEOMETRY;
  p2 GEOMETRY;
  p3 GEOMETRY;
BEGIN
  IF ST_GeometryType(line) not in ('ST_LineString') THEN
    RETURN NULL;
  END IF;
  -- Sample the line at its start, midpoint and end.
  p1 := ST_LineInterpolatePoint(line,0);
  p2 := ST_LineInterpolatePoint(line,0.5);
  p3 := ST_LineInterpolatePoint(line,1);
  -- Widen the search radius exponentially until a road is found
  -- or the upper bound is reached.
  search_diameter := 0.0005;
  WHILE search_diameter < 0.01 LOOP
    FOR r IN
      SELECT place_id FROM placex
        WHERE ST_DWithin(line, geometry, search_diameter)
              AND rank_address BETWEEN 26 AND 27
        ORDER BY (ST_distance(geometry, p1)+
                  ST_distance(geometry, p2)+
                  ST_distance(geometry, p3)) ASC limit 1
    LOOP
      RETURN r.place_id;
    END LOOP;
    search_diameter := search_diameter * 2;
  END LOOP;
  RETURN NULL;
END
$$
LANGUAGE plpgsql;
-- Return the place_id of the road (address rank 26-27) nearest to the given
-- point. Slow fallback variant that scans placex directly. Returns NULL when
-- no road is found within the maximum search diameter.
CREATE OR REPLACE FUNCTION getNearestRoadPlaceIdSlow(point GEOMETRY)
  RETURNS BIGINT
  AS $$
DECLARE
  r RECORD;
  search_diameter FLOAT;
BEGIN
  -- Widen the search radius exponentially until a road is found
  -- or the upper bound is reached.
  search_diameter := 0.00005;
  WHILE search_diameter < 0.1 LOOP
    FOR r IN
      SELECT place_id FROM placex
        WHERE ST_DWithin(geometry, point, search_diameter)
              AND rank_address BETWEEN 26 AND 27
        ORDER BY ST_Distance(geometry, point) ASC limit 1
    LOOP
      RETURN r.place_id;
    END LOOP;
    search_diameter := search_diameter * 2;
  END LOOP;
  RETURN NULL;
END
$$
LANGUAGE plpgsql;
-- Tiger import function
CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER, CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT, in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT) RETURNS INTEGER token_info JSONB, in_postcode TEXT) RETURNS INTEGER
@@ -71,28 +164,51 @@ BEGIN
place_centroid := ST_Centroid(linegeo); place_centroid := ST_Centroid(linegeo);
out_partition := get_partition('us'); out_partition := get_partition('us');
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid, -- HYBRID LOOKUP LOGIC (see gh-issue #2463)
-- if partition tables exist, use them for fast spatial lookups
{% if 'location_road_0' in db.tables %}
out_parent_place_id := getNearestNamedRoadPlaceId(out_partition, place_centroid,
token_info); token_info);
IF out_parent_place_id IS NULL THEN IF out_parent_place_id IS NULL THEN
SELECT getNearestParallelRoadFeature(out_partition, linegeo) SELECT getNearestParallelRoadFeature(out_partition, linegeo)
INTO out_parent_place_id; INTO out_parent_place_id;
END IF;
IF out_parent_place_id IS NULL THEN
SELECT getNearestRoadPlaceId(out_partition, place_centroid)
INTO out_parent_place_id;
END IF;
-- When updatable information has been dropped:
-- Partition tables no longer exist, but search_name still persists.
{% elif 'search_name' in db.tables %}
-- Fallback: Look up in 'search_name' table
-- though spatial lookups here can be slower.
out_parent_place_id := getNearestNamedRoadPlaceIdSlow(place_centroid, token_info);
IF out_parent_place_id IS NULL THEN
out_parent_place_id := getNearestParallelRoadFeatureSlow(linegeo);
END IF;
IF out_parent_place_id IS NULL THEN
out_parent_place_id := getNearestRoadPlaceIdSlow(place_centroid);
END IF;
{% endif %}
-- If parent was found, insert street(line) into import table
IF out_parent_place_id IS NOT NULL THEN
INSERT INTO location_property_tiger_import (linegeo, place_id, partition,
parent_place_id, startnumber, endnumber,
step, postcode)
VALUES (linegeo, nextval('seq_place'), out_partition,
out_parent_place_id, startnumber, endnumber,
stepsize, in_postcode);
RETURN 1;
END IF; END IF;
RETURN 0;
IF out_parent_place_id IS NULL THEN
SELECT getNearestRoadPlaceId(out_partition, place_centroid)
INTO out_parent_place_id;
END IF;
--insert street(line) into import table
insert into location_property_tiger_import (linegeo, place_id, partition,
parent_place_id, startnumber, endnumber,
step, postcode)
values (linegeo, nextval('seq_place'), out_partition,
out_parent_place_id, startnumber, endnumber,
stepsize, in_postcode);
RETURN 1;
END; END;
$$ $$
LANGUAGE plpgsql; LANGUAGE plpgsql;

View File

@@ -23,7 +23,7 @@ an ASGI-capable server like uvicorn. To install them from pypi run:
You need to have a Nominatim database imported with the 'nominatim-db' You need to have a Nominatim database imported with the 'nominatim-db'
package. Go to the project directory, then run uvicorn as: package. Go to the project directory, then run uvicorn as:
uvicorn --factory nominatim.server.falcon.server:run_wsgi uvicorn --factory nominatim_api.server.falcon.server:run_wsgi
## Documentation ## Documentation

View File

@@ -43,7 +43,7 @@ class FormatDispatcher:
return decorator return decorator
def error_format_func(self, func: ErrorFormatFunc) -> ErrorFormatFunc: def error_format_func(self, func: ErrorFormatFunc) -> ErrorFormatFunc:
""" Decorator for a function that formats error messges. """ Decorator for a function that formats error messages.
There is only one error formatter per dispatcher. Using There is only one error formatter per dispatcher. Using
the decorator repeatedly will overwrite previous functions. the decorator repeatedly will overwrite previous functions.
""" """
@@ -79,7 +79,7 @@ class FormatDispatcher:
def set_content_type(self, fmt: str, content_type: str) -> None: def set_content_type(self, fmt: str, content_type: str) -> None:
""" Set the content type for the given format. This is the string """ Set the content type for the given format. This is the string
that will be returned in the Content-Type header of the HTML that will be returned in the Content-Type header of the HTML
response, when the given format is choosen. response, when the given format is chosen.
""" """
self.content_types[fmt] = content_type self.content_types[fmt] = content_type

View File

@@ -22,7 +22,7 @@ class CountedTokenIDs:
""" A list of token IDs with their respective counts, sorted """ A list of token IDs with their respective counts, sorted
from least frequent to most frequent. from least frequent to most frequent.
If a token count is one, then statistics are likely to be unavaible If a token count is one, then statistics are likely to be unavailable
and a relatively high count is assumed instead. and a relatively high count is assumed instead.
""" """

View File

@@ -17,7 +17,7 @@ import dataclasses
# The x value for the regression computation will be the position of the # The x value for the regression computation will be the position of the
# token in the query. Thus we know the x values will be [0, query length). # token in the query. Thus we know the x values will be [0, query length).
# As the denominator only depends on the x values, we can pre-compute here # As the denominator only depends on the x values, we can pre-compute here
# the denominatior to use for a given query length. # the denominator to use for a given query length.
# Note that query length of two or less is special cased and will not use # Note that query length of two or less is special cased and will not use
# the values from this array. Thus it is not a problem that they are 0. # the values from this array. Thus it is not a problem that they are 0.
LINFAC = [i * (sum(si * si for si in range(i)) - (i - 1) * i * (i - 1) / 4) LINFAC = [i * (sum(si * si for si in range(i)) - (i - 1) * i * (i - 1) / 4)
@@ -129,7 +129,7 @@ class Token(ABC):
@abstractmethod @abstractmethod
def get_country(self) -> str: def get_country(self) -> str:
""" Return the country code this tojen is associated with """ Return the country code this token is associated with
(currently for country tokens only). (currently for country tokens only).
""" """
@@ -231,7 +231,7 @@ class QueryNode:
return max(0, -self.penalty) return max(0, -self.penalty)
def name_address_ratio(self) -> float: def name_address_ratio(self) -> float:
""" Return the propability that the partial token belonging to """ Return the probability that the partial token belonging to
this node forms part of a name (as opposed of part of the address). this node forms part of a name (as opposed of part of the address).
""" """
if self.partial is None: if self.partial is None:
@@ -275,7 +275,7 @@ class QueryStruct:
directed acyclic graph. directed acyclic graph.
A query also has a direction penalty 'dir_penalty'. This describes A query also has a direction penalty 'dir_penalty'. This describes
the likelyhood if the query should be read from left-to-right or the likelihood if the query should be read from left-to-right or
vice versa. A negative 'dir_penalty' should be read as a penalty on vice versa. A negative 'dir_penalty' should be read as a penalty on
right-to-left reading, while a positive value represents a penalty right-to-left reading, while a positive value represents a penalty
for left-to-right reading. The default value is 0, which is equivalent for left-to-right reading. The default value is 0, which is equivalent

View File

@@ -184,6 +184,10 @@ class APIMiddleware:
formatter = load_format_dispatcher('v1', self.api.config.project_dir) formatter = load_format_dispatcher('v1', self.api.config.project_dir)
for name, func in await api_impl.get_routes(self.api): for name, func in await api_impl.get_routes(self.api):
endpoint = EndpointWrapper(name, func, self.api, formatter) endpoint = EndpointWrapper(name, func, self.api, formatter)
# If func is a LazySearchEndpoint, give it a reference to wrapper
# so it can replace wrapper.func dynamically
if hasattr(func, 'set_wrapper'):
func.set_wrapper(endpoint)
self.app.add_route(f"/{name}", endpoint) self.app.add_route(f"/{name}", endpoint)
if legacy_urls: if legacy_urls:
self.app.add_route(f"/{name}.php", endpoint) self.app.add_route(f"/{name}.php", endpoint)

View File

@@ -50,7 +50,7 @@ class ParamWrapper(ASGIAdaptor):
headers={'content-type': self.content_type}) headers={'content-type': self.content_type})
def create_response(self, status: int, output: str, num_results: int) -> Response: def create_response(self, status: int, output: str, num_results: int) -> Response:
self.request.state.num_results = num_results setattr(self.request.state, 'num_results', num_results)
return Response(output, status_code=status, media_type=self.content_type) return Response(output, status_code=status, media_type=self.content_type)
def base_uri(self) -> str: def base_uri(self) -> str:
@@ -95,7 +95,7 @@ class FileLoggingMiddleware(BaseHTTPMiddleware):
async def dispatch(self, request: Request, async def dispatch(self, request: Request,
call_next: RequestResponseEndpoint) -> Response: call_next: RequestResponseEndpoint) -> Response:
qs = QueryStatistics() qs = QueryStatistics()
request.state.query_stats = qs setattr(request.state, 'query_stats', qs)
response = await call_next(request) response = await call_next(request)
if response.status_code != 200 or 'start' not in qs: if response.status_code != 200 or 'start' not in qs:

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Generic part of the server implementation of the v1 API. Generic part of the server implementation of the v1 API.
@@ -12,6 +12,7 @@ from typing import Optional, Any, Type, Dict, cast, Sequence, Tuple
from functools import reduce from functools import reduce
import dataclasses import dataclasses
from urllib.parse import urlencode from urllib.parse import urlencode
import asyncio
import sqlalchemy as sa import sqlalchemy as sa
@@ -124,6 +125,12 @@ def parse_geometry_details(adaptor: ASGIAdaptor, fmt: str) -> Dict[str, Any]:
} }
def has_search_name(conn: sa.engine.Connection) -> bool:
    """ Check if the search_name table exists in the database.
    """
    inspector = sa.inspect(conn)
    return inspector.has_table('search_name')
async def status_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: async def status_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
""" Server glue for /status endpoint. See API docs for details. """ Server glue for /status endpoint. See API docs for details.
""" """
@@ -200,6 +207,7 @@ async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18)) details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
details['layers'] = get_layers(params) details['layers'] = get_layers(params)
details['query_stats'] = params.query_stats() details['query_stats'] = params.query_stats()
details['entrances'] = params.get_bool('entrances', False)
result = await api.reverse(coord, **details) result = await api.reverse(coord, **details)
@@ -238,6 +246,7 @@ async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
debug = setup_debugging(params) debug = setup_debugging(params)
details = parse_geometry_details(params, fmt) details = parse_geometry_details(params, fmt)
details['query_stats'] = params.query_stats() details['query_stats'] = params.query_stats()
details['entrances'] = params.get_bool('entrances', False)
places = [] places = []
for oid in (params.get('osm_ids') or '').split(','): for oid in (params.get('osm_ids') or '').split(','):
@@ -439,6 +448,61 @@ async def polygons_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
return build_response(params, params.formatting().format_result(results, fmt, {})) return build_response(params, params.formatting().format_result(results, fmt, {}))
async def search_unavailable_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
    """ Server glue for /search endpoint in reverse-only mode.
        Returns 404 when search functionality is not available.

        NOTE(review): raise_error() is expected to raise, so this
        coroutine never returns a value normally — confirm against the
        ASGIAdaptor contract.
    """
    params.raise_error('Search not available (reverse-only mode)', 404)
class LazySearchEndpoint:
    """
    Lazy-loading search endpoint that replaces itself after first successful check.

    Used when the availability of the search_name table could not be
    determined at startup (e.g. temporary database failure): the first
    request performs the check and then delegates to either the real
    search endpoint or the 404 reverse-only handler.

    - Falcon: EndpointWrapper stores this instance in wrapper.func
      On first request, replace wrapper.func directly with real endpoint
    - Starlette: _wrap_endpoint wraps this instance in a callback
      store a delegate function and call it on subsequent requests
    """

    def __init__(self, api: NominatimAPIAsync, real_endpoint: EndpointFunc):
        self.api = api
        self.real_endpoint = real_endpoint
        # Serialises the one-time availability check across concurrent requests.
        self._lock = asyncio.Lock()
        self._wrapper: Any = None  # Store reference to Falcon's EndpointWrapper
        # Resolved endpoint; stays None until the check has succeeded,
        # so a failed check is retried on the next request.
        self._delegate: Optional[EndpointFunc] = None

    def set_wrapper(self, wrapper: Any) -> None:
        # Called by the Falcon glue so wrapper.func can be swapped in place.
        self._wrapper = wrapper

    async def __call__(self, api: NominatimAPIAsync, params: ASGIAdaptor) -> Any:
        if self._delegate is None:
            async with self._lock:
                # Double-check after acquiring lock (another task may have
                # finished the check while this one was waiting)
                if self._delegate is None:
                    try:
                        async with api.begin() as conn:
                            has_table = await conn.connection.run_sync(
                                has_search_name)
                            if has_table:
                                # For Starlette
                                self._delegate = self.real_endpoint
                                # For Falcon
                                if self._wrapper is not None:
                                    self._wrapper.func = self.real_endpoint
                            else:
                                self._delegate = search_unavailable_endpoint
                                if self._wrapper is not None:
                                    self._wrapper.func = search_unavailable_endpoint
                    except (PGCORE_ERROR, sa.exc.OperationalError, OSError):
                        # No _delegate set, so retry on next request
                        params.raise_error('Search temporarily unavailable', 503)
        return await self._delegate(api, params)
async def get_routes(api: NominatimAPIAsync) -> Sequence[Tuple[str, EndpointFunc]]: async def get_routes(api: NominatimAPIAsync) -> Sequence[Tuple[str, EndpointFunc]]:
routes = [ routes = [
('status', status_endpoint), ('status', status_endpoint),
@@ -449,15 +513,13 @@ async def get_routes(api: NominatimAPIAsync) -> Sequence[Tuple[str, EndpointFunc
('polygons', polygons_endpoint), ('polygons', polygons_endpoint),
] ]
def has_search_name(conn: sa.engine.Connection) -> bool:
insp = sa.inspect(conn)
return insp.has_table('search_name')
try: try:
async with api.begin() as conn: async with api.begin() as conn:
if await conn.connection.run_sync(has_search_name): if await conn.connection.run_sync(has_search_name):
routes.append(('search', search_endpoint)) routes.append(('search', search_endpoint))
except (PGCORE_ERROR, sa.exc.OperationalError): else:
pass # ignored routes.append(('search', search_unavailable_endpoint))
except (PGCORE_ERROR, sa.exc.OperationalError, OSError):
routes.append(('search', LazySearchEndpoint(api, search_endpoint)))
return routes return routes

View File

@@ -65,14 +65,14 @@ class UpdateAddData:
def run(self, args: NominatimArgs) -> int: def run(self, args: NominatimArgs) -> int:
from ..tools import add_osm_data from ..tools import add_osm_data
if args.tiger_data:
return asyncio.run(self._add_tiger_data(args))
with connect(args.config.get_libpq_dsn()) as conn: with connect(args.config.get_libpq_dsn()) as conn:
if is_frozen(conn): if is_frozen(conn):
print('Database is marked frozen. New data can\'t be added.') print('Database is marked frozen. New data can\'t be added.')
return 1 return 1
if args.tiger_data:
return asyncio.run(self._add_tiger_data(args))
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1) osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
if args.file or args.diff: if args.file or args.diff:
return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(), return add_osm_data.add_data_from_file(args.config.get_libpq_dsn(),

View File

@@ -104,7 +104,7 @@ def _get_locales(args: NominatimArgs, config: Configuration) -> napi.Locales:
return napi.Locales() return napi.Locales()
def _get_layers(args: NominatimArgs, default: napi.DataLayer) -> Optional[napi.DataLayer]: def _get_layers(args: NominatimArgs, default: Optional[napi.DataLayer]) -> Optional[napi.DataLayer]:
""" Get the list of selected layers as a DataLayer enum. """ Get the list of selected layers as a DataLayer enum.
""" """
if not args.layers: if not args.layers:
@@ -136,7 +136,7 @@ def _print_output(formatter: napi.FormatDispatcher, result: Any,
json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False) json.dump(json.loads(output), sys.stdout, indent=4, ensure_ascii=False)
except json.decoder.JSONDecodeError as err: except json.decoder.JSONDecodeError as err:
# Catch the error here, so that data can be debugged, # Catch the error here, so that data can be debugged,
# when people are developping custom result formatters. # when people are developing custom result formatters.
LOG.fatal("Parsing json failed: %s\nUnformatted output:\n%s", err, output) LOG.fatal("Parsing json failed: %s\nUnformatted output:\n%s", err, output)
else: else:
sys.stdout.write(output) sys.stdout.write(output)
@@ -173,6 +173,10 @@ class APISearch:
help='Preferred area to find search results') help='Preferred area to find search results')
group.add_argument('--bounded', action='store_true', group.add_argument('--bounded', action='store_true',
help='Strictly restrict results to viewbox area') help='Strictly restrict results to viewbox area')
group.add_argument('--layer', metavar='LAYER',
choices=[n.name.lower() for n in napi.DataLayer if n.name],
action='append', required=False, dest='layers',
help='Restrict results to one or more layers (may be repeated)')
group.add_argument('--no-dedupe', action='store_false', dest='dedupe', group.add_argument('--no-dedupe', action='store_false', dest='dedupe',
help='Do not remove duplicates from the result list') help='Do not remove duplicates from the result list')
_add_list_format(parser) _add_list_format(parser)
@@ -189,6 +193,8 @@ class APISearch:
raise UsageError(f"Unsupported format '{args.format}'. " raise UsageError(f"Unsupported format '{args.format}'. "
'Use --list-formats to see supported formats.') 'Use --list-formats to see supported formats.')
layers = _get_layers(args, None)
try: try:
with napi.NominatimAPI(args.project_dir) as api: with napi.NominatimAPI(args.project_dir) as api:
params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10), params: Dict[str, Any] = {'max_results': args.limit + min(args.limit, 10),
@@ -199,6 +205,7 @@ class APISearch:
'excluded': args.exclude_place_ids, 'excluded': args.exclude_place_ids,
'viewbox': args.viewbox, 'viewbox': args.viewbox,
'bounded_viewbox': args.bounded, 'bounded_viewbox': args.bounded,
'layers': layers,
'entrances': args.entrances, 'entrances': args.entrances,
} }
@@ -255,7 +262,7 @@ class APIReverse:
group.add_argument('--layer', metavar='LAYER', group.add_argument('--layer', metavar='LAYER',
choices=[n.name.lower() for n in napi.DataLayer if n.name], choices=[n.name.lower() for n in napi.DataLayer if n.name],
action='append', required=False, dest='layers', action='append', required=False, dest='layers',
help='OSM id to lookup in format <NRW><id> (may be repeated)') help='Restrict results to one or more layers (may be repeated)')
_add_api_output_arguments(parser) _add_api_output_arguments(parser)
_add_list_format(parser) _add_list_format(parser)

View File

@@ -119,6 +119,8 @@ class NominatimArgs:
enable_debug_statements: bool enable_debug_statements: bool
data_object: Sequence[Tuple[str, int]] data_object: Sequence[Tuple[str, int]]
data_area: Sequence[Tuple[str, int]] data_area: Sequence[Tuple[str, int]]
ro_access: bool
postcode_force_reimport: bool
# Arguments to 'replication' # Arguments to 'replication'
init: bool init: bool

View File

@@ -65,6 +65,8 @@ class UpdateRefresh:
help='Update secondary importance raster data') help='Update secondary importance raster data')
group.add_argument('--importance', action='store_true', group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)') help='Recompute place importances (expensive!)')
group.add_argument('--ro-access', action='store_true',
help='Grant read-only access to web user for all tables')
group.add_argument('--website', action='store_true', group.add_argument('--website', action='store_true',
help='DEPRECATED. This function has no function anymore' help='DEPRECATED. This function has no function anymore'
' and will be removed in a future version.') ' and will be removed in a future version.')
@@ -82,6 +84,10 @@ class UpdateRefresh:
help='Do not enable code for propagating updates') help='Do not enable code for propagating updates')
group.add_argument('--enable-debug-statements', action='store_true', group.add_argument('--enable-debug-statements', action='store_true',
help='Enable debug warning statements in functions') help='Enable debug warning statements in functions')
group = parser.add_argument_group('Arguments for postcode refresh')
group.add_argument('--force-reimport', action='store_true',
dest='postcode_force_reimport',
help='Recompute the postcodes from scratch instead of updating')
def run(self, args: NominatimArgs) -> int: def run(self, args: NominatimArgs) -> int:
from ..tools import refresh, postcodes from ..tools import refresh, postcodes
@@ -94,7 +100,8 @@ class UpdateRefresh:
LOG.warning("Update postcodes centroid") LOG.warning("Update postcodes centroid")
tokenizer = self._get_tokenizer(args.config) tokenizer = self._get_tokenizer(args.config)
postcodes.update_postcodes(args.config.get_libpq_dsn(), postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer) args.project_dir, tokenizer,
force_reimport=args.postcode_force_reimport)
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
args.threads or 1) args.threads or 1)
asyncio.run(indexer.index_postcodes()) asyncio.run(indexer.index_postcodes())
@@ -159,6 +166,11 @@ class UpdateRefresh:
LOG.error('WARNING: Website setup is no longer required. ' LOG.error('WARNING: Website setup is no longer required. '
'This function will be removed in future version of Nominatim.') 'This function will be removed in future version of Nominatim.')
if args.ro_access:
from ..tools import admin
LOG.warning('Grant read-only access to web user')
admin.grant_ro_access(args.config.get_libpq_dsn(), args.config)
if args.data_object or args.data_area: if args.data_object or args.data_area:
with connect(args.config.get_libpq_dsn()) as conn: with connect(args.config.get_libpq_dsn()) as conn:
for obj in args.data_object or []: for obj in args.data_object or []:

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Nominatim configuration accessor. Nominatim configuration accessor.
@@ -12,6 +12,7 @@ import importlib.util
import logging import logging
import os import os
import sys import sys
import re
from pathlib import Path from pathlib import Path
import json import json
import yaml import yaml
@@ -80,6 +81,10 @@ class Configuration:
self.lib_dir = _LibDirs() self.lib_dir = _LibDirs()
self._private_plugins: Dict[str, object] = {} self._private_plugins: Dict[str, object] = {}
if re.fullmatch(r'[\w-]+', self.DATABASE_WEBUSER) is None:
raise UsageError("Misconfigured DATABASE_WEBUSER. "
"Only alphnumberic characters, - and _ are allowed.")
def set_libdirs(self, **kwargs: StrPath) -> None: def set_libdirs(self, **kwargs: StrPath) -> None:
""" Set paths to library functions and data. """ Set paths to library functions and data.
""" """

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2024 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
A connection pool that executes incoming queries in parallel. A connection pool that executes incoming queries in parallel.
@@ -27,20 +27,30 @@ class QueryPool:
The results of the queries is discarded. The results of the queries is discarded.
""" """
def __init__(self, dsn: str, pool_size: int = 1, **conn_args: Any) -> None: def __init__(self, dsn: str, pool_size: int = 1, **conn_args: Any) -> None:
self.is_cancelled = False
self.wait_time = 0.0 self.wait_time = 0.0
self.query_queue: 'asyncio.Queue[QueueItem]' = asyncio.Queue(maxsize=2 * pool_size) self.query_queue: 'asyncio.Queue[QueueItem]' = asyncio.Queue(maxsize=2 * pool_size)
self.pool = [asyncio.create_task(self._worker_loop(dsn, **conn_args)) self.pool = [asyncio.create_task(self._worker_loop_cancellable(dsn, **conn_args))
for _ in range(pool_size)] for _ in range(pool_size)]
async def put_query(self, query: psycopg.abc.Query, params: Any) -> None: async def put_query(self, query: psycopg.abc.Query, params: Any) -> None:
""" Schedule a query for execution. """ Schedule a query for execution.
""" """
if self.is_cancelled:
self.clear_queue()
await self.finish()
return
tstart = time.time() tstart = time.time()
await self.query_queue.put((query, params)) await self.query_queue.put((query, params))
self.wait_time += time.time() - tstart self.wait_time += time.time() - tstart
await asyncio.sleep(0) await asyncio.sleep(0)
if self.is_cancelled:
self.clear_queue()
await self.finish()
async def finish(self) -> None: async def finish(self) -> None:
""" Wait for all queries to finish and close the pool. """ Wait for all queries to finish and close the pool.
""" """
@@ -56,6 +66,25 @@ class QueryPool:
if excp is not None: if excp is not None:
raise excp raise excp
def clear_queue(self) -> None:
    """ Silently discard any queries that are still waiting in the queue.
    """
    while True:
        try:
            self.query_queue.get_nowait()
        except asyncio.QueueEmpty:
            # Queue fully drained.
            break
async def _worker_loop_cancellable(self, dsn: str, **conn_args: Any) -> None:
try:
await self._worker_loop(dsn, **conn_args)
except Exception as e:
# Make sure the exception is forwarded to the main function
self.is_cancelled = True
# clear the queue here to ensure that any put() that may be blocked returns
self.clear_queue()
raise e
async def _worker_loop(self, dsn: str, **conn_args: Any) -> None: async def _worker_loop(self, dsn: str, **conn_args: Any) -> None:
conn_args['autocommit'] = True conn_args['autocommit'] = True
aconn = await psycopg.AsyncConnection.connect(dsn, **conn_args) aconn = await psycopg.AsyncConnection.connect(dsn, **conn_args)

View File

@@ -2,12 +2,13 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2024 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Preprocessing of SQL files. Preprocessing of SQL files.
""" """
from typing import Set, Dict, Any, cast from typing import Set, Dict, Any, cast
import re
import jinja2 import jinja2
@@ -34,7 +35,9 @@ def _get_tables(conn: Connection) -> Set[str]:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'") cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")
return set((row[0] for row in list(cur))) # paranoia check: make sure we don't get table names that cause
# an SQL injection later
return {row[0] for row in list(cur) if re.fullmatch(r'\w+', row[0])}
def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str: def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:

View File

@@ -56,10 +56,10 @@ class Indexer:
cur.execute('ANALYZE') cur.execute('ANALYZE')
while True: while True:
if await self.index_by_rank(0, 4) > 0: if await self.index_by_rank(1, 4) > 0:
_analyze() _analyze()
if await self.index_boundaries(0, 30) > 100: if await self.index_boundaries() > 100:
_analyze() _analyze()
if await self.index_by_rank(5, 25) > 100: if await self.index_by_rank(5, 25) > 100:
@@ -68,13 +68,16 @@ class Indexer:
if await self.index_by_rank(26, 30) > 1000: if await self.index_by_rank(26, 30) > 1000:
_analyze() _analyze()
# Special case: rank zero depends on the previously-indexed [1..30] ranks
await self.index_by_rank(0, 0)
if await self.index_postcodes() > 100: if await self.index_postcodes() > 100:
_analyze() _analyze()
if not self.has_pending(): if not self.has_pending():
break break
async def index_boundaries(self, minrank: int, maxrank: int) -> int: async def index_boundaries(self, minrank: int = 0, maxrank: int = 30) -> int:
""" Index only administrative boundaries within the given rank range. """ Index only administrative boundaries within the given rank range.
""" """
total = 0 total = 0
@@ -147,8 +150,11 @@ class Indexer:
total += await self._index(runners.RankRunner(rank, analyzer), total += await self._index(runners.RankRunner(rank, analyzer),
batch=batch, total_tuples=total_tuples.get(rank, 0)) batch=batch, total_tuples=total_tuples.get(rank, 0))
if maxrank == 30: # Special case: rank zero depends on ranks [1..30]
if minrank == 0:
total += await self._index(runners.RankRunner(0, analyzer)) total += await self._index(runners.RankRunner(0, analyzer))
if maxrank == 30:
total += await self._index(runners.InterpolationRunner(analyzer), batch=20) total += await self._index(runners.InterpolationRunner(analyzer), batch=20)
return total return total
@@ -177,7 +183,7 @@ class Indexer:
`total_tuples` may contain the total number of rows to process. `total_tuples` may contain the total number of rows to process.
When not supplied, the value will be computed using the When not supplied, the value will be computed using the
approriate runner function. appropriate runner function.
""" """
LOG.warning("Starting %s (using batch size %s)", runner.name(), batch) LOG.warning("Starting %s (using batch size %s)", runner.name(), batch)

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tokenizer implementing normalisation as used before Nominatim 4 but using Tokenizer implementing normalisation as used before Nominatim 4 but using
@@ -294,13 +294,12 @@ class ICUTokenizer(AbstractTokenizer):
with connect(self.dsn) as conn: with connect(self.dsn) as conn:
drop_tables(conn, 'word') drop_tables(conn, 'word')
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(f"ALTER TABLE {old} RENAME TO word") cur.execute(pysql.SQL("ALTER TABLE {} RENAME TO word")
for idx in ('word_token', 'word_id'): .format(pysql.Identifier(old)))
cur.execute(f"""ALTER INDEX idx_{old}_{idx} for idx in ['word_token', 'word_id'] + [n[0] for n in WORD_TYPES]:
RENAME TO idx_word_{idx}""") cur.execute(pysql.SQL("ALTER INDEX {} RENAME TO {}")
for name, _ in WORD_TYPES: .format(pysql.Identifier(f"idx_{old}_{idx}"),
cur.execute(f"""ALTER INDEX idx_{old}_{name} pysql.Identifier(f"idx_word_{idx}")))
RENAME TO idx_word_{name}""")
conn.commit() conn.commit()

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2024 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Sanitizer that preprocesses address tags for house numbers. The sanitizer Sanitizer that preprocesses address tags for house numbers. The sanitizer
@@ -10,6 +10,7 @@ allows to
* define which tags are to be considered house numbers (see 'filter-kind') * define which tags are to be considered house numbers (see 'filter-kind')
* split house number lists into individual numbers (see 'delimiters') * split house number lists into individual numbers (see 'delimiters')
* expand interpolated house numbers
Arguments: Arguments:
delimiters: Define the set of characters to be used for delimiters: Define the set of characters to be used for
@@ -23,13 +24,19 @@ Arguments:
instead of a house number. Either takes a single string instead of a house number. Either takes a single string
or a list of strings, where each string is a regular or a list of strings, where each string is a regular
expression that must match the full house number value. expression that must match the full house number value.
expand-interpolations: When true, expand house number ranges to separate numbers
when an 'interpolation' is present. (default: true)
""" """
from typing import Callable, Iterator, List from typing import Callable, Iterator, Iterable, Union
import re
from ...data.place_name import PlaceName from ...data.place_name import PlaceName
from .base import ProcessInfo from .base import ProcessInfo
from .config import SanitizerConfig from .config import SanitizerConfig
RANGE_REGEX = re.compile(r'\d+-\d+')
class _HousenumberSanitizer: class _HousenumberSanitizer:
@@ -38,21 +45,40 @@ class _HousenumberSanitizer:
self.split_regexp = config.get_delimiter() self.split_regexp = config.get_delimiter()
self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL') self.filter_name = config.get_filter('convert-to-name', 'FAIL_ALL')
self.expand_interpolations = config.get_bool('expand-interpolations', True)
def __call__(self, obj: ProcessInfo) -> None: def __call__(self, obj: ProcessInfo) -> None:
if not obj.address: if not obj.address:
return return
new_address: List[PlaceName] = [] itype: Union[int, str, None] = None
if self.expand_interpolations:
itype = next((i.name for i in obj.address if i.kind == 'interpolation'), None)
if itype is not None:
if itype == 'all':
itype = 1
elif len(itype) == 1 and itype.isdigit():
itype = int(itype)
elif itype not in ('odd', 'even'):
itype = None
new_address: list[PlaceName] = []
for item in obj.address: for item in obj.address:
if self.filter_kind(item.kind): if self.filter_kind(item.kind):
if itype is not None and RANGE_REGEX.fullmatch(item.name):
hnrs = self._expand_range(itype, item.name)
if hnrs:
new_address.extend(item.clone(kind='housenumber', name=str(hnr))
for hnr in hnrs)
continue
if self.filter_name(item.name): if self.filter_name(item.name):
obj.names.append(item.clone(kind='housenumber')) obj.names.append(item.clone(kind='housenumber'))
else: else:
new_address.extend(item.clone(kind='housenumber', name=n) new_address.extend(item.clone(kind='housenumber', name=n)
for n in self.sanitize(item.name)) for n in self.sanitize(item.name))
else: elif item.kind != 'interpolation':
# Don't touch other address items. # Ignore interpolation, otherwise don't touch other address items.
new_address.append(item) new_address.append(item)
obj.address = new_address obj.address = new_address
@@ -70,6 +96,22 @@ class _HousenumberSanitizer:
def _regularize(self, hnr: str) -> Iterator[str]: def _regularize(self, hnr: str) -> Iterator[str]:
yield hnr yield hnr
def _expand_range(self, itype: Union[str, int], hnr: str) -> Iterable[int]:
first, last = (int(i) for i in hnr.split('-'))
if isinstance(itype, int):
step = itype
else:
step = 2
if (itype == 'even' and first % 2 == 1)\
or (itype == 'odd' and first % 2 == 0):
first += 1
if (last + 1 - first) / step < 10:
return range(first, last + 1, step)
return []
def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]: def create(config: SanitizerConfig) -> Callable[[ProcessInfo], None]:
""" Create a housenumber processing function. """ Create a housenumber processing function.

View File

@@ -16,6 +16,7 @@ from psycopg.types.json import Json
from ..typing import DictCursorResult from ..typing import DictCursorResult
from ..config import Configuration from ..config import Configuration
from ..db.connection import connect, Cursor, register_hstore from ..db.connection import connect, Cursor, register_hstore
from ..db.sql_preprocessor import SQLPreprocessor
from ..errors import UsageError from ..errors import UsageError
from ..tokenizer import factory as tokenizer_factory from ..tokenizer import factory as tokenizer_factory
from ..data.place_info import PlaceInfo from ..data.place_info import PlaceInfo
@@ -105,3 +106,12 @@ def clean_deleted_relations(config: Configuration, age: str) -> None:
except psycopg.DataError as exc: except psycopg.DataError as exc:
raise UsageError('Invalid PostgreSQL time interval format') from exc raise UsageError('Invalid PostgreSQL time interval format') from exc
conn.commit() conn.commit()
def grant_ro_access(dsn: str, config: Configuration) -> None:
    """ Give the configured web user read-only rights on all Nominatim
        tables.

        Useful for authorising a different web user after the initial
        import has already run.
    """
    with connect(dsn) as conn:
        preprocessor = SQLPreprocessor(conn, config)
        preprocessor.run_sql_file(conn, 'grants.sql')

View File

@@ -152,10 +152,11 @@ def create_tables(conn: Connection, config: Configuration, reverse_only: bool =
When `reverse_only` is True, then the main table for searching will When `reverse_only` is True, then the main table for searching will
be skipped and only reverse search is possible. be skipped and only reverse search is possible.
""" """
sql = SQLPreprocessor(conn, config) SQLPreprocessor(conn, config).run_sql_file(conn, 'tables.sql',
sql.env.globals['db']['reverse_only'] = reverse_only create_reverse_only=reverse_only)
sql.run_sql_file(conn, 'tables.sql') # reinitiate the preprocessor to get all the newly created tables
SQLPreprocessor(conn, config).run_sql_file(conn, 'grants.sql')
def create_table_triggers(conn: Connection, config: Configuration) -> None: def create_table_triggers(conn: Connection, config: Configuration) -> None:
@@ -193,7 +194,7 @@ def truncate_data_tables(conn: Connection) -> None:
WHERE tablename LIKE 'location_road_%'""") WHERE tablename LIKE 'location_road_%'""")
for table in [r[0] for r in list(cur)]: for table in [r[0] for r in list(cur)]:
cur.execute('TRUNCATE ' + table) cur.execute(pysql.SQL('TRUNCATE {}').format(pysql.Identifier(table)))
conn.commit() conn.commit()
@@ -225,7 +226,7 @@ async def load_data(dsn: str, threads: int) -> None:
total=pysql.Literal(placex_threads), total=pysql.Literal(placex_threads),
mod=pysql.Literal(imod)), None) mod=pysql.Literal(imod)), None)
# Interpolations need to be copied seperately # Interpolations need to be copied separately
await pool.put_query(""" await pool.put_query("""
INSERT INTO location_property_osmline (osm_id, address, linegeo) INSERT INTO location_property_osmline (osm_id, address, linegeo)
SELECT osm_id, address, geometry FROM place SELECT osm_id, address, geometry FROM place

View File

@@ -18,7 +18,6 @@ UPDATE_TABLES = [
'address_levels', 'address_levels',
'gb_postcode', 'gb_postcode',
'import_osmosis_log', 'import_osmosis_log',
'import_polygon_%',
'location_area%', 'location_area%',
'location_road%', 'location_road%',
'place', 'place',

View File

@@ -29,7 +29,7 @@ _MIGRATION_FUNCTIONS: List[Tuple[NominatimVersion, Callable[..., None]]] = []
def migrate(config: Configuration, paths: Any) -> int: def migrate(config: Configuration, paths: Any) -> int:
""" Check for the current database version and execute migrations, """ Check for the current database version and execute migrations,
if necesssary. if necessary.
""" """
with connect(config.get_libpq_dsn()) as conn: with connect(config.get_libpq_dsn()) as conn:
register_hstore(conn) register_hstore(conn)
@@ -143,7 +143,7 @@ def create_placex_entrance_table(conn: Connection, config: Configuration, **_: A
@_migration(5, 1, 99, 1) @_migration(5, 1, 99, 1)
def create_place_entrance_table(conn: Connection, config: Configuration, **_: Any) -> None: def create_place_entrance_table(conn: Connection, config: Configuration, **_: Any) -> None:
""" Add the place_entrance table to store incomming entrance nodes """ Add the place_entrance table to store incoming entrance nodes
""" """
if not table_exists(conn, 'place_entrance'): if not table_exists(conn, 'place_entrance'):
with conn.cursor() as cur: with conn.cursor() as cur:
@@ -252,7 +252,7 @@ def create_place_postcode_table(conn: Connection, config: Configuration, **_: An
""") """)
sqlp.run_string(conn, sqlp.run_string(conn,
'GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}"') 'GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}"')
# remove postcodes from the various auxillary tables # remove postcodes from the various auxiliary tables
cur.execute( cur.execute(
""" """
DELETE FROM place_addressline DELETE FROM place_addressline

View File

@@ -78,7 +78,7 @@ class _PostcodeCollector:
self.collected[normalized] += (x, y) self.collected[normalized] += (x, y)
def commit(self, conn: Connection, analyzer: AbstractAnalyzer, def commit(self, conn: Connection, analyzer: AbstractAnalyzer,
project_dir: Optional[Path]) -> None: project_dir: Optional[Path], is_initial: bool) -> None:
""" Update postcodes for the country from the postcodes selected so far. """ Update postcodes for the country from the postcodes selected so far.
When 'project_dir' is set, then any postcode files found in this When 'project_dir' is set, then any postcode files found in this
@@ -87,11 +87,14 @@ class _PostcodeCollector:
if project_dir is not None: if project_dir is not None:
self._update_from_external(analyzer, project_dir) self._update_from_external(analyzer, project_dir)
with conn.cursor() as cur: if is_initial:
cur.execute("""SELECT postcode FROM location_postcodes to_delete = []
WHERE country_code = %s AND osm_id is null""", else:
(self.country, )) with conn.cursor() as cur:
to_delete = [row[0] for row in cur if row[0] not in self.collected] cur.execute("""SELECT postcode FROM location_postcodes
WHERE country_code = %s AND osm_id is null""",
(self.country, ))
to_delete = [row[0] for row in cur if row[0] not in self.collected]
to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid()))) to_add = [dict(zip(('pc', 'x', 'y'), (k, *v.centroid())))
for k, v in self.collected.items()] for k, v in self.collected.items()]
@@ -102,22 +105,32 @@ class _PostcodeCollector:
with conn.cursor() as cur: with conn.cursor() as cur:
if to_add: if to_add:
cur.executemany(pysql.SQL( columns = ['country_code',
"""INSERT INTO location_postcodes 'rank_search',
(country_code, rank_search, postcode, centroid, geometry) 'postcode',
VALUES ({}, {}, %(pc)s, 'centroid',
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), 'geometry']
expand_by_meters(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})) values = [pysql.Literal(self.country),
""").format(pysql.Literal(self.country), pysql.Literal(_extent_to_rank(self.extent)),
pysql.Literal(_extent_to_rank(self.extent)), pysql.Placeholder('pc'),
pysql.Literal(self.extent)), pysql.SQL('ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326)'),
to_add) pysql.SQL("""expand_by_meters(
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), {})""")
.format(pysql.Literal(self.extent))]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany(pysql.SQL("INSERT INTO location_postcodes ({}) VALUES ({})")
.format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values)),
to_add)
if to_delete: if to_delete:
cur.execute("""DELETE FROM location_postcodes cur.execute("""DELETE FROM location_postcodes
WHERE country_code = %s and postcode = any(%s) WHERE country_code = %s and postcode = any(%s)
AND osm_id is null AND osm_id is null
""", (self.country, to_delete)) """, (self.country, to_delete))
cur.execute("ANALYSE location_postcodes")
def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None: def _update_from_external(self, analyzer: AbstractAnalyzer, project_dir: Path) -> None:
""" Look for an external postcode file for the active country in """ Look for an external postcode file for the active country in
@@ -159,12 +172,13 @@ class _PostcodeCollector:
if fname.is_file(): if fname.is_file():
LOG.info("Using external postcode file '%s'.", fname) LOG.info("Using external postcode file '%s'.", fname)
return gzip.open(fname, 'rt') return gzip.open(fname, 'rt', encoding='utf-8')
return None return None
def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractTokenizer) -> None: def update_postcodes(dsn: str, project_dir: Optional[Path],
tokenizer: AbstractTokenizer, force_reimport: bool = False) -> None:
""" Update the table of postcodes from the input tables """ Update the table of postcodes from the input tables
placex and place_postcode. placex and place_postcode.
""" """
@@ -176,45 +190,76 @@ def update_postcodes(dsn: str, project_dir: Optional[Path], tokenizer: AbstractT
SET country_code = get_country_code(centroid) SET country_code = get_country_code(centroid)
WHERE country_code is null WHERE country_code is null
""") """)
if force_reimport:
conn.execute("TRUNCATE location_postcodes")
is_initial = True
else:
is_initial = _is_postcode_table_empty(conn)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
DISABLE TRIGGER location_postcodes_before_insert""")
# Now update first postcode areas # Now update first postcode areas
_update_postcode_areas(conn, analyzer, matcher) _update_postcode_areas(conn, analyzer, matcher, is_initial)
# Then fill with estimated postcode centroids from other info # Then fill with estimated postcode centroids from other info
_update_guessed_postcode(conn, analyzer, matcher, project_dir) _update_guessed_postcode(conn, analyzer, matcher, project_dir, is_initial)
if is_initial:
conn.execute("""ALTER TABLE location_postcodes
ENABLE TRIGGER location_postcodes_before_insert""")
conn.commit() conn.commit()
analyzer.update_postcodes_from_db() analyzer.update_postcodes_from_db()
def _is_postcode_table_empty(conn: Connection) -> bool:
    """ Return True when the location_postcodes table has no rows yet.
    """
    with conn.cursor() as cur:
        cur.execute("SELECT place_id FROM location_postcodes LIMIT 1")
        first_row = cur.fetchone()
    return first_row is None
def _insert_postcode_areas(conn: Connection, country_code: str, def _insert_postcode_areas(conn: Connection, country_code: str,
extent: int, pcs: list[dict[str, str]]) -> None: extent: int, pcs: list[dict[str, str]],
is_initial: bool) -> None:
if pcs: if pcs:
with conn.cursor() as cur: with conn.cursor() as cur:
columns = ['osm_id', 'country_code',
'rank_search', 'postcode',
'centroid', 'geometry']
values = [pysql.Identifier('osm_id'), pysql.Identifier('country_code'),
pysql.Literal(_extent_to_rank(extent)), pysql.Placeholder('out'),
pysql.Identifier('centroid'), pysql.Identifier('geometry')]
if is_initial:
columns.extend(('place_id', 'indexed_status'))
values.extend((pysql.SQL("nextval('seq_place')"), pysql.Literal(1)))
cur.executemany( cur.executemany(
pysql.SQL( pysql.SQL(
""" INSERT INTO location_postcodes """ INSERT INTO location_postcodes ({})
(osm_id, country_code, rank_search, postcode, centroid, geometry) SELECT {} FROM place_postcode
SELECT osm_id, country_code, {}, %(out)s, centroid, geometry
FROM place_postcode
WHERE osm_type = 'R' WHERE osm_type = 'R'
and country_code = {} and postcode = %(in)s and country_code = {} and postcode = %(in)s
and geometry is not null and geometry is not null
""").format(pysql.Literal(_extent_to_rank(extent)), """).format(pysql.SQL(',')
.join(pysql.Identifier(c) for c in columns),
pysql.SQL(',').join(values),
pysql.Literal(country_code)), pysql.Literal(country_code)),
pcs) pcs)
def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer, def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter) -> None: matcher: PostcodeFormatter, is_initial: bool) -> None:
""" Update the postcode areas made from postcode boundaries. """ Update the postcode areas made from postcode boundaries.
""" """
# first delete all areas that have gone # first delete all areas that have gone
conn.execute(""" DELETE FROM location_postcodes pc if not is_initial:
WHERE pc.osm_id is not null conn.execute(""" DELETE FROM location_postcodes pc
AND NOT EXISTS( WHERE pc.osm_id is not null
SELECT * FROM place_postcode pp AND NOT EXISTS(
WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id SELECT * FROM place_postcode pp
and geometry is not null) WHERE pp.osm_type = 'R' and pp.osm_id = pc.osm_id
""") and geometry is not null)
""")
# now insert all in country batches, triggers will ensure proper updates # now insert all in country batches, triggers will ensure proper updates
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(""" SELECT country_code, postcode FROM place_postcode cur.execute(""" SELECT country_code, postcode FROM place_postcode
@@ -230,7 +275,8 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
fmt = matcher.get_matcher(country_code) fmt = matcher.get_matcher(country_code)
elif country_code != cc: elif country_code != cc:
_insert_postcode_areas(conn, country_code, _insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs) matcher.get_postcode_extent(country_code), pcs,
is_initial)
country_code = cc country_code = cc
fmt = matcher.get_matcher(country_code) fmt = matcher.get_matcher(country_code)
pcs = [] pcs = []
@@ -241,21 +287,26 @@ def _update_postcode_areas(conn: Connection, analyzer: AbstractAnalyzer,
if country_code is not None and pcs: if country_code is not None and pcs:
_insert_postcode_areas(conn, country_code, _insert_postcode_areas(conn, country_code,
matcher.get_postcode_extent(country_code), pcs) matcher.get_postcode_extent(country_code), pcs,
is_initial)
def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer, def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
matcher: PostcodeFormatter, project_dir: Optional[Path]) -> None: matcher: PostcodeFormatter, project_dir: Optional[Path],
is_initial: bool) -> None:
""" Computes artificial postcode centroids from the placex table, """ Computes artificial postcode centroids from the placex table,
potentially enhances it with external data and then updates the potentially enhances it with external data and then updates the
postcodes in the table 'location_postcodes'. postcodes in the table 'location_postcodes'.
""" """
# First get the list of countries that currently have postcodes. # First get the list of countries that currently have postcodes.
# (Doing this before starting to insert, so it is fast on import.) # (Doing this before starting to insert, so it is fast on import.)
with conn.cursor() as cur: if is_initial:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes todo_countries: set[str] = set()
WHERE osm_id is null""") else:
todo_countries = {row[0] for row in cur} with conn.cursor() as cur:
cur.execute("""SELECT DISTINCT country_code FROM location_postcodes
WHERE osm_id is null""")
todo_countries = {row[0] for row in cur}
# Next, get the list of postcodes that are already covered by areas. # Next, get the list of postcodes that are already covered by areas.
area_pcs = defaultdict(set) area_pcs = defaultdict(set)
@@ -275,6 +326,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
FROM place_postcode WHERE geometry is not null) FROM place_postcode WHERE geometry is not null)
""") """)
cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)") cur.execute("CREATE INDEX ON _global_postcode_area USING gist(geometry)")
# Recompute the list of valid postcodes from placex. # Recompute the list of valid postcodes from placex.
with conn.cursor(name="placex_postcodes") as cur: with conn.cursor(name="placex_postcodes") as cur:
cur.execute(""" cur.execute("""
@@ -296,7 +348,7 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
for country, postcode, x, y in cur: for country, postcode, x, y in cur:
if collector is None or country != collector.country: if collector is None or country != collector.country:
if collector is not None: if collector is not None:
collector.commit(conn, analyzer, project_dir) collector.commit(conn, analyzer, project_dir, is_initial)
collector = _PostcodeCollector(country, matcher.get_matcher(country), collector = _PostcodeCollector(country, matcher.get_matcher(country),
matcher.get_postcode_extent(country), matcher.get_postcode_extent(country),
exclude=area_pcs[country]) exclude=area_pcs[country])
@@ -304,14 +356,14 @@ def _update_guessed_postcode(conn: Connection, analyzer: AbstractAnalyzer,
collector.add(postcode, x, y) collector.add(postcode, x, y)
if collector is not None: if collector is not None:
collector.commit(conn, analyzer, project_dir) collector.commit(conn, analyzer, project_dir, is_initial)
# Now handle any countries that are only in the postcode table. # Now handle any countries that are only in the postcode table.
for country in todo_countries: for country in todo_countries:
fmt = matcher.get_matcher(country) fmt = matcher.get_matcher(country)
ext = matcher.get_postcode_extent(country) ext = matcher.get_postcode_extent(country)
_PostcodeCollector(country, fmt, ext, _PostcodeCollector(country, fmt, ext,
exclude=area_pcs[country]).commit(conn, analyzer, project_dir) exclude=area_pcs[country]).commit(conn, analyzer, project_dir, False)
conn.execute("DROP TABLE IF EXISTS _global_postcode_area") conn.execute("DROP TABLE IF EXISTS _global_postcode_area")

View File

@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:
copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata) copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
FROM STDIN""" FROM STDIN"""
with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy: with gzip.open(
str(data_file), 'rt', encoding='utf-8') as fd, \
cur.copy(copy_cmd) as copy:
for row in csv.DictReader(fd, delimiter='\t', quotechar='|'): for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
wd_id = int(row['wikidata_id'][1:]) wd_id = int(row['wikidata_id'][1:])
copy.write_row((row['language'], copy.write_row((row['language'],

View File

@@ -17,13 +17,12 @@ import tarfile
from psycopg.types.json import Json from psycopg.types.json import Json
from ..config import Configuration from ..config import Configuration
from ..db.connection import connect from ..db.connection import connect, table_exists
from ..db.sql_preprocessor import SQLPreprocessor from ..db.sql_preprocessor import SQLPreprocessor
from ..errors import UsageError from ..errors import UsageError
from ..db.query_pool import QueryPool from ..db.query_pool import QueryPool
from ..data.place_info import PlaceInfo from ..data.place_info import PlaceInfo
from ..tokenizer.base import AbstractTokenizer from ..tokenizer.base import AbstractTokenizer
from . import freeze
LOG = logging.getLogger() LOG = logging.getLogger()
@@ -90,16 +89,19 @@ async def add_tiger_data(data_dir: str, config: Configuration, threads: int,
""" """
dsn = config.get_libpq_dsn() dsn = config.get_libpq_dsn()
with connect(dsn) as conn:
if freeze.is_frozen(conn):
raise UsageError("Tiger cannot be imported when database frozen (Github issue #3048)")
with TigerInput(data_dir) as tar: with TigerInput(data_dir) as tar:
if not tar: if not tar:
return 1 return 1
with connect(dsn) as conn: with connect(dsn) as conn:
sql = SQLPreprocessor(conn, config) sql = SQLPreprocessor(conn, config)
if not table_exists(conn, 'search_name'):
raise UsageError(
"Cannot perform tiger import: required tables are missing. "
"See https://github.com/osm-search/Nominatim/issues/2463 for details."
)
sql.run_sql_file(conn, 'tiger_import_start.sql') sql.run_sql_file(conn, 'tiger_import_start.sql')
# Reading files and then for each file line handling # Reading files and then for each file line handling

View File

@@ -42,6 +42,22 @@ Feature: Tests for finding places by osm_type and osm_id
| jsonv2 | json | | jsonv2 | json |
| geojson | geojson | | geojson | geojson |
Scenario Outline: Lookup with entrances
When sending v1/lookup with format <format>
| osm_ids | entrances |
| W429210603 | 1 |
Then a HTTP 200 is returned
And the result is valid <outformat>
And result 0 contains in field entrances+0
| osm_id | type | lat | lon |
| 6580031131 | yes | 47.2489382 | 9.5284033 |
Examples:
| format | outformat |
| json | json |
| jsonv2 | json |
| geojson | geojson |
Scenario: Linked places return information from the linkee Scenario: Linked places return information from the linkee
When sending v1/lookup with format geocodejson When sending v1/lookup with format geocodejson
| osm_ids | | osm_ids |

View File

@@ -167,3 +167,18 @@ Feature: v1/reverse Parameter Tests
| json | json | | json | json |
| jsonv2 | json | | jsonv2 | json |
| xml | xml | | xml | xml |
Scenario Outline: Reverse with entrances
When sending v1/reverse with format <format>
| lat | lon | entrances | zoom |
| 47.24942041089678 | 9.52854573737568 | 1 | 18 |
Then a HTTP 200 is returned
And the result is valid <outformat>
And the result contains array field entrances where element 0 contains
| osm_id | type | lat | lon |
| 6580031131 | yes | 47.2489382 | 9.5284033 |
Examples:
| format | outformat |
| json | json |
| jsonv2 | json |

View File

@@ -297,20 +297,30 @@ Feature: Linking of places
| R1 | LabelPlace | | R1 | LabelPlace |
@skip
Scenario: Linked places expand default language names Scenario: Linked places expand default language names
Given the grid Given the grid with origin CO
| 1 | | 2 | | 1 | | 2 | | 5 | | 6 |
| | 9 | | | | 9 | | | | 10 | |
| 4 | | 3 | | 4 | | 3 | | 8 | | 7 |
Given the places And the places
| osm | class | type | name+name | geometry | | osm | class | type | name+name | geometry |
| N9 | place | city | Popayán | 9 | | N9 | place | city | Popayán | 9 |
| R1 | boundary | administrative | Perímetro Urbano Popayán | (1,2,3,4,1) | And the places
| osm | class | type | name+name:en | geometry |
| N10 | place | city | Open | 10 |
And the places
| osm | class | type | name+name | geometry | admin |
| R1 | boundary | administrative | Perímetro Urbano Popayán | (1,2,3,4,1) | 8 |
| R2 | boundary | administrative | Abre | (5,6,7,8,5) | 8 |
And the relations And the relations
| id | members | | id | members |
| 1 | N9:label | | 1 | N9:label |
| 2 | N10:label |
When importing When importing
Then placex contains
| object | linked_place_id |
| N9:place | R1 |
| R1:boundary | - |
Then placex contains Then placex contains
| object | name+_place_name | name+_place_name:es | | object | name+_place_name | name+_place_name:es |
| R1 | Popayán | Popayán | | R1 | Popayán | Popayán |

View File

@@ -318,3 +318,64 @@ Feature: Searching of house numbers
Then the result set contains Then the result set contains
| object | | object |
| W20 | | W20 |
Scenario: A housenumber with interpolation is found
Given the places
| osm | class | type | housenr | addr+interpolation | geometry |
| N1 | building | yes | 1-5 | odd | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | Rue Paris | 1,2,3 |
When importing
When geocoding "Rue Paris 1"
Then the result set contains
| object | address+house_number |
| N1 | 1-5 |
When geocoding "Rue Paris 3"
Then the result set contains
| object | address+house_number |
| N1 | 1-5 |
When geocoding "Rue Paris 5"
Then the result set contains
| object | address+house_number |
| N1 | 1-5 |
When geocoding "Rue Paris 2"
Then the result set contains
| object |
| W10 |
Scenario: A housenumber with bad interpolation is ignored
Given the places
| osm | class | type | housenr | addr+interpolation | geometry |
| N1 | building | yes | 1-5 | bad | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | Rue Paris | 1,2,3 |
When importing
When geocoding "Rue Paris 1-5"
Then the result set contains
| object | address+house_number |
| N1 | 1-5 |
When geocoding "Rue Paris 3"
Then the result set contains
| object |
| W10 |
Scenario: A bad housenumber with a good interpolation is just a housenumber
Given the places
| osm | class | type | housenr | addr+interpolation | geometry |
| N1 | building | yes | 1-100 | all | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | Rue Paris | 1,2,3 |
When importing
When geocoding "Rue Paris 1-100"
Then the result set contains
| object | address+house_number |
| N1 | 1-100 |
When geocoding "Rue Paris 3"
Then the result set contains
| object |
| W10 |

View File

@@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid):
def _write(data): def _write(data):
fname = tmp_path / f"test_osm_{nr[0]}.opl" fname = tmp_path / f"test_osm_{nr[0]}.opl"
nr[0] += 1 nr[0] += 1
with fname.open('wt') as fd: with fname.open('wt', encoding='utf-8') as fd:
for line in data.split('\n'): for line in data.split('\n'):
if line.startswith('n') and ' x' not in line: if line.startswith('n') and ' x' not in line:
coord = node_grid.get(line[1:].split(' ')[0]) \ coord = node_grid.get(line[1:].split(' ')[0]) \
@@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid):
@given('the lua style file', target_fixture='osm2pgsql_options') @given('the lua style file', target_fixture='osm2pgsql_options')
def set_lua_style_file(osm2pgsql_options, docstring, tmp_path): def set_lua_style_file(osm2pgsql_options, docstring, tmp_path):
style = tmp_path / 'custom.lua' style = tmp_path / 'custom.lua'
style.write_text(docstring) style.write_text(docstring, encoding='utf-8')
osm2pgsql_options['osm2pgsql_style'] = str(style) osm2pgsql_options['osm2pgsql_style'] = str(style)
return osm2pgsql_options return osm2pgsql_options

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Various helper classes for running Nominatim commands. Various helper classes for running Nominatim commands.
@@ -54,15 +54,14 @@ class APIRunner:
def create_engine_starlette(self, environ): def create_engine_starlette(self, environ):
import nominatim_api.server.starlette.server import nominatim_api.server.starlette.server
from asgi_lifespan import LifespanManager from asgi_lifespan import LifespanManager
import httpx from starlette.testclient import TestClient
async def _request(endpoint, params, http_headers): async def _request(endpoint, params, http_headers):
app = nominatim_api.server.starlette.server.get_application(None, environ) app = nominatim_api.server.starlette.server.get_application(None, environ)
async with LifespanManager(app): async with LifespanManager(app):
async with httpx.AsyncClient(app=app, base_url="http://nominatim.test") as client: client = TestClient(app, base_url="http://nominatim.test")
response = await client.get("/" + endpoint, params=params, response = client.get("/" + endpoint, params=params, headers=http_headers)
headers=http_headers)
return APIResponse(endpoint, response.status_code, return APIResponse(endpoint, response.status_code,
response.text, response.headers) response.text, response.headers)

View File

@@ -7,6 +7,7 @@
""" """
Helper functions to compare expected values. Helper functions to compare expected values.
""" """
import ast
import collections.abc import collections.abc
import json import json
import re import re
@@ -58,7 +59,8 @@ COMPARISON_FUNCS = {
None: lambda val, exp: str(val) == exp, None: lambda val, exp: str(val) == exp,
'i': lambda val, exp: str(val).lower() == exp.lower(), 'i': lambda val, exp: str(val).lower() == exp.lower(),
'fm': lambda val, exp: re.fullmatch(exp, val) is not None, 'fm': lambda val, exp: re.fullmatch(exp, val) is not None,
'dict': lambda val, exp: val is None if exp == '-' else (val == eval('{' + exp + '}')), 'dict': lambda val, exp: (val is None if exp == '-'
else (val == ast.literal_eval('{' + exp + '}'))),
'in_box': within_box 'in_box': within_box
} }

View File

@@ -2,11 +2,12 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Helper classes for filling the place table. Helper classes for filling the place table.
""" """
import ast
import random import random
import string import string
@@ -35,7 +36,8 @@ class PlaceColumn:
self._add_hstore( self._add_hstore(
'name', 'name',
'name', 'name',
''.join(random.choices(string.printable, k=random.randrange(30))), ''.join(random.choices(string.ascii_uppercase)
+ random.choices(string.printable, k=random.randrange(30))),
) )
return self return self
@@ -50,7 +52,7 @@ class PlaceColumn:
elif key.startswith('addr+'): elif key.startswith('addr+'):
self._add_hstore('address', key[5:], value) self._add_hstore('address', key[5:], value)
elif key in ('name', 'address', 'extratags'): elif key in ('name', 'address', 'extratags'):
self.columns[key] = eval('{' + value + '}') self.columns[key] = ast.literal_eval('{' + value + '}')
else: else:
assert key in ('class', 'type'), "Unknown column '{}'.".format(key) assert key in ('class', 'type'), "Unknown column '{}'.".format(key)
self.columns[key] = None if value == '' else value self.columns[key] = None if value == '' else value

View File

@@ -58,7 +58,7 @@ gb:
pattern: "(l?ld[A-Z0-9]?) ?(dll)" pattern: "(l?ld[A-Z0-9]?) ?(dll)"
output: \1 \2 output: \1 \2
""") """, encoding='utf-8')
return project_env return project_env

View File

@@ -91,8 +91,9 @@ class TestCliWithDb:
postcode_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer, postcode_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer,
'index_postcodes') 'index_postcodes')
has_pending_retvals = [True, False]
monkeypatch.setattr(nominatim_db.indexer.indexer.Indexer, 'has_pending', monkeypatch.setattr(nominatim_db.indexer.indexer.Indexer, 'has_pending',
[False, True].pop) lambda *args, **kwargs: has_pending_retvals.pop(0))
assert self.call_nominatim('index', *params) == 0 assert self.call_nominatim('index', *params) == 0

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Test for loading dotenv configuration. Test for loading dotenv configuration.
@@ -48,7 +48,7 @@ def test_no_project_dir(make_config):
@pytest.mark.parametrize("val", ('apache', '"apache"')) @pytest.mark.parametrize("val", ('apache', '"apache"'))
def test_prefer_project_setting_over_default(make_config, val, tmp_path): def test_prefer_project_setting_over_default(make_config, val, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val)) envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8')
config = make_config(tmp_path) config = make_config(tmp_path)
@@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path):
def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path): def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody') monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')
@@ -68,13 +68,13 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa
def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path): def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
envfile = tmp_path / '.env' envfile = tmp_path / '.env'
envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') envfile.write_text('NOMINATIM_OSM2PGSQL_BINARY=osm2pgsql\n', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '') monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '')
config = make_config(tmp_path) config = make_config(tmp_path)
assert config.DATABASE_WEBUSER == '' assert config.OSM2PGSQL_BINARY == ''
def test_get_os_env_add_defaults(make_config, monkeypatch): def test_get_os_env_add_defaults(make_config, monkeypatch):
@@ -200,14 +200,15 @@ def test_get_path_empty(make_config):
assert not config.get_path('TOKENIZER_CONFIG') assert not config.get_path('TOKENIZER_CONFIG')
def test_get_path_absolute(make_config, monkeypatch): def test_get_path_absolute(make_config, monkeypatch, tmp_path):
config = make_config() config = make_config()
monkeypatch.setenv('NOMINATIM_FOOBAR', '/dont/care') p = (tmp_path / "does_not_exist").resolve()
monkeypatch.setenv('NOMINATIM_FOOBAR', str(p))
result = config.get_path('FOOBAR') result = config.get_path('FOOBAR')
assert isinstance(result, Path) assert isinstance(result, Path)
assert str(result) == '/dont/care' assert str(result) == str(p)
def test_get_path_relative(make_config, monkeypatch, tmp_path): def test_get_path_relative(make_config, monkeypatch, tmp_path):
@@ -232,7 +233,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch):
def test_get_import_style_extern_relative(make_config_path, monkeypatch): def test_get_import_style_extern_relative(make_config_path, monkeypatch):
config = make_config_path() config = make_config_path()
(config.project_dir / 'custom.style').write_text('x') (config.project_dir / 'custom.style').write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style') monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style')
@@ -243,7 +244,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch):
config = make_config() config = make_config()
cfgfile = tmp_path / 'test.style' cfgfile = tmp_path / 'test.style'
cfgfile.write_text('x') cfgfile.write_text('x', encoding='utf-8')
monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile)) monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile))
@@ -254,10 +255,10 @@ def test_load_subconf_from_project_dir(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.project_dir / 'test.yaml' testfile = config.project_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: miau\ncat: muh\n') testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -268,7 +269,7 @@ def test_load_subconf_from_settings_dir(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -280,7 +281,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('cow: muh\ncat: miau\n') testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -291,8 +292,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n') (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -303,7 +304,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_
monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG') config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -314,8 +315,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
(getattr(config, location) / 'other.yaml').write_text('dog: bark\n') (getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -326,7 +327,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
config = make_config_path() config = make_config_path()
(config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml', config='MY_CONFIG') config.load_sub_configuration('test.yaml', config='MY_CONFIG')
@@ -335,7 +336,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
def test_load_subconf_json(make_config_path): def test_load_subconf_json(make_config_path):
config = make_config_path() config = make_config_path()
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}') (config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8')
rules = config.load_sub_configuration('test.json') rules = config.load_sub_configuration('test.json')
@@ -352,7 +353,7 @@ def test_load_subconf_not_found(make_config_path):
def test_load_subconf_env_unknown_format(make_config_path): def test_load_subconf_env_unknown_format(make_config_path):
config = make_config_path() config = make_config_path()
(config.project_dir / 'test.xml').write_text('<html></html>') (config.project_dir / 'test.xml').write_text('<html></html>', encoding='utf-8')
with pytest.raises(UsageError, match='unknown format'): with pytest.raises(UsageError, match='unknown format'):
config.load_sub_configuration('test.xml') config.load_sub_configuration('test.xml')
@@ -362,8 +363,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n') testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8')
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n') (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -375,8 +376,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n') testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n') (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')
@@ -387,8 +388,8 @@ def test_load_subconf_include_bad_format(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n') testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n') (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
with pytest.raises(UsageError, match='Cannot handle config file format.'): with pytest.raises(UsageError, match='Cannot handle config file format.'):
config.load_sub_configuration('test.yaml') config.load_sub_configuration('test.yaml')
@@ -398,7 +399,7 @@ def test_load_subconf_include_not_found(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.txt\n') testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
with pytest.raises(UsageError, match='Config file not found.'): with pytest.raises(UsageError, match='Config file not found.'):
config.load_sub_configuration('test.yaml') config.load_sub_configuration('test.yaml')
@@ -408,9 +409,9 @@ def test_load_subconf_include_recursive(make_config_path):
config = make_config_path() config = make_config_path()
testfile = config.config_dir / 'test.yaml' testfile = config.config_dir / 'test.yaml'
testfile.write_text('base: !include inc.yaml\n') testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n') (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8')
(config.config_dir / 'more.yaml').write_text('- the end\n') (config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8')
rules = config.load_sub_configuration('test.yaml') rules = config.load_sub_configuration('test.yaml')

View File

@@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config):
def test_load_plugin_module(test_config, tmp_path): def test_load_plugin_module(test_config, tmp_path):
(tmp_path / 'project' / 'testpath').mkdir() (tmp_path / 'project' / 'testpath').mkdir()
(tmp_path / 'project' / 'testpath' / 'mymod.py')\ (tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path):
# also test reloading module # also test reloading module
(tmp_path / 'project' / 'testpath' / 'mymod.py')\ (tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'hjothjorhj'") .write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8')
module = test_config.load_plugin_module('testpath/mymod.py', 'private.something') module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')
@@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
pythonpath = tmp_path / 'priv-python' pythonpath = tmp_path / 'priv-python'
pythonpath.mkdir() pythonpath.mkdir()
(pythonpath / MODULE_NAME).mkdir() (pythonpath / MODULE_NAME).mkdir()
(pythonpath / MODULE_NAME / '__init__.py').write_text('') (pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8')
(pythonpath / MODULE_NAME / 'tester.py')\ (pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'") .write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')
monkeypatch.syspath_prepend(pythonpath) monkeypatch.syspath_prepend(pythonpath)
@@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
# also test reloading module # also test reloading module
(pythonpath / MODULE_NAME / 'tester.py')\ (pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'dfigjreigj'") .write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8')
module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something') module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something')

View File

@@ -2,12 +2,16 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
import itertools import itertools
import sys import sys
import asyncio
from pathlib import Path from pathlib import Path
if sys.platform == 'win32':
asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy())
import psycopg import psycopg
from psycopg import sql as pysql from psycopg import sql as pysql
import pytest import pytest
@@ -17,12 +21,11 @@ SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
sys.path.insert(0, str(SRC_DIR / 'src')) sys.path.insert(0, str(SRC_DIR / 'src'))
from nominatim_db.config import Configuration from nominatim_db.config import Configuration
from nominatim_db.db import connection from nominatim_db.db import connection, properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor from nominatim_db.db.sql_preprocessor import SQLPreprocessor
import nominatim_db.tokenizer.factory import nominatim_db.tokenizer.factory
import dummy_tokenizer import dummy_tokenizer
import mocks
from cursor import CursorForTesting from cursor import CursorForTesting
@@ -60,7 +63,7 @@ def temp_db(monkeypatch):
with psycopg.connect(dbname='postgres', autocommit=True) as conn: with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name)) cur.execute(pysql.SQL('DROP DATABASE IF EXISTS') + pysql.Identifier(name))
@pytest.fixture @pytest.fixture
@@ -104,7 +107,9 @@ def table_factory(temp_db_conn):
""" """
def mk_table(name, definition='id INT', content=None): def mk_table(name, definition='id INT', content=None):
with psycopg.ClientCursor(temp_db_conn) as cur: with psycopg.ClientCursor(temp_db_conn) as cur:
cur.execute('CREATE TABLE {} ({})'.format(name, definition)) cur.execute(pysql.SQL("CREATE TABLE {} ({})")
.format(pysql.Identifier(name),
pysql.SQL(definition)))
if content: if content:
sql = pysql.SQL("INSERT INTO {} VALUES ({})")\ sql = pysql.SQL("INSERT INTO {} VALUES ({})")\
.format(pysql.Identifier(name), .format(pysql.Identifier(name),
@@ -130,28 +135,50 @@ def project_env(tmp_path):
@pytest.fixture @pytest.fixture
def property_table(table_factory, temp_db_conn): def country_table(table_factory):
table_factory('nominatim_properties', 'property TEXT, value TEXT') table_factory('country_name', 'partition INT, country_code varchar(2), name hstore')
return mocks.MockPropertyTable(temp_db_conn)
@pytest.fixture @pytest.fixture
def status_table(table_factory): def country_row(country_table, temp_db_cursor):
def _add(partition=None, country=None, names=None):
temp_db_cursor.insert_row('country_name', partition=partition,
country_code=country, name=names)
return _add
@pytest.fixture
def load_sql(temp_db_conn, country_table):
conf = Configuration(None)
def _run(*filename, **kwargs):
for fn in filename:
SQLPreprocessor(temp_db_conn, conf).run_sql_file(temp_db_conn, fn, **kwargs)
return _run
@pytest.fixture
def property_table(load_sql, temp_db_conn):
load_sql('tables/nominatim_properties.sql')
class _PropTable:
def set(self, name, value):
properties.set_property(temp_db_conn, name, value)
def get(self, name):
return properties.get_property(temp_db_conn, name)
return _PropTable()
@pytest.fixture
def status_table(load_sql):
""" Create an empty version of the status table and """ Create an empty version of the status table and
the status logging table. the status logging table.
""" """
table_factory('import_status', load_sql('tables/status.sql')
"""lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean""")
table_factory('import_osmosis_log',
"""batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text""")
@pytest.fixture @pytest.fixture
@@ -176,12 +203,14 @@ def place_row(place_table, temp_db_cursor):
prerequisite to the fixture. prerequisite to the fixture.
""" """
idseq = itertools.count(1001) idseq = itertools.count(1001)
def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None, def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom=None): admin_level=None, address=None, extratags=None, geom='POINT(0 0)'):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", args = {'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
(osm_id or next(idseq), osm_type, cls, typ, names, 'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
admin_level, address, extratags, 'address': address, 'extratags': extratags,
geom or 'SRID=4326;POINT(0 0)')) 'geometry': _with_srid(geom)}
temp_db_cursor.insert_row('place', **args)
return _insert return _insert
@@ -201,50 +230,104 @@ def place_postcode_table(temp_db_with_extensions, table_factory):
@pytest.fixture @pytest.fixture
def place_postcode_row(place_postcode_table, temp_db_cursor): def place_postcode_row(place_postcode_table, temp_db_cursor):
""" A factory for rows in the place table. The table is created as a """ A factory for rows in the place_postcode table. The table is created as a
prerequisite to the fixture. prerequisite to the fixture.
""" """
idseq = itertools.count(5001) idseq = itertools.count(5001)
def _insert(osm_type='N', osm_id=None, postcode=None, country=None, def _insert(osm_type='N', osm_id=None, postcode=None, country=None,
centroid=None, geom=None): centroid='POINT(12.0 4.0)', geom=None):
temp_db_cursor.execute("INSERT INTO place_postcode VALUES (%s, %s, %s, %s, %s, %s)", temp_db_cursor.insert_row('place_postcode',
(osm_type, osm_id or next(idseq), osm_type=osm_type, osm_id=osm_id or next(idseq),
postcode, country, postcode=postcode, country_code=country,
_with_srid(centroid, 'POINT(12.0 4.0)'), centroid=_with_srid(centroid),
_with_srid(geom))) geometry=_with_srid(geom))
return _insert return _insert
@pytest.fixture @pytest.fixture
def placex_table(temp_db_with_extensions, temp_db_conn): def placex_table(temp_db_with_extensions, temp_db_conn, load_sql, place_table):
""" Create an empty version of the place table. """ Create an empty version of the placex table.
""" """
return mocks.MockPlacexTable(temp_db_conn) load_sql('tables/placex.sql')
temp_db_conn.execute("CREATE SEQUENCE IF NOT EXISTS seq_place START 1")
@pytest.fixture @pytest.fixture
def osmline_table(temp_db_with_extensions, table_factory): def placex_row(placex_table, temp_db_cursor):
table_factory('location_property_osmline', """ A factory for rows in the placex table. The table is created as a
"""place_id BIGINT, prerequisite to the fixture.
osm_id BIGINT, """
parent_place_id BIGINT, idseq = itertools.count(1001)
geometry_sector INTEGER,
indexed_date TIMESTAMP, def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
startnumber INTEGER, admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
endnumber INTEGER, country=None, housenumber=None, rank_search=30, rank_address=30,
partition SMALLINT, centroid='POINT(10 4)', indexed_status=0, indexed_date=None):
indexed_status SMALLINT, args = {'place_id': pysql.SQL("nextval('seq_place')"),
linegeo GEOMETRY, 'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
interpolationtype TEXT, 'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
address HSTORE, 'address': address, 'housenumber': housenumber,
postcode TEXT, 'rank_search': rank_search, 'rank_address': rank_address,
country_code VARCHAR(2)""") 'extratags': extratags,
'centroid': _with_srid(centroid), 'geometry': _with_srid(geom),
'country_code': country,
'indexed_status': indexed_status, 'indexed_date': indexed_date,
'partition': pysql.Literal(0), 'geometry_sector': pysql.Literal(1)}
return temp_db_cursor.insert_row('placex', **args)
return _add
@pytest.fixture @pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions): def osmline_table(temp_db_with_extensions, load_sql):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, ))) load_sql('tables/interpolation.sql')
@pytest.fixture
def osmline_row(osmline_table, temp_db_cursor):
idseq = itertools.count(20001)
def _add(osm_id=None, geom='LINESTRING(12.0 11.0, 12.003 11.0)'):
return temp_db_cursor.insert_row(
'location_property_osmline',
place_id=pysql.SQL("nextval('seq_place')"),
osm_id=osm_id or next(idseq),
geometry_sector=pysql.Literal(20),
partition=pysql.Literal(0),
indexed_status=1,
linegeo=_with_srid(geom))
return _add
@pytest.fixture
def postcode_table(temp_db_with_extensions, load_sql):
load_sql('tables/postcodes.sql')
@pytest.fixture
def postcode_row(postcode_table, temp_db_cursor):
def _add(country, postcode, x=34.5, y=-9.33):
geom = _with_srid(f"POINT({x} {y})")
return temp_db_cursor.insert_row(
'location_postcodes',
place_id=pysql.SQL("nextval('seq_place')"),
indexed_status=pysql.Literal(1),
country_code=country, postcode=postcode,
centroid=geom,
rank_search=pysql.Literal(16),
geometry=('ST_Expand(%s::geometry, 0.005)', geom))
return _add
@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions, country_row):
for part in range(3):
country_row(partition=part)
cfg = Configuration(None) cfg = Configuration(None)
cfg.set_libdirs(sql=tmp_path) cfg.set_libdirs(sql=tmp_path)
return cfg return cfg

View File

@@ -2,12 +2,13 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Specialised psycopg cursor with shortcut functions useful for testing. Specialised psycopg cursor with shortcut functions useful for testing.
""" """
import psycopg import psycopg
from psycopg import sql as pysql
class CursorForTesting(psycopg.Cursor): class CursorForTesting(psycopg.Cursor):
@@ -52,7 +53,49 @@ class CursorForTesting(psycopg.Cursor):
def table_rows(self, table, where=None): def table_rows(self, table, where=None):
""" Return the number of rows in the given table. """ Return the number of rows in the given table.
""" """
if where is None: sql = pysql.SQL('SELECT count(*) FROM') + pysql.Identifier(table)
return self.scalar('SELECT count(*) FROM ' + table) if where is not None:
sql += pysql.SQL('WHERE') + pysql.SQL(where)
return self.scalar('SELECT count(*) FROM {} WHERE {}'.format(table, where)) return self.scalar(sql)
def insert_row(self, table, **data):
""" Insert a row into the given table.
'data' is a dictionary of column names and associated values.
When the value is a pysql.Literal or pysql.SQL, then the expression
will be inserted as is instead of loading the value. When the
value is a tuple, then the first element will be added as an
SQL expression for the value and the second element is treated
as the actual value to insert. The SQL expression must contain
a %s placeholder in that case.
If data contains a 'place_id' column, then the value of the
place_id column after insert is returned. Otherwise the function
returns nothing.
"""
columns = []
placeholders = []
values = []
for k, v in data.items():
columns.append(pysql.Identifier(k))
if isinstance(v, tuple):
placeholders.append(pysql.SQL(v[0]))
values.append(v[1])
elif isinstance(v, (pysql.Literal, pysql.SQL)):
placeholders.append(v)
else:
placeholders.append(pysql.Placeholder())
values.append(v)
sql = pysql.SQL("INSERT INTO {table} ({columns}) VALUES({values})")\
.format(table=pysql.Identifier(table),
columns=pysql.SQL(',').join(columns),
values=pysql.SQL(',').join(placeholders))
if 'place_id' in data:
sql += pysql.SQL('RETURNING place_id')
self.execute(sql, values)
return self.fetchone()[0] if 'place_id' in data else None

View File

@@ -22,7 +22,8 @@ def loaded_country(def_config):
def env_with_country_config(project_env): def env_with_country_config(project_env):
def _mk_config(cfg): def _mk_config(cfg):
(project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg)) (project_env.project_dir / 'country_settings.yaml').write_text(
dedent(cfg), encoding='utf-8')
return project_env return project_env
@@ -52,11 +53,10 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur
@pytest.mark.parametrize("languages", (None, ['fr', 'en'])) @pytest.mark.parametrize("languages", (None, ['fr', 'en']))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor, def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
table_factory, tokenizer_mock, languages, loaded_country): country_row, tokenizer_mock, languages, loaded_country):
temp_db_cursor.execute('TRUNCATE country_name')
table_factory('country_name', 'country_code varchar(2), name hstore', country_row(country='us', names={"name": "us1", "name:af": "us2"})
content=(('us', '"name"=>"us1","name:af"=>"us2"'), country_row(country='fr', names={"name": "Fra", "name:en": "Fren"})
('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2 assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

View File

@@ -22,7 +22,7 @@ def sql_factory(tmp_path):
BEGIN BEGIN
{} {}
END; END;
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body)) $$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8')
return 'test.sql' return 'test.sql'
return _mk_sql return _mk_sql
@@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor): async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text(""" (tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT); CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);") CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8')
await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4) await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)

View File

@@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError
def test_execute_file_success(dsn, temp_db_cursor, tmp_path): def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') tmpfile.write_text(
'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile) db_utils.execute_file(dsn, tmpfile)
@@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path):
def test_execute_file_bad_sql(dsn, tmp_path): def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)') tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
with pytest.raises(UsageError): with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile) db_utils.execute_file(dsn, tmpfile)
@@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path):
def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path): def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)') tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, ignore_errors=True) db_utils.execute_file(dsn, tmpfile, ignore_errors=True)
def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor): def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)') tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)') db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')
@@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor): def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql' tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)') tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8')
db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)') db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')

View File

@@ -1,13 +1,12 @@
# SPDX-License-Identifier: GPL-3.0-or-later
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for running the indexing. Tests for running the indexing.
""" """
import itertools
import pytest import pytest
import pytest_asyncio # noqa import pytest_asyncio # noqa
@@ -15,129 +14,57 @@ from nominatim_db.indexer import indexer
from nominatim_db.tokenizer import factory from nominatim_db.tokenizer import factory
class IndexerTestDB: class TestIndexing:
@pytest.fixture(autouse=True)
def setup(self, temp_db_conn, project_env, tokenizer_mock,
placex_table, postcode_table, osmline_table):
self.conn = temp_db_conn
temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;
def __init__(self, conn): DROP TYPE IF EXISTS prepare_update_info CASCADE;
self.placex_id = itertools.count(100000) CREATE TYPE prepare_update_info AS (
self.osmline_id = itertools.count(500000) name HSTORE,
self.postcode_id = itertools.count(700000) address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
);
CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info) AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END; $$ LANGUAGE plpgsql STABLE;
self.conn = conn CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
self.conn.autocimmit = True RETURNS HSTORE AS $$ SELECT in_address $$ LANGUAGE sql STABLE;
with self.conn.cursor() as cur: """)
cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT,
type TEXT,
linked_place_id BIGINT,
rank_address SMALLINT,
rank_search SMALLINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
partition SMALLINT,
admin_level SMALLINT,
country_code TEXT,
address HSTORE,
token_info JSONB,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
osm_id BIGINT,
address HSTORE,
token_info JSONB,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_postcodes (
place_id BIGINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
country_code varchar(2),
postcode TEXT)""")
cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
cur.execute("""CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
cur.execute("""CREATE OR REPLACE FUNCTION
get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$
BEGIN
RETURN in_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
for table in ('placex', 'location_property_osmline', 'location_postcodes'): for table in ('placex', 'location_property_osmline', 'location_postcodes'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0} temp_db_conn.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update() FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table)) """.format(table))
self.tokenizer = factory.create_tokenizer(project_env)
def scalar(self, query): def scalar(self, query):
with self.conn.cursor() as cur: with self.conn.cursor() as cur:
cur.execute(query) cur.execute(query)
return cur.fetchone()[0] return cur.fetchone()[0]
def add_place(self, cls='place', typ='locality',
rank_search=30, rank_address=30, sector=20):
next_id = next(self.placex_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex
(place_id, class, type, rank_search, rank_address,
indexed_status, geometry_sector)
VALUES (%s, %s, %s, %s, %s, 1, %s)""",
(next_id, cls, typ, rank_search, rank_address, sector))
return next_id
def add_admin(self, **kwargs):
kwargs['cls'] = 'boundary'
kwargs['typ'] = 'administrative'
return self.add_place(**kwargs)
def add_osmline(self, sector=20):
next_id = next(self.osmline_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_property_osmline
(place_id, osm_id, indexed_status, geometry_sector)
VALUES (%s, %s, 1, %s)""",
(next_id, next_id, sector))
return next_id
def add_postcode(self, country, postcode):
next_id = next(self.postcode_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_postcodes
(place_id, indexed_status, country_code, postcode)
VALUES (%s, 1, %s, %s)""",
(next_id, country, postcode))
return next_id
def placex_unindexed(self): def placex_unindexed(self):
return self.scalar('SELECT count(*) from placex where indexed_status > 0') return self.scalar('SELECT count(*) from placex where indexed_status > 0')
@@ -145,148 +72,133 @@ class IndexerTestDB:
return self.scalar("""SELECT count(*) from location_property_osmline return self.scalar("""SELECT count(*) from location_property_osmline
WHERE indexed_status > 0""") WHERE indexed_status > 0""")
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_all_by_rank(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
@pytest.fixture assert self.placex_unindexed() == 31
def test_db(temp_db_conn): assert self.osmline_unindexed() == 1
yield IndexerTestDB(temp_db_conn)
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(0, 30)
@pytest.fixture assert self.placex_unindexed() == 0
def test_tokenizer(tokenizer_mock, project_env): assert self.osmline_unindexed() == 0
return factory.create_tokenizer(project_env)
assert self.scalar("""SELECT count(*) from placex
WHERE indexed_status = 0 and indexed_date is null""") == 0
# ranks come in order of rank address
assert self.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
@pytest.mark.parametrize("threads", [1, 15]) @pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_index_all_by_rank(test_db, threads, test_tokenizer): async def test_index_partial_without_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31): for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank) placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
test_db.add_osmline() osmline_row()
assert test_db.placex_unindexed() == 31 assert self.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1 assert self.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(0, 30) await idx.index_by_rank(4, 15)
assert test_db.placex_unindexed() == 0 assert self.placex_unindexed() == 19
assert test_db.osmline_unindexed() == 0 assert self.osmline_unindexed() == 1
assert test_db.scalar("""SELECT count(*) from placex assert self.scalar("""
WHERE indexed_status = 0 and indexed_date is null""") == 0 SELECT count(*) FROM placex
# ranks come in order of rank address WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0
assert test_db.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_with_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
@pytest.mark.parametrize("threads", [1, 15]) assert self.placex_unindexed() == 31
@pytest.mark.asyncio assert self.osmline_unindexed() == 1
async def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert test_db.placex_unindexed() == 31 idx = indexer.Indexer(dsn, self.tokenizer, threads)
assert test_db.osmline_unindexed() == 1 await idx.index_by_rank(28, 30)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', assert self.placex_unindexed() == 28
test_tokenizer, threads) assert self.osmline_unindexed() == 0
await idx.index_by_rank(4, 15)
assert test_db.placex_unindexed() == 19 assert self.scalar("""
assert test_db.osmline_unindexed() == 1 SELECT count(*) FROM placex
WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0
assert test_db.scalar(""" @pytest.mark.parametrize("threads", [1, 15])
SELECT count(*) FROM placex @pytest.mark.asyncio
WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0 async def test_index_boundaries(self, dsn, threads, placex_row, osmline_row):
for rank in range(4, 10):
placex_row(cls='boundary', typ='administrative',
rank_address=rank, rank_search=rank, indexed_status=1)
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
assert self.placex_unindexed() == 37
assert self.osmline_unindexed() == 1
@pytest.mark.parametrize("threads", [1, 15]) idx = indexer.Indexer(dsn, self.tokenizer, threads)
@pytest.mark.asyncio await idx.index_boundaries()
async def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert test_db.placex_unindexed() == 31 assert self.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1 assert self.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) assert self.scalar("""
await idx.index_by_rank(28, 30) SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0
assert test_db.placex_unindexed() == 27 @pytest.mark.parametrize("threads", [1, 15])
assert test_db.osmline_unindexed() == 0 @pytest.mark.asyncio
async def test_index_postcodes(self, dsn, threads, postcode_row):
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)
for postcode in range(32000, 33000):
postcode_row(country='us', postcode=postcode)
assert test_db.scalar(""" idx = indexer.Indexer(dsn, self.tokenizer, threads)
SELECT count(*) FROM placex await idx.index_postcodes()
WHERE indexed_status = 0 AND rank_address between 1 and 27""") == 0
assert self.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("threads", [1, 15]) @pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_index_boundaries(test_db, threads, test_tokenizer): async def test_index_full(self, dsn, analyse, placex_row, osmline_row, postcode_row):
for rank in range(4, 10): for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank) placex_row(cls='boundary', typ='administrative',
for rank in range(31): rank_address=rank, rank_search=rank, indexed_status=1)
test_db.add_place(rank_address=rank, rank_search=rank) for rank in range(31):
test_db.add_osmline() placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)
assert test_db.placex_unindexed() == 37 idx = indexer.Indexer(dsn, self.tokenizer, 4)
assert test_db.osmline_unindexed() == 1 await idx.index_full(analyse=analyse)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) assert self.placex_unindexed() == 0
await idx.index_boundaries(0, 30) assert self.osmline_unindexed() == 0
assert self.scalar("""SELECT count(*) FROM location_postcodes
assert test_db.placex_unindexed() == 31 WHERE indexed_status != 0""") == 0
assert test_db.osmline_unindexed() == 1
assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_postcodes()
assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio
async def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
for postcode in range(1000):
test_db.add_postcode('de', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
await idx.index_full(analyse=analyse)
assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0

View File

@@ -1,85 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom mocks for testing.
"""
import itertools
from nominatim_db.db import properties
class MockPlacexTable:
""" A placex table for testing.
"""
def __init__(self, conn):
self.idseq = itertools.count(10000)
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER,
rank_address SMALLINT,
rank_search SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
osm_id int8,
osm_type char(1),
class text,
type text,
name hstore,
admin_level smallint,
address hstore,
extratags hstore,
token_info jsonb,
geometry Geometry(Geometry,4326),
wikipedia TEXT,
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))""")
cur.execute("CREATE SEQUENCE IF NOT EXISTS seq_place")
conn.commit()
def add(self, osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, centroid=None):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class,
type, name, admin_level, address,
housenumber, rank_search,
extratags, centroid, geometry, country_code)
VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s)
RETURNING place_id""",
(osm_type, osm_id or next(self.idseq), cls, typ, names,
admin_level, address, housenumber, rank_search,
extratags, centroid, 'SRID=4326;' + geom,
country))
place_id = cur.fetchone()[0]
self.conn.commit()
return place_id
class MockPropertyTable:
""" A property table for testing.
"""
def __init__(self, conn):
self.conn = conn
def set(self, name, value):
""" Set a property in the table to the given value.
"""
properties.set_property(self.conn, name, value)
def get(self, name):
""" Set a property in the table to the given value.
"""
return properties.get_property(self.conn, name)

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for the sanitizer that normalizes housenumbers. Tests for the sanitizer that normalizes housenumbers.
@@ -67,3 +67,25 @@ def test_convert_to_name_unconverted(def_config, number):
assert 'housenumber' not in set(p.kind for p in names) assert 'housenumber' not in set(p.kind for p in names)
assert ('housenumber', number) in set((p.kind, p.name) for p in address) assert ('housenumber', number) in set((p.kind, p.name) for p in address)
@pytest.mark.parametrize('hnr,itype,out', [
('1-5', 'all', (1, 2, 3, 4, 5)),
('1-5', 'odd', (1, 3, 5)),
('1-5', 'even', (2, 4)),
('6-9', '1', (6, 7, 8, 9)),
('6-9', '2', (6, 8)),
('6-9', '3', (6, 9)),
('6-9', '5', (6,)),
('6-9', 'odd', (7, 9)),
('6-9', 'even', (6, 8)),
('6-22', 'even', (6, 8, 10, 12, 14, 16, 18, 20, 22))
])
def test_convert_interpolations(sanitize, hnr, itype, out):
assert set(sanitize(housenumber=hnr, interpolation=itype)) \
== {('housenumber', str(i)) for i in out}
@pytest.mark.parametrize('hnr', ('23', '23-', '3z-f', '1-10', '5-1', '1-4-5'))
def test_ignore_interpolation_with_bad_housenumber(sanitize, hnr):
assert sanitize(housenumber=hnr, interpolation='all') == [('housenumber', hnr)]

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for ICU tokenizer. Tests for ICU tokenizer.
@@ -15,7 +15,6 @@ import pytest
from nominatim_db.tokenizer import icu_tokenizer from nominatim_db.tokenizer import icu_tokenizer
import nominatim_db.tokenizer.icu_rule_loader import nominatim_db.tokenizer.icu_rule_loader
from nominatim_db.db import properties from nominatim_db.db import properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
from nominatim_db.data.place_info import PlaceInfo from nominatim_db.data.place_info import PlaceInfo
from mock_icu_word_table import MockIcuWordTable from mock_icu_word_table import MockIcuWordTable
@@ -31,7 +30,7 @@ def test_config(project_env, tmp_path):
sqldir = tmp_path / 'sql' sqldir = tmp_path / 'sql'
sqldir.mkdir() sqldir.mkdir()
(sqldir / 'tokenizer').mkdir() (sqldir / 'tokenizer').mkdir()
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'") (sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8')
project_env.lib_dir.sql = sqldir project_env.lib_dir.sql = sqldir
@@ -58,7 +57,7 @@ def db_prop(temp_db_conn):
def analyzer(tokenizer_factory, test_config, monkeypatch, def analyzer(tokenizer_factory, test_config, monkeypatch,
temp_db_with_extensions, tmp_path): temp_db_with_extensions, tmp_path):
sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql' sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql'
sql.write_text("SELECT 'a';") sql.write_text("SELECT 'a';", encoding='utf-8')
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();') monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory() tok = tokenizer_factory()
@@ -80,7 +79,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
if with_postcode: if with_postcode:
cfgstr['token-analysis'].append({'id': '@postcode', cfgstr['token-analysis'].append({'id': '@postcode',
'analyzer': 'postcodes'}) 'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr)) (test_config.project_dir / 'icu_tokenizer.yaml').write_text(
yaml.dump(cfgstr), encoding='utf-8')
tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config) tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config)
return tok.name_analyzer() return tok.name_analyzer()
@@ -89,13 +89,9 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
@pytest.fixture @pytest.fixture
def sql_functions(temp_db_conn, def_config, src_dir): def sql_functions(load_sql):
orig_sql = def_config.lib_dir.sql load_sql('functions/utils.sql')
def_config.lib_dir.sql = src_dir / 'lib-sql' load_sql('tokenizer/icu_tokenizer.sql')
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
sqlproc.run_sql_file(temp_db_conn, 'tokenizer/icu_tokenizer.sql')
def_config.lib_dir.sql = orig_sql
@pytest.fixture @pytest.fixture
@@ -190,7 +186,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor,
table_factory('test', 'txt TEXT') table_factory('test', 'txt TEXT')
func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql' func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES (1133)""") func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8')
tok.update_sql_functions(test_config) tok.update_sql_functions(test_config)
@@ -652,22 +648,21 @@ class TestUpdateWordTokens:
self.tok.update_word_tokens() self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1 assert word_table.count_housenumbers() == 1
def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table, def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table, placex_row):
placex_table):
add_housenumber(9999, '5432a') add_housenumber(9999, '5432a')
add_housenumber(9990, '34z') add_housenumber(9990, '34z')
placex_table.add(housenumber='34z') placex_row(housenumber='34z')
placex_table.add(housenumber='25432a') placex_row(housenumber='25432a')
assert word_table.count_housenumbers() == 2 assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens() self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1 assert word_table.count_housenumbers() == 1
def test_keep_housenumbers_from_placex_table_hnr_list(self, add_housenumber, def test_keep_housenumbers_from_placex_table_hnr_list(self, add_housenumber,
word_table, placex_table): word_table, placex_row):
add_housenumber(9991, '9 b') add_housenumber(9991, '9 b')
add_housenumber(9990, '34z') add_housenumber(9990, '34z')
placex_table.add(housenumber='9 a;9 b;9 c') placex_row(housenumber='9 a;9 b;9 c')
assert word_table.count_housenumbers() == 2 assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens() self.tok.update_word_tokens()

View File

@@ -27,7 +27,8 @@ class TestIcuRuleLoader:
self.project_env = project_env self.project_env = project_env
def write_config(self, content): def write_config(self, content):
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content)) (self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(
dedent(content), encoding='utf-8')
def config_rules(self, *variants): def config_rules(self, *variants):
content = dedent("""\ content = dedent("""\
@@ -119,7 +120,7 @@ class TestIcuRuleLoader:
variants: variants:
""") """)
transpath = self.project_env.project_dir / ('transliteration.yaml') transpath = self.project_env.project_dir / ('transliteration.yaml')
transpath.write_text('- "x > y"') transpath.write_text('- "x > y"', encoding='utf-8')
loader = ICURuleLoader(self.project_env) loader = ICURuleLoader(self.project_env)
rules = loader.get_transliteration_rules() rules = loader.get_transliteration_rules()

View File

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else else
echo "$@" echo "$@"
fi fi
""") """, encoding='utf-8')
osm2pgsql_exec.chmod(0o777) osm2pgsql_exec.chmod(0o777)
return dict(osm2pgsql=str(osm2pgsql_exec), return dict(osm2pgsql=str(osm2pgsql_exec),

View File

@@ -2,17 +2,17 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for maintenance and analysis functions. Tests for maintenance and analysis functions.
""" """
import pytest import pytest
import datetime as dt
from nominatim_db.errors import UsageError from nominatim_db.errors import UsageError
from nominatim_db.tools import admin from nominatim_db.tools import admin
from nominatim_db.tokenizer import factory from nominatim_db.tokenizer import factory
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
@@ -61,15 +61,14 @@ def test_analyse_indexing_unknown_osmid(project_env):
admin.analyse_indexing(project_env, osm_id='W12345674') admin.analyse_indexing(project_env, osm_id='W12345674')
def test_analyse_indexing_with_place_id(project_env, temp_db_cursor): def test_analyse_indexing_with_place_id(project_env, placex_row):
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)") place_id = placex_row()
admin.analyse_indexing(project_env, place_id=12345) admin.analyse_indexing(project_env, place_id=place_id)
def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor): def test_analyse_indexing_with_osm_id(project_env, placex_row):
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id) placex_row(osm_type='N', osm_id=10000)
VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(project_env, osm_id='N10000') admin.analyse_indexing(project_env, osm_id='N10000')
@@ -77,8 +76,8 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
class TestAdminCleanDeleted: class TestAdminCleanDeleted:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def setup_polygon_delete(self, project_env, table_factory, place_table, def setup_polygon_delete(self, project_env, table_factory, place_table, placex_row,
osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir): osmline_table, temp_db_cursor, load_sql):
""" Set up place_force_delete function and related tables """ Set up place_force_delete function and related tables
""" """
self.project_env = project_env self.project_env = project_env
@@ -91,12 +90,15 @@ class TestAdminCleanDeleted:
((100, 'N', 'boundary', 'administrative'), ((100, 'N', 'boundary', 'administrative'),
(145, 'N', 'boundary', 'administrative'), (145, 'N', 'boundary', 'administrative'),
(175, 'R', 'landcover', 'grass'))) (175, 'R', 'landcover', 'grass')))
temp_db_cursor.execute("""
INSERT INTO placex (place_id, osm_id, osm_type, class, type, now = dt.datetime.now()
indexed_date, indexed_status) placex_row(osm_type='N', osm_id=100, cls='boundary', typ='administrative',
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1), indexed_status=1, indexed_date=now - dt.timedelta(days=30))
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1), placex_row(osm_type='N', osm_id=145, cls='boundary', typ='administrative',
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""") indexed_status=1, indexed_date=now - dt.timedelta(days=90))
placex_row(osm_type='R', osm_id=175, cls='landcover', typ='grass',
indexed_status=1, indexed_date=now - dt.timedelta(days=90))
# set up tables and triggers for utils function # set up tables and triggers for utils function
table_factory('place_to_be_deleted', table_factory('place_to_be_deleted',
"""osm_id BIGINT, """osm_id BIGINT,
@@ -104,7 +106,6 @@ class TestAdminCleanDeleted:
class TEXT NOT NULL, class TEXT NOT NULL,
type TEXT NOT NULL, type TEXT NOT NULL,
deferred BOOLEAN""") deferred BOOLEAN""")
table_factory('country_name', 'partition INT')
table_factory('import_polygon_error', """osm_id BIGINT, table_factory('import_polygon_error', """osm_id BIGINT,
osm_type CHAR(1), osm_type CHAR(1),
class TEXT NOT NULL, class TEXT NOT NULL,
@@ -115,11 +116,7 @@ class TestAdminCleanDeleted:
$$ LANGUAGE plpgsql;""") $$ LANGUAGE plpgsql;""")
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
FOR EACH ROW EXECUTE PROCEDURE place_delete();""") FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
orig_sql = def_config.lib_dir.sql load_sql('functions/utils.sql')
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
def_config.lib_dir.sql = orig_sql
def test_admin_clean_deleted_no_records(self): def test_admin_clean_deleted_no_records(self):
admin.clean_deleted_relations(self.project_env, age='1 year') admin.clean_deleted_relations(self.project_env, age='1 year')

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for database integrity checks. Tests for database integrity checks.
@@ -46,8 +46,7 @@ def test_check_database_version_bad(property_table, temp_db_conn, def_config):
assert chkdb.check_database_version(temp_db_conn, def_config) == chkdb.CheckState.FATAL assert chkdb.check_database_version(temp_db_conn, def_config) == chkdb.CheckState.FATAL
def test_check_placex_table_good(table_factory, temp_db_conn, def_config): def test_check_placex_table_good(placex_table, temp_db_conn, def_config):
table_factory('placex')
assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.OK assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.OK
@@ -55,13 +54,13 @@ def test_check_placex_table_bad(temp_db_conn, def_config):
assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.FATAL assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.FATAL
def test_check_placex_table_size_good(table_factory, temp_db_conn, def_config): def test_check_placex_table_size_good(placex_row, temp_db_conn, def_config):
table_factory('placex', content=((1, ), (2, ))) for _ in range(2):
placex_row()
assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.OK assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.OK
def test_check_placex_table_size_bad(table_factory, temp_db_conn, def_config): def test_check_placex_table_size_bad(placex_table, temp_db_conn, def_config):
table_factory('placex')
assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL
@@ -84,15 +83,22 @@ def test_check_tokenizer(temp_db_conn, def_config, monkeypatch,
assert chkdb.check_tokenizer(temp_db_conn, def_config) == state assert chkdb.check_tokenizer(temp_db_conn, def_config) == state
def test_check_indexing_good(table_factory, temp_db_conn, def_config): def test_check_indexing_good(placex_row, temp_db_conn, def_config):
table_factory('placex', 'place_id int, indexed_status smallint', for _ in range(2):
content=((1, 0), (2, 0))) placex_row(indexed_status=0)
assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.OK assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.OK
def test_check_indexing_bad(table_factory, temp_db_conn, def_config): def test_check_indexing_bad(placex_row, temp_db_conn, def_config):
table_factory('placex', 'place_id int, indexed_status smallint', for status in (0, 2):
content=((1, 0), (2, 2))) placex_row(indexed_status=status)
assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.FAIL
def test_check_indexing_bad_frozen(placex_row, temp_db_conn, def_config):
for status in (0, 2):
placex_row(indexed_status=status)
temp_db_conn.execute('DROP TABLE place')
assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.WARN assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.WARN

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for functions to import a new database. Tests for functions to import a new database.
@@ -25,12 +25,14 @@ class TestDatabaseSetup:
def setup_nonexistant_db(self): def setup_nonexistant_db(self):
with psycopg.connect(dbname='postgres', autocommit=True) as conn: with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}') cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))
yield True yield True
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}') cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))
@pytest.fixture @pytest.fixture
def cursor(self): def cursor(self):
@@ -62,7 +64,7 @@ class TestDatabaseSetup:
def test_create_db_missing_ro_user(self): def test_create_db_missing_ro_user(self):
with pytest.raises(UsageError, match='Missing read-only user.'): with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}', database_import.setup_database_skeleton(f'dbname={self.DBNAME}',
rouser='sdfwkjkjgdugu2;jgsafkljas;') rouser='sdfwkjkjgdugu2jgsafkljas')
def test_setup_extensions_old_postgis(self, monkeypatch): def test_setup_extensions_old_postgis(self, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50)) monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
@@ -76,8 +78,8 @@ def test_setup_skeleton_already_exists(temp_db):
database_import.setup_database_skeleton(f'dbname={temp_db}') database_import.setup_database_skeleton(f'dbname={temp_db}')
def test_import_osm_data_simple(table_factory, osm2pgsql_options, capfd): def test_import_osm_data_simple(place_row, osm2pgsql_options, capfd):
table_factory('place', content=((1, ), )) place_row()
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options) database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
captured = capfd.readouterr() captured = capfd.readouterr()
@@ -90,13 +92,13 @@ def test_import_osm_data_simple(table_factory, osm2pgsql_options, capfd):
assert 'file.pbf' in captured.out assert 'file.pbf' in captured.out
def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, capfd): def test_import_osm_data_multifile(place_row, tmp_path, osm2pgsql_options, capfd):
table_factory('place', content=((1, ), )) place_row()
osm2pgsql_options['osm2pgsql_cache'] = 0 osm2pgsql_options['osm2pgsql_cache'] = 0
files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm'] files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files: for f in files:
f.write_text('test') f.write_text('test', encoding='utf-8')
database_import.import_osm_data(files, osm2pgsql_options) database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr() captured = capfd.readouterr()
@@ -105,26 +107,23 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c
assert 'file2.osm' in captured.out assert 'file2.osm' in captured.out
def test_import_osm_data_simple_no_data(table_factory, osm2pgsql_options): def test_import_osm_data_simple_no_data(place_row, osm2pgsql_options):
table_factory('place')
with pytest.raises(UsageError, match='No data imported'): with pytest.raises(UsageError, match='No data imported'):
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options) database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options)
def test_import_osm_data_simple_ignore_no_data(table_factory, osm2pgsql_options): def test_import_osm_data_simple_ignore_no_data(place_table, osm2pgsql_options):
table_factory('place')
database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options, database_import.import_osm_data(Path('file.pbf'), osm2pgsql_options,
ignore_errors=True) ignore_errors=True)
def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql_options): def test_import_osm_data_drop(place_row, table_factory, temp_db_cursor,
table_factory('place', content=((1, ), )) tmp_path, osm2pgsql_options):
place_row()
table_factory('planet_osm_nodes') table_factory('planet_osm_nodes')
flatfile = tmp_path / 'flatfile' flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch') flatfile.write_text('touch', encoding='utf-8')
osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())
@@ -134,8 +133,8 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
assert not temp_db_cursor.table_exists('planet_osm_nodes') assert not temp_db_cursor.table_exists('planet_osm_nodes')
def test_import_osm_data_default_cache(table_factory, osm2pgsql_options, capfd): def test_import_osm_data_default_cache(place_row, osm2pgsql_options, capfd):
table_factory('place', content=((1, ), )) place_row()
osm2pgsql_options['osm2pgsql_cache'] = 0 osm2pgsql_options['osm2pgsql_cache'] = 0
@@ -168,14 +167,41 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_load_data(dsn, place_row, placex_table, osmline_table, async def test_load_data(dsn, place_row, placex_table, osmline_table,
temp_db_cursor, threads): temp_db_cursor, threads):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute(pysql.SQL("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""").format(pysql.Identifier(func)))
for oid in range(100, 130): for oid in range(100, 130):
place_row(osm_id=oid) place_row(osm_id=oid)
place_row(osm_type='W', osm_id=342, cls='place', typ='houses', place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)') geom='LINESTRING(0 0, 10 10)')
temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION placex_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1;
NEW.centroid := ST_Centroid(NEW.geometry);
NEW.partition := 0;
NEW.geometry_sector := 2424;
NEW.rank_address := 30;
NEW.rank_search := 30;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION osmline_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
IF NEW.indexed_status IS NULL THEN
NEW.indexed_status := 1;
NEW.partition := 0;
NEW.geometry_sector := 2424;
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;
CREATE TRIGGER placex_before_insert BEFORE INSERT ON placex
FOR EACH ROW EXECUTE PROCEDURE placex_insert();
CREATE TRIGGER osmline_before_insert BEFORE INSERT ON location_property_osmline
FOR EACH ROW EXECUTE PROCEDURE osmline_insert();
""")
await database_import.load_data(dsn, threads) await database_import.load_data(dsn, threads)
@@ -186,50 +212,53 @@ async def test_load_data(dsn, place_row, placex_table, osmline_table,
class TestSetupSQL: class TestSetupSQL:
@pytest.fixture(autouse=True) @pytest.fixture(autouse=True)
def init_env(self, temp_db, tmp_path, def_config, sql_preprocessor_cfg): def osm2ppsql_skel(self, def_config, temp_db_with_extensions, place_row,
def_config.lib_dir.sql = tmp_path / 'sql' country_table, table_factory, temp_db_conn):
def_config.lib_dir.sql.mkdir()
self.config = def_config self.config = def_config
place_row()
table_factory('osm2pgsql_properties', 'property TEXT, value TEXT',
(('db_format', 2),))
def write_sql(self, fname, content): table_factory('planet_osm_rels', 'id BIGINT, members JSONB, tags JSONB')
(self.config.lib_dir.sql / fname).write_text(content) temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION planet_osm_member_ids(jsonb, character)
RETURNS bigint[] AS $$
SELECT array_agg((el->>'ref')::int8)
FROM jsonb_array_elements($1) AS el WHERE el->>'type' = $2
$$ LANGUAGE sql IMMUTABLE;
""")
@pytest.mark.parametrize("reverse", [True, False]) @pytest.mark.parametrize("reverse", [True, False])
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse): def test_create_tables(self, table_factory, temp_db_conn, temp_db_cursor, reverse):
self.write_sql('tables.sql', table_factory('country_osm_grid')
"""CREATE FUNCTION test() RETURNS bool
AS $$ SELECT {{db.reverse_only}} $$ LANGUAGE SQL""")
database_import.create_tables(temp_db_conn, self.config, reverse) database_import.create_tables(temp_db_conn, self.config, reverse)
temp_db_cursor.scalar('SELECT test()') == reverse assert temp_db_cursor.table_exists('placex')
assert not reverse == temp_db_cursor.table_exists('search_name')
def test_create_table_triggers(self, temp_db_conn, temp_db_cursor): def test_create_table_triggers(self, temp_db_conn, placex_table, osmline_table,
self.write_sql('table-triggers.sql', postcode_table, load_sql):
"""CREATE FUNCTION test() RETURNS TEXT load_sql('functions.sql')
AS $$ SELECT 'a'::text $$ LANGUAGE SQL""")
database_import.create_table_triggers(temp_db_conn, self.config) database_import.create_table_triggers(temp_db_conn, self.config)
temp_db_cursor.scalar('SELECT test()') == 'a' def test_create_partition_tables(self, country_row, temp_db_conn, temp_db_cursor, load_sql):
for i in range(3):
def test_create_partition_tables(self, temp_db_conn, temp_db_cursor): country_row(partition=i)
self.write_sql('partition-tables.src.sql', load_sql('tables/location_area.sql')
"""CREATE FUNCTION test() RETURNS TEXT
AS $$ SELECT 'b'::text $$ LANGUAGE SQL""")
database_import.create_partition_tables(temp_db_conn, self.config) database_import.create_partition_tables(temp_db_conn, self.config)
temp_db_cursor.scalar('SELECT test()') == 'b' for i in range(3):
assert temp_db_cursor.table_exists(f"location_area_large_{i}")
assert temp_db_cursor.table_exists(f"search_name_{i}")
@pytest.mark.parametrize("drop", [True, False]) @pytest.mark.parametrize("drop", [True, False])
@pytest.mark.asyncio @pytest.mark.asyncio
async def test_create_search_indices(self, temp_db_conn, temp_db_cursor, drop): async def test_create_search_indices(self, temp_db_conn, temp_db_cursor, drop, load_sql):
self.write_sql('indices.sql', load_sql('tables.sql', 'functions/ranking.sql')
"""CREATE FUNCTION test() RETURNS bool
AS $$ SELECT {{drop}} $$ LANGUAGE SQL""")
await database_import.create_search_indices(temp_db_conn, self.config, drop) await database_import.create_search_indices(temp_db_conn, self.config, drop)
temp_db_cursor.scalar('SELECT test()') == drop assert temp_db_cursor.index_exists('placex', 'idx_placex_geometry')
assert not drop == temp_db_cursor.index_exists('placex', 'idx_placex_geometry_buildings')

View File

@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):
def test_drop_flatnode_file_delete(tmp_path): def test_drop_flatnode_file_delete(tmp_path):
flatfile = tmp_path / 'flatnode.store' flatfile = tmp_path / 'flatnode.store'
flatfile.write_text('Some content') flatfile.write_text('Some content', encoding="utf-8")
freeze.drop_flatnode_file(flatfile) freeze.drop_flatnode_file(flatfile)

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for import special phrases methods Tests for import special phrases methods
@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
return the content of the static xml test file. return the content of the static xml test file.
""" """
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text() return xml_test_content.read_text(encoding='utf-8')
@pytest.fixture @pytest.fixture
@@ -125,9 +125,8 @@ def test_grant_access_to_web_user(temp_db_conn, temp_db_cursor, table_factory,
phrase_class, phrase_type) phrase_class, phrase_type)
def test_create_place_classtype_table_and_indexes( def test_create_place_classtype_table_and_indexes(temp_db_cursor, def_config, placex_row,
temp_db_cursor, def_config, placex_table, sp_importer, temp_db_conn, monkeypatch):
sp_importer, temp_db_conn, monkeypatch):
""" """
Test that _create_place_classtype_table_and_indexes() Test that _create_place_classtype_table_and_indexes()
create the right place_classtype tables and place_id indexes create the right place_classtype tables and place_id indexes
@@ -136,7 +135,7 @@ def test_create_place_classtype_table_and_indexes(
""" """
pairs = set([('class1', 'type1'), ('class2', 'type2')]) pairs = set([('class1', 'type1'), ('class2', 'type2')])
for pair in pairs: for pair in pairs:
placex_table.add(cls=pair[0], typ=pair[1]) # adding to db placex_row(cls=pair[0], typ=pair[1]) # adding to db
sp_importer._create_classtype_table_and_indexes(pairs) sp_importer._create_classtype_table_and_indexes(pairs)
temp_db_conn.commit() temp_db_conn.commit()
@@ -178,7 +177,7 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
@pytest.mark.parametrize("should_replace", [(True), (False)]) @pytest.mark.parametrize("should_replace", [(True), (False)])
def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer, def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
placex_table, table_factory, tokenizer_mock, placex_row, table_factory, tokenizer_mock,
xml_wiki_content, should_replace): xml_wiki_content, should_replace):
""" """
Check that the main import_phrases() method is well executed. Check that the main import_phrases() method is well executed.
@@ -199,8 +198,8 @@ def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
type_test = 'zip_line' type_test = 'zip_line'
tokenizer = tokenizer_mock() tokenizer = tokenizer_mock()
placex_table.add(cls=class_test, typ=type_test) # in db for special phrase filtering placex_row(cls=class_test, typ=type_test) # in db for special phrase filtering
placex_table.add(cls='amenity', typ='animal_shelter') # in db for special phrase filtering placex_row(cls='amenity', typ='animal_shelter') # in db for special phrase filtering
sp_importer.import_phrases(tokenizer, should_replace) sp_importer.import_phrases(tokenizer, should_replace)
assert len(tokenizer.analyser_cache['special_phrases']) == 19 assert len(tokenizer.analyser_cache['special_phrases']) == 19
@@ -257,7 +256,7 @@ def check_placeid_and_centroid_indexes(temp_db_cursor, phrase_class, phrase_type
@pytest.mark.parametrize("should_replace", [(True), (False)]) @pytest.mark.parametrize("should_replace", [(True), (False)])
def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, def_config, def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, def_config,
sp_importer, placex_table, tokenizer_mock, sp_importer, placex_row, tokenizer_mock,
xml_wiki_content, should_replace): xml_wiki_content, should_replace):
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content', monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
@@ -266,7 +265,7 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
class_test = 'aerialway' class_test = 'aerialway'
type_test = 'zip_line' type_test = 'zip_line'
placex_table.add(cls=class_test, typ=type_test) # add to the database to make valid placex_row(cls=class_test, typ=type_test) # add to the database to make valid
tokenizer = tokenizer_mock() tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer, should_replace) sp_importer.import_phrases(tokenizer, should_replace)
@@ -276,11 +275,11 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
assert check_grant_access(temp_db_cursor, def_config.DATABASE_WEBUSER, class_test, type_test) assert check_grant_access(temp_db_cursor, def_config.DATABASE_WEBUSER, class_test, type_test)
def test_get_classtype_pairs_directly(placex_table, temp_db_conn, sp_importer): def test_get_classtype_pairs_directly(placex_row, temp_db_conn, sp_importer):
for _ in range(101): for _ in range(101):
placex_table.add(cls='highway', typ='residential') placex_row(cls='highway', typ='residential')
for _ in range(99): for _ in range(99):
placex_table.add(cls='amenity', typ='toilet') placex_row(cls='amenity', typ='toilet')
temp_db_conn.commit() temp_db_conn.commit()

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for functions to maintain the artificial postcode table. Tests for functions to maintain the artificial postcode table.
@@ -11,88 +11,27 @@ import subprocess
import pytest import pytest
from psycopg.rows import tuple_row
from nominatim_db.tools import postcodes from nominatim_db.tools import postcodes
from nominatim_db.data import country_info from nominatim_db.data import country_info
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
import dummy_tokenizer import dummy_tokenizer
class MockPostcodeTable:
""" A location_postcodes table for testing.
"""
def __init__(self, conn, config):
self.conn = conn
SQLPreprocessor(conn, config).run_sql_file(conn, 'functions/postcode_triggers.sql')
with conn.cursor() as cur:
cur.execute("""CREATE TABLE location_postcodes (
place_id BIGINT,
osm_id BIGINT,
parent_place_id BIGINT,
rank_search SMALLINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
country_code varchar(2),
postcode TEXT,
geometry GEOMETRY(Geometry, 4326),
centroid GEOMETRY(Point, 4326))""")
cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
RETURNS TEXT AS $$ BEGIN
RETURN null;
END; $$ LANGUAGE plpgsql;
""")
cur.execute("""CREATE OR REPLACE FUNCTION expand_by_meters(geom GEOMETRY, meters FLOAT)
RETURNS GEOMETRY AS $$
SELECT ST_Envelope(ST_Buffer(geom::geography, meters, 1)::geometry)
$$ LANGUAGE sql;""")
conn.commit()
def add(self, country, postcode, x, y):
with self.conn.cursor() as cur:
cur.execute(
"""INSERT INTO location_postcodes
(place_id, indexed_status, country_code, postcode, centroid, geometry)
VALUES (nextval('seq_place'), 1, %(cc)s, %(pc)s,
ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326),
ST_Expand(ST_SetSRID(ST_MakePoint(%(x)s, %(y)s), 4326), 0.005))""",
{'cc': country, 'pc': postcode, 'x': x, 'y': y})
self.conn.commit()
@property
def row_set(self):
with self.conn.cursor() as cur:
cur.execute("""SELECT osm_id, country_code, postcode,
ST_X(centroid), ST_Y(centroid)
FROM location_postcodes""")
return set((tuple(row) for row in cur))
@pytest.fixture @pytest.fixture
def postcode_table(def_config, temp_db_conn, placex_table, table_factory): def insert_implicit_postcode(placex_row, place_postcode_row):
country_info.setup_country_config(def_config)
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
return MockPostcodeTable(temp_db_conn, def_config)
@pytest.fixture
def insert_implicit_postcode(placex_table, place_postcode_row):
""" Insert data into the placex and place table """ Insert data into the placex and place table
which can then be used to compute one postcode. which can then be used to compute one postcode.
""" """
def _insert_implicit_postcode(osm_id, country, geometry, postcode, in_placex=False): def _insert_implicit_postcode(osm_id, country, geometry, postcode, in_placex=False):
if in_placex: if in_placex:
placex_table.add(osm_id=osm_id, country=country, geom=geometry, placex_row(osm_id=osm_id, country=country, geom=geometry,
centroid=f'SRID=4326;{geometry}', centroid=geometry,
address={'postcode': postcode}) address={'postcode': postcode})
else: else:
place_postcode_row(osm_id=osm_id, centroid=geometry, place_postcode_row(osm_id=osm_id, centroid=geometry,
country=country, postcode=postcode) country=country, postcode=postcode)
return _insert_implicit_postcode return _insert_implicit_postcode
@@ -105,7 +44,6 @@ def insert_postcode_area(place_postcode_row):
place_postcode_row(osm_type='R', osm_id=osm_id, postcode=postcode, country=country, place_postcode_row(osm_type='R', osm_id=osm_id, postcode=postcode, country=country,
centroid=f"POINT({x} {y})", centroid=f"POINT({x} {y})",
geom=f"POLYGON(({x1} {y1}, {x1} {y2}, {x2} {y2}, {x2} {y1}, {x1} {y1}))") geom=f"POLYGON(({x1} {y1}, {x1} {y2}, {x2} {y2}, {x2} {y1}, {x1} {y1}))")
return _do return _do
@@ -125,185 +63,198 @@ def postcode_update(dsn, temp_db_conn):
BEFORE INSERT ON location_postcodes BEFORE INSERT ON location_postcodes
FOR EACH ROW EXECUTE PROCEDURE postcodes_insert()""") FOR EACH ROW EXECUTE PROCEDURE postcodes_insert()""")
temp_db_conn.commit() temp_db_conn.commit()
postcodes.update_postcodes(dsn, data_path, tokenizer) postcodes.update_postcodes(dsn, data_path, tokenizer)
return _do return _do
def test_postcodes_empty(postcode_update, postcode_table, place_postcode_table): class TestPostcodes:
postcode_update() @pytest.fixture(autouse=True)
def setup(self, def_config, postcode_table, placex_table, place_postcode_table,
load_sql, temp_db_conn):
self.conn = temp_db_conn
country_info.setup_country_config(def_config)
load_sql('functions/postcode_triggers.sql')
assert not postcode_table.row_set temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
RETURNS TEXT AS $$
SELECT postcode
$$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
RETURNS TEXT AS $$
SELECT NULL
$$ LANGUAGE sql;
@pytest.mark.parametrize('in_placex', [True, False]) CREATE OR REPLACE FUNCTION expand_by_meters(geom GEOMETRY, meters FLOAT)
def test_postcodes_add_new_point(postcode_update, postcode_table, RETURNS GEOMETRY AS $$
insert_implicit_postcode, in_placex): SELECT ST_Envelope(ST_Buffer(geom::geography, meters, 1)::geometry)
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', '9486', in_placex) $$ LANGUAGE sql;
postcode_table.add('yy', '9486', 99, 34) """)
postcode_update() @property
def row_set(self):
with self.conn.cursor(row_factory=tuple_row) as cur:
cur.execute("""SELECT osm_id, country_code, postcode,
ST_X(centroid), ST_Y(centroid)
FROM location_postcodes""")
return {r for r in cur}
assert postcode_table.row_set == {(None, 'xx', '9486', 10, 12), } def test_postcodes_empty(self, postcode_update):
postcode_update()
assert not self.row_set
def test_postcodes_add_new_area(postcode_update, insert_postcode_area, postcode_table): @pytest.mark.parametrize('in_placex', [True, False])
insert_postcode_area(345, 'de', '10445', 23.5, 46.2) def test_postcodes_add_new_point(self, postcode_update, postcode_row,
insert_implicit_postcode, in_placex):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', '9486', in_placex)
postcode_row('yy', '9486', 99, 34)
postcode_update() postcode_update()
assert postcode_table.row_set == {(345, 'de', '10445', 23.5, 46.2)} assert self.row_set == {(None, 'xx', '9486', 10, 12), }
def test_postcodes_add_new_area(self, postcode_update, insert_postcode_area):
insert_postcode_area(345, 'de', '10445', 23.5, 46.2)
@pytest.mark.parametrize('in_placex', [True, False]) postcode_update()
def test_postcodes_add_area_and_point(postcode_update, insert_postcode_area,
insert_implicit_postcode, postcode_table, in_placex):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', '10445', in_placex)
insert_postcode_area(345, 'xx', '10445', 23.5, 46.2)
postcode_update() assert self.row_set == {(345, 'de', '10445', 23.5, 46.2)}
assert postcode_table.row_set == {(345, 'xx', '10445', 23.5, 46.2)} @pytest.mark.parametrize('in_placex', [True, False])
def test_postcodes_add_area_and_point(self, postcode_update, insert_postcode_area,
insert_implicit_postcode, in_placex):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', '10445', in_placex)
insert_postcode_area(345, 'xx', '10445', 23.5, 46.2)
postcode_update()
@pytest.mark.parametrize('in_placex', [True, False]) assert self.row_set == {(345, 'xx', '10445', 23.5, 46.2)}
def test_postcodes_add_point_within_area(postcode_update, insert_postcode_area,
insert_implicit_postcode, postcode_table, in_placex):
insert_implicit_postcode(1, 'xx', 'POINT(23.5 46.2)', '10446', in_placex)
insert_postcode_area(345, 'xx', '10445', 23.5, 46.2)
postcode_update() @pytest.mark.parametrize('in_placex', [True, False])
def test_postcodes_add_point_within_area(self, postcode_update, insert_postcode_area,
insert_implicit_postcode, in_placex):
insert_implicit_postcode(1, 'xx', 'POINT(23.5 46.2)', '10446', in_placex)
insert_postcode_area(345, 'xx', '10445', 23.5, 46.2)
assert postcode_table.row_set == {(345, 'xx', '10445', 23.5, 46.2)} postcode_update()
assert self.row_set == {(345, 'xx', '10445', 23.5, 46.2)}
@pytest.mark.parametrize('coords', [(99, 34), (10, 34), (99, 12), @pytest.mark.parametrize('coords', [(99, 34), (10, 34), (99, 12),
(9, 34), (9, 11), (23, 11)]) (9, 34), (9, 11), (23, 11)])
def test_postcodes_replace_coordinates(postcode_update, postcode_table, tmp_path, def test_postcodes_replace_coordinates(self, postcode_update, postcode_row, tmp_path,
insert_implicit_postcode, coords): insert_implicit_postcode, coords):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
postcode_table.add('xx', 'AB 4511', *coords) postcode_row('xx', 'AB 4511', *coords)
postcode_update(tmp_path) postcode_update(tmp_path)
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 12)} assert self.row_set == {(None, 'xx', 'AB 4511', 10, 12)}
def test_postcodes_replace_coordinates_close(self, postcode_update, postcode_row,
insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
postcode_row('xx', 'AB 4511', 10, 11.99999999)
def test_postcodes_replace_coordinates_close(postcode_update, postcode_table, postcode_update()
insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
postcode_table.add('xx', 'AB 4511', 10, 11.99999999)
postcode_update() assert self.row_set == {(None, 'xx', 'AB 4511', 10, 11.99999999)}
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 11.99999999)} def test_postcodes_remove_point(self, postcode_update, postcode_row,
insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
postcode_row('xx', 'badname', 10, 12)
postcode_update()
def test_postcodes_remove_point(postcode_update, postcode_table, assert self.row_set == {(None, 'xx', 'AB 4511', 10, 12)}
insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
postcode_table.add('xx', 'badname', 10, 12)
postcode_update() def test_postcodes_ignore_empty_country(self, postcode_update, insert_implicit_postcode):
insert_implicit_postcode(1, None, 'POINT(10 12)', 'AB 4511')
postcode_update()
assert not self.row_set
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 12)} def test_postcodes_remove_all(self, postcode_update, postcode_row, place_postcode_table):
postcode_row('ch', '5613', 10, 12)
postcode_update()
assert not self.row_set
def test_postcodes_ignore_empty_country(postcode_update, postcode_table, def test_postcodes_multi_country(self, postcode_update,
insert_implicit_postcode):
insert_implicit_postcode(1, None, 'POINT(10 12)', 'AB 4511')
postcode_update()
assert not postcode_table.row_set
def test_postcodes_remove_all(postcode_update, postcode_table, place_postcode_table):
postcode_table.add('ch', '5613', 10, 12)
postcode_update()
assert not postcode_table.row_set
def test_postcodes_multi_country(postcode_update, postcode_table,
insert_implicit_postcode):
insert_implicit_postcode(1, 'de', 'POINT(10 12)', '54451')
insert_implicit_postcode(2, 'cc', 'POINT(100 56)', 'DD23 T')
insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', '54452')
insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', '54452')
postcode_update()
assert postcode_table.row_set == {(None, 'de', '54451', 10, 12),
(None, 'de', '54452', 10.3, 11.0),
(None, 'cc', '54452', 10.3, 11.0),
(None, 'cc', 'DD23 T', 100, 56)}
@pytest.mark.parametrize("gzipped", [True, False])
def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode, gzipped):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
if gzipped:
subprocess.run(['gzip', str(extfile)])
assert not extfile.is_file()
postcode_update(tmp_path)
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 12),
(None, 'xx', 'CD 4511', -10, -5)}
def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
insert_implicit_postcode): insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') insert_implicit_postcode(1, 'de', 'POINT(10 12)', '54451')
insert_implicit_postcode(2, 'cc', 'POINT(100 56)', 'DD23 T')
insert_implicit_postcode(3, 'de', 'POINT(10.3 11.0)', '54452')
insert_implicit_postcode(4, 'cc', 'POINT(10.3 11.0)', '54452')
extfile = tmp_path / 'xx_postcodes.csv' postcode_update()
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
postcode_update(tmp_path) assert self.row_set == {(None, 'de', '54451', 10, 12),
(None, 'de', '54452', 10.3, 11.0),
(None, 'cc', '54452', 10.3, 11.0),
(None, 'cc', 'DD23 T', 100, 56)}
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 12)} @pytest.mark.parametrize("gzipped", [True, False])
def test_postcodes_extern(self, postcode_update, tmp_path,
insert_implicit_postcode, gzipped):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode, if gzipped:
postcode_table, tmp_path): subprocess.run(['gzip', str(extfile)])
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511') assert not extfile.is_file()
extfile = tmp_path / 'xx_postcodes.csv' postcode_update(tmp_path)
extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
postcode_update(tmp_path) assert self.row_set == {(None, 'xx', 'AB 4511', 10, 12),
(None, 'xx', 'CD 4511', -10, -5)}
assert postcode_table.row_set == {(None, 'xx', 'AB 4511', 10, 12), def test_postcodes_extern_bad_column(self, postcode_update, tmp_path,
(None, 'xx', 'CD 4511', -10, -5)} insert_implicit_postcode):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')
postcode_update(tmp_path)
assert self.row_set == {(None, 'xx', 'AB 4511', 10, 12)}
def test_postcodes_extern_bad_number(self, postcode_update, insert_implicit_postcode,
tmp_path):
insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')
extfile = tmp_path / 'xx_postcodes.csv'
extfile.write_text(
"postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')
postcode_update(tmp_path)
assert self.row_set == {(None, 'xx', 'AB 4511', 10, 12),
(None, 'xx', 'CD 4511', -10, -5)}
def test_no_placex_entry(self, postcode_update, temp_db_cursor, place_postcode_row):
# Rewrite the get_country_code function to verify its execution.
temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION get_country_code(place geometry) RETURNS TEXT AS $$
SELECT 'yy' $$ LANGUAGE sql""")
place_postcode_row(centroid='POINT(10 12)', postcode='AB 4511')
postcode_update()
assert self.row_set == {(None, 'yy', 'AB 4511', 10, 12)}
def test_discard_badly_formatted_postcodes(self, postcode_update, place_postcode_row):
place_postcode_row(centroid='POINT(10 12)', country='fr', postcode='AB 4511')
postcode_update()
assert not self.row_set
def test_can_compute(dsn, table_factory): def test_can_compute(dsn, table_factory):
assert not postcodes.can_compute(dsn) assert not postcodes.can_compute(dsn)
table_factory('place_postcode') table_factory('place_postcode')
assert postcodes.can_compute(dsn) assert postcodes.can_compute(dsn)
def test_no_placex_entry(postcode_update, temp_db_cursor, place_postcode_row, postcode_table):
# Rewrite the get_country_code function to verify its execution.
temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION get_country_code(place geometry)
RETURNS TEXT AS $$ BEGIN
RETURN 'yy';
END; $$ LANGUAGE plpgsql;
""")
place_postcode_row(centroid='POINT(10 12)', postcode='AB 4511')
postcode_update()
assert postcode_table.row_set == {(None, 'yy', 'AB 4511', 10, 12)}
def test_discard_badly_formatted_postcodes(postcode_update, place_postcode_row, postcode_table):
place_postcode_row(centroid='POINT(10 12)', country='fr', postcode='AB 4511')
postcode_update()
assert not postcode_table.row_set

View File

@@ -42,8 +42,8 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R')) @pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor): def test_invalidate_osm_object_simple(placex_row, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283) placex_row(osm_type=osm_type, osm_id=57283)
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False) refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False)
temp_db_conn.commit() temp_db_conn.commit()
@@ -53,8 +53,8 @@ def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp
(osm_type, 57283)) (osm_type, 57283))
def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor): def test_invalidate_osm_object_nonexisting_simple(placex_row, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type='W', osm_id=57283) placex_row(osm_type='W', osm_id=57283)
refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False) refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False)
temp_db_conn.commit() temp_db_conn.commit()
@@ -64,8 +64,8 @@ def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, te
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R')) @pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor): def test_invalidate_osm_object_recursive(placex_row, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283) placex_row(osm_type=osm_type, osm_id=57283)
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT) temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT)
RETURNS BOOLEAN AS $$ RETURNS BOOLEAN AS $$

View File

@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):
def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor): def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
test_file = project_env.project_dir / 'address-levels.json' test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":{"place":{"sea":2}}}]') test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')
load_address_levels_from_config(temp_db_conn, project_env) load_address_levels_from_config(temp_db_conn, project_env)
@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
def test_load_ranks_from_broken_file(project_env, temp_db_conn): def test_load_ranks_from_broken_file(project_env, temp_db_conn):
test_file = project_env.project_dir / 'address-levels.json' test_file = project_env.project_dir / 'address-levels.json'
test_file.write_text('[{"tags":"place":{"sea":2}}}]') test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')
with pytest.raises(json.decoder.JSONDecodeError): with pytest.raises(json.decoder.JSONDecodeError):
load_address_levels_from_config(temp_db_conn, project_env) load_address_levels_from_config(temp_db_conn, project_env)

View File

@@ -21,7 +21,7 @@ class TestCreateFunctions:
def write_functions(self, content): def write_functions(self, content):
sqlfile = self.config.lib_dir.sql / 'functions.sql' sqlfile = self.config.lib_dir.sql / 'functions.sql'
sqlfile.write_text(content) sqlfile.write_text(content, encoding='utf-8')
def test_create_functions(self, temp_db_cursor): def test_create_functions(self, temp_db_cursor):
self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Tests for correctly assigning wikipedia pages to places. Tests for correctly assigning wikipedia pages to places.
@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
@pytest.fixture @pytest.fixture
def wiki_csv(tmp_path, sql_preprocessor): def wiki_csv(tmp_path, sql_preprocessor):
def _import(data): def _import(data):
with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd: with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title', writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
'importance', 'wikidata_id'], 'importance', 'wikidata_id'],
delimiter='\t', quotechar='|') delimiter='\t', quotechar='|')
@@ -38,7 +38,7 @@ def wiki_csv(tmp_path, sql_preprocessor):
{'wikipedia': 'en:Test'}, {'wikipedia': 'en:Test'},
{'wikidata': 'Q123'}]) {'wikidata': 'Q123'}])
def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory, def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
def_config, wiki_csv, placex_table, extra): def_config, wiki_csv, placex_row, extra):
import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')])) import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')]))
create_functions(temp_db_conn, def_config) create_functions(temp_db_conn, def_config)
@@ -46,7 +46,7 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
'SELECT language, title, importance, wikidata FROM wikimedia_importance') 'SELECT language, title, importance, wikidata FROM wikimedia_importance')
assert content == set([('en', 'Test', 0.3, 'Q123')]) assert content == set([('en', 'Test', 0.3, 'Q123')])
place_id = placex_table.add(osm_id=12, extratags=extra) place_id = placex_row(osm_id=12, extratags=extra)
table_factory('search_name', table_factory('search_name',
'place_id BIGINT, importance FLOAT', 'place_id BIGINT, importance FLOAT',
[(place_id, 0.2)]) [(place_id, 0.2)])
@@ -61,11 +61,11 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv, def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv,
placex_table, table_factory): placex_row, table_factory):
import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')])) import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')]))
create_functions(temp_db_conn, def_config) create_functions(temp_db_conn, def_config)
place_id = placex_table.add(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10) place_id = placex_row(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
table_factory('search_name', table_factory('search_name',
'place_id BIGINT, importance FLOAT', 'place_id BIGINT, importance FLOAT',
[(place_id, 0.2)]) [(place_id, 0.2)])

View File

@@ -1,69 +1,70 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
from nominatim_db.tools.special_phrases.sp_importer import SPImporter from nominatim_db.tools.special_phrases.sp_importer import SPImporter
# Testing Database Class Pair Retrival using Conftest.py and placex # Testing Database Class Pair Retrival using Conftest.py and placex
def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn): def test_get_classtype_pair_data(placex_row, def_config, temp_db_conn):
for _ in range(100): for _ in range(100):
placex_table.add(cls='highway', typ='motorway') # edge case 100 placex_row(cls='highway', typ='motorway') # edge case 100
for _ in range(99): for _ in range(99):
placex_table.add(cls='amenity', typ='prison') # edge case 99 placex_row(cls='amenity', typ='prison') # edge case 99
for _ in range(150): for _ in range(150):
placex_table.add(cls='tourism', typ='hotel') placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None) importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs(min=100) result = importer.get_classtype_pairs(min=100)
expected = { assert result == {
("highway", "motorway"), ("highway", "motorway"),
("tourism", "hotel") ("tourism", "hotel")
} }
assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_more(placex_row, def_config, temp_db_conn):
def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
for _ in range(99): for _ in range(99):
placex_table.add(cls='emergency', typ='firehydrant') # edge case 99, not included placex_row(cls='emergency', typ='firehydrant') # edge case 99, not included
for _ in range(199): for _ in range(199):
placex_table.add(cls='amenity', typ='prison') placex_row(cls='amenity', typ='prison')
for _ in range(3478): for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel') placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None) importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs(min=100) result = importer.get_classtype_pairs(min=100)
expected = { assert result == {
("amenity", "prison"), ("amenity", "prison"),
("tourism", "hotel") ("tourism", "hotel")
} }
assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_default(placex_row, def_config, temp_db_conn):
def test_get_classtype_pair_data_default(placex_table, def_config, temp_db_conn):
for _ in range(1): for _ in range(1):
placex_table.add(cls='emergency', typ='firehydrant') placex_row(cls='emergency', typ='firehydrant')
for _ in range(199): for _ in range(199):
placex_table.add(cls='amenity', typ='prison') placex_row(cls='amenity', typ='prison')
for _ in range(3478): for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel') placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None) importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs() result = importer.get_classtype_pairs()
expected = { assert result == {
("amenity", "prison"), ("amenity", "prison"),
("tourism", "hotel"), ("tourism", "hotel"),
("emergency", "firehydrant") ("emergency", "firehydrant")
} }
assert result == expected, f"Expected {expected}, got {result}"

View File

@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):
def _mock_wiki_content(lang): def _mock_wiki_content(lang):
xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt' xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
return xml_test_content.read_text() return xml_test_content.read_text(encoding='utf-8')
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content', monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
_mock_wiki_content) _mock_wiki_content)

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Test for tiger data function Test for tiger data function
@@ -13,57 +13,10 @@ from textwrap import dedent
import pytest import pytest
import pytest_asyncio # noqa: F401 import pytest_asyncio # noqa: F401
from nominatim_db.db.connection import execute_scalar from nominatim_db.tools import tiger_data
from nominatim_db.tools import tiger_data, freeze
from nominatim_db.errors import UsageError from nominatim_db.errors import UsageError
class MockTigerTable:
def __init__(self, conn):
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE tiger (linegeo GEOMETRY,
start INTEGER,
stop INTEGER,
interpol TEXT,
token_info JSONB,
postcode TEXT)""")
# We need this table to determine if the database is frozen or not
cur.execute("CREATE TABLE place (number INTEGER)")
def count(self):
return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
def row(self):
with self.conn.cursor() as cur:
cur.execute("SELECT * FROM tiger LIMIT 1")
return cur.fetchone()
@pytest.fixture
def tiger_table(def_config, temp_db_conn, sql_preprocessor,
temp_db_with_extensions, tmp_path):
def_config.lib_dir.sql = tmp_path / 'sql'
def_config.lib_dir.sql.mkdir()
(def_config.lib_dir.sql / 'tiger_import_start.sql').write_text(
"""CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, start INTEGER,
stop INTEGER, interpol TEXT,
token_info JSONB, postcode TEXT)
RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1
$$ LANGUAGE SQL;""")
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""")
return MockTigerTable(temp_db_conn)
@pytest.fixture @pytest.fixture
def csv_factory(tmp_path): def csv_factory(tmp_path):
def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St', def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St',
@@ -73,97 +26,115 @@ def csv_factory(tmp_path):
from;to;interpolation;street;city;state;postcode;geometry from;to;interpolation;street;city;state;postcode;geometry
{};{};{};{};{};{};{};{} {};{};{};{};{};{};{};{}
""".format(hnr_from, hnr_to, interpol, street, city, state, """.format(hnr_from, hnr_to, interpol, street, city, state,
postcode, geometry))) postcode, geometry)), encoding='utf-8')
return _mk_file return _mk_file
@pytest.mark.parametrize("threads", (1, 5)) class TestTiger:
@pytest.mark.asyncio
async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
assert tiger_table.count() == 6213 @pytest.fixture(autouse=True)
def setup(self, temp_db_conn, placex_row, load_sql):
load_sql('tables/search_name.sql', create_reverse_only=False)
load_sql('tables/tiger.sql')
# fake parent roads
for x in range(-870, -863):
for y in range(323, 328):
placex_row(rank_search=26, rank_address=26,
geom=f"LINESTRING({x/10 - 0.1} {y/10}, {x/10 + 0.1} {y/10})")
@pytest.mark.asyncio temp_db_conn.execute("""
async def test_add_tiger_data_database_frozen(def_config, temp_db_conn, tiger_table, tokenizer_mock, CREATE OR REPLACE FUNCTION get_partition(cc VARCHAR(10)) RETURNS INTEGER AS $$
tmp_path): SELECT 0;
freeze.drop_update_tables(temp_db_conn) $$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION token_matches_street(i JSONB, s INT[]) RETURNS BOOLEAN AS $$
SELECT false
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
""")
with pytest.raises(UsageError) as excinfo: @pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_database_frozen(self, def_config, src_dir, temp_db_cursor,
tokenizer_mock, threads):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
assert temp_db_cursor.table_rows('location_property_tiger') == 6209
@pytest.mark.asyncio
async def test_add_tiger_data_reverse_only(self, def_config, src_dir, temp_db_cursor,
tokenizer_mock):
temp_db_cursor.execute("DROP TABLE search_name")
with pytest.raises(UsageError,
match="Cannot perform tiger import: required tables are missing. "
"See https://github.com/osm-search/Nominatim/issues/2463 for details."):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, 1, tokenizer_mock())
assert temp_db_cursor.table_rows('location_property_tiger') == 0
@pytest.mark.asyncio
async def test_add_tiger_data_no_files(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert "database frozen" in str(excinfo.value) assert temp_db_cursor.table_rows('location_property_tiger') == 0
assert tiger_table.count() == 0 @pytest.mark.asyncio
async def test_add_tiger_data_bad_file(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text('Random text', encoding='utf-8')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio assert temp_db_cursor.table_rows('location_property_tiger') == 0
async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
tmp_path):
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert tiger_table.count() == 0 @pytest.mark.asyncio
async def test_add_tiger_data_hnr_nan(self, def_config, temp_db_cursor, tokenizer_mock,
csv_factory, tmp_path):
csv_factory('file1', hnr_to=99)
csv_factory('file2', hnr_from='L12')
csv_factory('file3', hnr_to='12.4')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio rows = temp_db_cursor.row_set("""
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock, SELECT startnumber, endnumber FROM location_property_tiger""")
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""")
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) assert rows == {(1, 99)}
assert tiger_table.count() == 0 @pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path, src_dir, threads):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
tokenizer_mock())
@pytest.mark.asyncio assert temp_db_cursor.table_rows('location_property_tiger') == 6209
async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
csv_factory, tmp_path):
csv_factory('file1', hnr_from=99)
csv_factory('file2', hnr_from='L12')
csv_factory('file3', hnr_to='12.4')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock()) @pytest.mark.asyncio
async def test_add_tiger_data_bad_tarfile(self, def_config, tokenizer_mock, tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""", encoding='utf-8')
assert tiger_table.count() == 1 with pytest.raises(UsageError):
assert tiger_table.row().start == 99 await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_empty_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(__file__)
tar.close()
@pytest.mark.parametrize("threads", (1, 5)) await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
@pytest.mark.asyncio tokenizer_mock())
async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path, src_dir, threads):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads, assert temp_db_cursor.table_rows('location_property_tiger') == 0
tokenizer_mock())
assert tiger_table.count() == 6213
@pytest.mark.asyncio
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""")
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(__file__)
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
tokenizer_mock())
assert tiger_table.count() == 0