Mirror of https://github.com/osm-search/Nominatim.git
Synced 2026-02-14 18:37:58 +00:00

Compare commits: 52 commits, 1cf5464d3a...master
Commits in range (SHA1):

2ddb19c0b0, 3f14f89bdf, 8ed7a3875a, 70b9140f13, 3285948130, 9d0732a941,
5314e6c881, 2750d66470, 0d423ad7a7, dd332caa4d, d691cfc35d, d274a5aecc,
35a023d133, 79682a94ce, aa42dc8a93, 29fcd0b763, 2237ce7124, 58295e0643,
fed64cda5a, b995803c66, 986d303c95, 310d6e3c92, 7a3ea55f3d, d10d70944d,
73590baf15, e17d0cb5cf, 7a62c7d812, 615804b1b3, 79bbdfd55c, 509f59b193,
f84b279540, e62811cf97, cd2f6e458b, fa2a789e27, fc49a77e70, 28baa34bdc,
151a5b64a8, 6fee784c9f, 3db7c6d804, b2f868d2fc, ae7301921a, 8188689765,
135453e463, cc9c8963f3, c882718355, 3f02a4e33b, 45972811e3, e021f558bf,
5e2ce10fe0, 58cae70596, bf0ee6685b, ff1f1b06d9
@@ -42,6 +42,15 @@ description or in documentation need to
 sufficient. You need to show that the code actually solves the problem
 the PR claims to solve.

+## Getting Started with Development
+
+Please see the development section of the Nominatim documentation for
+
+* [an architecture overview](https://nominatim.org/release-docs/develop/develop/overview/)
+  and backgrounds on some of the algorithms
+* [how to set up a development environment](https://nominatim.org/release-docs/develop/develop/Development-Environment/)
+* and background on [how tests are organised](https://nominatim.org/release-docs/develop/develop/Testing/)
+
 ## Coding style
@@ -10,14 +10,14 @@ Nominatim. Please refer to the documentation of
 [Nginx](https://nginx.org/en/docs/) for background information on how
 to configure it.

 !!! Note
     Throughout this page, we assume your Nominatim project directory is
     located in `/srv/nominatim-project`. If you have put it somewhere else,
     you need to adjust the commands and configuration accordingly.

 ### Installing the required packages

 !!! warning
     ASGI support in gunicorn requires at least version 25.0. If you need
     to work with an older version of gunicorn, please refer to
     [older Nominatim deployment documentation](https://nominatim.org/release-docs/5.2/admin/Deployment-Python/)
     to learn how to run gunicorn with uvicorn.

 The Nominatim frontend is best run from its own virtual environment. If
 you have already created one for the database backend during the
 [installation](Installation.md#building-nominatim), you can use that. Otherwise
@@ -37,23 +37,27 @@ cd Nominatim
 ```

 The recommended way to deploy a Python ASGI application is to run
-the ASGI runner [uvicorn](https://www.uvicorn.org/)
-together with [gunicorn](https://gunicorn.org/) HTTP server. We use
+the [gunicorn](https://gunicorn.org/) HTTP server. We use
 Falcon here as the web framework.

 Add the necessary packages to your virtual environment:

 ``` sh
-/srv/nominatim-venv/bin/pip install falcon uvicorn gunicorn
+/srv/nominatim-venv/bin/pip install falcon gunicorn
 ```

 ### Setting up Nominatim as a systemd job

+!!! Note
+    These instructions assume your Nominatim project directory is
+    located in `/srv/nominatim-project`. If you have put it somewhere else,
+    you need to adjust the commands and configuration accordingly.
+
 Next you need to set up the service that runs the Nominatim frontend. This is
 easiest done with a systemd job.

 First you need to tell systemd to create a socket file to be used by
-hunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
+gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:

 ``` systemd
 [Unit]
@@ -81,10 +85,8 @@ Type=simple
 User=www-data
 Group=www-data
 WorkingDirectory=/srv/nominatim-project
-ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker "nominatim_api.server.falcon.server:run_wsgi()"
+ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 --worker-class asgi --protocol uwsgi --worker-connections 1000 "nominatim_api.server.falcon.server:run_wsgi()"
 ExecReload=/bin/kill -s HUP $MAINPID
-StandardOutput=append:/var/log/gunicorn-nominatim.log
-StandardError=inherit
 PrivateTmp=true
 TimeoutStopSec=5
 KillMode=mixed
@@ -96,7 +98,10 @@ WantedBy=multi-user.target
 This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
 its own Python process using
 [`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
-connections to the database to serve requests in parallel.
+connections to the database to serve requests in parallel. The parameter
+`--worker-connections` restricts how many requests gunicorn will queue for
+each worker. This can help distribute work better when the server is under
+high load.
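As a rough rule of thumb, and assuming the default pool size of 10, the unit above may open up to 4 workers × 10 connections = 40 database connections, so PostgreSQL's `max_connections` needs matching headroom.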

 Make the new services known to systemd and start it:

@@ -108,13 +113,15 @@ sudo systemctl enable nominatim.service
 sudo systemctl start nominatim.service
 ```

-This sets the service up, so that Nominatim is automatically started
+This sets the service up so that Nominatim is automatically started
 on reboot.

 ### Configuring nginx

 To make the service available to the world, you need to proxy it through
-nginx. Add the following definition to the default configuration:
+nginx. We use the binary uwsgi protocol to speed up communication
+between nginx and gunicorn. Add the following definition to the default
+configuration:

 ``` nginx
 upstream nominatim_service {
@@ -129,11 +136,8 @@ server {
     index /search;

     location / {
-        proxy_set_header Host $http_host;
-        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
-        proxy_set_header X-Forwarded-Proto $scheme;
-        proxy_redirect off;
-        proxy_pass http://nominatim_service;
+        uwsgi_pass nominatim_service;
+        include uwsgi_params;
     }
 }
 ```
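Assuming the paths and socket names used above, a quick way to smoke-test the whole chain is sketched below; the `/status` endpoint is part of the Nominatim API and `localhost` assumes you test on the server itself.

``` sh
# Validate the configuration and let nginx pick up the new definition.
sudo nginx -t && sudo systemctl reload nginx

# The status endpoint answers with "OK" once requests reach the
# database through gunicorn and the unix socket.
curl http://localhost/status
```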
@@ -56,7 +56,7 @@ The easiest way to handle these Python dependencies is to run your
 development from within a virtual environment.

 ```sh
-sudo apt install libsqlite3-mod-spatialite osm2pgsql \
+sudo apt install build-essential libsqlite3-mod-spatialite osm2pgsql \
     postgresql-postgis postgresql-postgis-scripts \
     pkg-config libicu-dev virtualenv
 ```
@@ -68,11 +68,11 @@ virtualenv ~/nominatim-dev-venv
 ~/nominatim-dev-venv/bin/pip install\
     psutil 'psycopg[binary]' PyICU SQLAlchemy \
     python-dotenv jinja2 pyYAML \
-    mkdocs 'mkdocstrings[python]' mkdocs-gen-files \
+    mkdocs 'mkdocstrings[python]' mkdocs-gen-files mkdocs-material \
     pytest pytest-asyncio pytest-bdd flake8 \
     types-jinja2 types-markupsafe types-psutil types-psycopg2 \
     types-pygments types-pyyaml types-requests types-ujson \
-    types-urllib3 typing-extensions unicorn falcon starlette \
+    types-urllib3 typing-extensions gunicorn falcon starlette \
     uvicorn mypy osmium aiosqlite mwparserfromhell
 ```
@@ -13,7 +13,8 @@ for infile in VAGRANT_PATH.glob('Install-on-*.sh'):
     outfile = f"admin/{infile.stem}.md"
     title = infile.stem.replace('-', ' ')

-    with mkdocs_gen_files.open(outfile, "w") as outfd, infile.open() as infd:
+    with mkdocs_gen_files.open(outfile, "w", encoding='utf-8') as outfd, \
+         infile.open(encoding='utf-8') as infd:
         print("#", title, file=outfd)
         has_empty = False
         for line in infd:
@@ -29,6 +29,7 @@ DECLARE
   location RECORD;
   result prepare_update_info;
   extra_names HSTORE;
+  default_language VARCHAR(10);
 BEGIN
   IF not p.address ? '_inherited' THEN
     result.address := p.address;
@@ -85,6 +86,13 @@ BEGIN

   IF location.name is not NULL THEN
     {% if debug %}RAISE WARNING 'Names original: %, location: %', result.name, location.name;{% endif %}
+
+    -- Add the linked-place (e.g. city) name as a searchable placename in the default language (if any)
+    default_language := get_country_language_code(location.country_code);
+    IF default_language is not NULL AND NOT location.name ? ('name:' || default_language) THEN
+      location.name := location.name || hstore('name:' || default_language, location.name->'name');
+    END IF;
+
     -- Add all names from the place nodes that deviate from the name
     -- in the relation with the prefix '_place_'. Deviation means that
     -- either the value is different or a given key is missing completely
@@ -672,7 +680,7 @@ CREATE OR REPLACE FUNCTION placex_insert()
   AS $$
 DECLARE
   postcode TEXT;
-  result BOOLEAN;
+  result INT;
   is_area BOOLEAN;
   country_code VARCHAR(2);
   diameter FLOAT;
@@ -777,11 +785,12 @@ BEGIN

   -- add to tables for special search
-  -- Note: won't work on initial import because the classtype tables
-  -- do not yet exist. It won't hurt either.
   classtable := 'place_classtype_' || NEW.class || '_' || NEW.type;
-  SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO result;
-  IF result THEN
+  SELECT count(*) INTO result
+    FROM pg_tables
+   WHERE classtable NOT SIMILAR TO '%\W%'
+     AND tablename = classtable and schemaname = current_schema();
+  IF result > 0 THEN
     EXECUTE 'INSERT INTO ' || classtable::regclass || ' (place_id, centroid) VALUES ($1,$2)'
     USING NEW.place_id, NEW.centroid;
   END IF;
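Taken together, the rewritten lookup and the `::regclass` cast defend against SQL injection through crafted class/type values: the `NOT SIMILAR TO '%\W%'` guard only lets identifiers made of word characters through (a hypothetical value like `pub; DROP TABLE placex--` would simply match zero rows), and `::regclass` makes PostgreSQL resolve the name as an existing relation rather than splicing raw text into the statement.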
@@ -1337,6 +1346,7 @@ CREATE OR REPLACE FUNCTION placex_delete()
   AS $$
 DECLARE
   b BOOLEAN;
+  result INT;
   classtable TEXT;
 BEGIN
   -- RAISE WARNING 'placex_delete % %',OLD.osm_type,OLD.osm_id;
@@ -1395,8 +1405,12 @@ BEGIN

   -- remove from tables for special search
   classtable := 'place_classtype_' || OLD.class || '_' || OLD.type;
-  SELECT count(*)>0 FROM pg_tables WHERE tablename = classtable and schemaname = current_schema() INTO b;
-  IF b THEN
+  SELECT count(*) INTO result
+    FROM pg_tables
+   WHERE classtable NOT SIMILAR TO '%\W%'
+     AND tablename = classtable and schemaname = current_schema();
+
+  IF result > 0 THEN
     EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id;
   END IF;

lib-sql/grants.sql (new file, 47 lines)

@@ -0,0 +1,47 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
--
-- Grant read-only access to the web user for all Nominatim tables.

-- Core tables
GRANT SELECT ON import_status TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON country_name TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON nominatim_properties TO "{{config.DATABASE_WEBUSER}}";

-- Location tables
GRANT SELECT ON location_property_tiger TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON location_property_osmline TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}";

-- Search tables
{% if not db.reverse_only %}
GRANT SELECT ON search_name TO "{{config.DATABASE_WEBUSER}}";
{% endif %}

-- Main place tables
GRANT SELECT ON placex TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON place_addressline TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON placex_entrance TO "{{config.DATABASE_WEBUSER}}";

-- Error/delete tracking tables
GRANT SELECT ON import_polygon_error TO "{{config.DATABASE_WEBUSER}}";
GRANT SELECT ON import_polygon_delete TO "{{config.DATABASE_WEBUSER}}";

-- Country grid
GRANT SELECT ON country_osm_grid TO "{{config.DATABASE_WEBUSER}}";

-- Tokenizer tables (word table)
{% if 'word' in db.tables %}
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
{% endif %}

-- Special phrase tables
{% for table in db.tables %}
{% if table.startswith('place_classtype_') %}
GRANT SELECT ON {{ table }} TO "{{config.DATABASE_WEBUSER}}";
{% endif %}
{% endfor %}
@@ -5,292 +5,21 @@
 -- Copyright (C) 2026 by the Nominatim developer community.
 -- For a full list of authors see the git log.

-drop table if exists import_status;
-CREATE TABLE import_status (
-  lastimportdate timestamp with time zone NOT NULL,
-  sequence_id integer,
-  indexed boolean
-);
-GRANT SELECT ON import_status TO "{{config.DATABASE_WEBUSER}}" ;
-
-drop table if exists import_osmosis_log;
-CREATE TABLE import_osmosis_log (
-  batchend timestamp,
-  batchseq integer,
-  batchsize bigint,
-  starttime timestamp,
-  endtime timestamp,
-  event text
-);
-
-GRANT SELECT ON TABLE country_name TO "{{config.DATABASE_WEBUSER}}";
-
-DROP TABLE IF EXISTS nominatim_properties;
-CREATE TABLE nominatim_properties (
-  property TEXT NOT NULL,
-  value TEXT
-);
-GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}";
-
-drop table IF EXISTS location_area CASCADE;
-CREATE TABLE location_area (
-  place_id BIGINT NOT NULL,
-  keywords INTEGER[] NOT NULL,
-  partition SMALLINT NOT NULL,
-  rank_search SMALLINT NOT NULL,
-  rank_address SMALLINT NOT NULL,
-  country_code VARCHAR(2),
-  isguess BOOL NOT NULL,
-  postcode TEXT,
-  centroid GEOMETRY(Point, 4326) NOT NULL,
-  geometry GEOMETRY(Geometry, 4326) NOT NULL
-);
-
-CREATE TABLE location_area_large () INHERITS (location_area);
-
-DROP TABLE IF EXISTS location_area_country;
-CREATE TABLE location_area_country (
-  place_id BIGINT NOT NULL,
-  country_code varchar(2) NOT NULL,
-  geometry GEOMETRY(Geometry, 4326) NOT NULL
-) {{db.tablespace.address_data}};
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
-
-
-CREATE TABLE location_property_tiger (
-  place_id BIGINT NOT NULL,
-  parent_place_id BIGINT,
-  startnumber INTEGER NOT NULL,
-  endnumber INTEGER NOT NULL,
-  step SMALLINT NOT NULL,
-  partition SMALLINT NOT NULL,
-  linegeo GEOMETRY NOT NULL,
-  postcode TEXT);
-GRANT SELECT ON location_property_tiger TO "{{config.DATABASE_WEBUSER}}";
-
-drop table if exists location_property_osmline;
-CREATE TABLE location_property_osmline (
-  place_id BIGINT NOT NULL,
-  osm_id BIGINT NOT NULL,
-  parent_place_id BIGINT,
-  geometry_sector INTEGER NOT NULL,
-  indexed_date TIMESTAMP,
-  startnumber INTEGER,
-  endnumber INTEGER,
-  step SMALLINT,
-  partition SMALLINT NOT NULL,
-  indexed_status SMALLINT NOT NULL,
-  linegeo GEOMETRY NOT NULL,
-  address HSTORE,
-  token_info JSONB, -- custom column for tokenizer use only
-  postcode TEXT,
-  country_code VARCHAR(2)
-){{db.tablespace.search_data}};
-CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {{db.tablespace.search_index}};
-CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {{db.tablespace.address_index}};
-CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {{db.tablespace.search_index}}
-  WHERE startnumber is not null;
-GRANT SELECT ON location_property_osmline TO "{{config.DATABASE_WEBUSER}}";
-
-drop table IF EXISTS search_name;
-{% if not db.reverse_only %}
-CREATE TABLE search_name (
-  place_id BIGINT NOT NULL,
-  importance FLOAT NOT NULL,
-  search_rank SMALLINT NOT NULL,
-  address_rank SMALLINT NOT NULL,
-  name_vector integer[] NOT NULL,
-  nameaddress_vector integer[] NOT NULL,
-  country_code varchar(2),
-  centroid GEOMETRY(Geometry, 4326) NOT NULL
-) {{db.tablespace.search_data}};
-CREATE UNIQUE INDEX idx_search_name_place_id
-  ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};
-GRANT SELECT ON search_name to "{{config.DATABASE_WEBUSER}}" ;
-{% endif %}
-
-drop table IF EXISTS place_addressline;
-CREATE TABLE place_addressline (
-  place_id BIGINT NOT NULL,
-  address_place_id BIGINT NOT NULL,
-  distance FLOAT NOT NULL,
-  cached_rank_address SMALLINT NOT NULL,
-  fromarea boolean NOT NULL,
-  isaddress boolean NOT NULL
-) {{db.tablespace.search_data}};
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}};
-
---------- PLACEX - storage for all indexed places -----------------
-
-DROP TABLE IF EXISTS placex;
-CREATE TABLE placex (
-  place_id BIGINT NOT NULL,
-  parent_place_id BIGINT,
-  linked_place_id BIGINT,
-  importance FLOAT,
-  indexed_date TIMESTAMP,
-  geometry_sector INTEGER NOT NULL,
-  rank_address SMALLINT NOT NULL,
-  rank_search SMALLINT NOT NULL,
-  partition SMALLINT NOT NULL,
-  indexed_status SMALLINT NOT NULL,
-  LIKE place INCLUDING CONSTRAINTS,
-  wikipedia TEXT, -- calculated wikipedia article name (language:title)
-  token_info JSONB, -- custom column for tokenizer use only
-  country_code varchar(2),
-  housenumber TEXT,
-  postcode TEXT,
-  centroid GEOMETRY(Geometry, 4326) NOT NULL
-) {{db.tablespace.search_data}};
-
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
-{% for osm_type in ('N', 'W', 'R') %}
-CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
-  USING BTREE (osm_id) {{db.tablespace.search_index}}
-  WHERE osm_type = '{{osm_type}}';
-{% endfor %}
-
--- Usage: - removing linkage status on update
---        - lookup linked places for /details
-CREATE INDEX idx_placex_linked_place_id ON placex
-  USING BTREE (linked_place_id) {{db.tablespace.address_index}}
-  WHERE linked_place_id IS NOT NULL;
-
--- Usage: - check that admin boundaries do not overtake each other rank-wise
---        - check that place node in a admin boundary with the same address level
---          - boundary is not completely contained in a place area
---        - parenting of large-area or unparentable features
-CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
-  USING gist (geometry) {{db.tablespace.address_index}}
-  WHERE rank_address between 1 and 25
-    and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-
--- Usage: - POI is within building with housenumber
-CREATE INDEX idx_placex_geometry_buildings ON placex
-  USING SPGIST (geometry) {{db.tablespace.address_index}}
-  WHERE address is not null and rank_search = 30
-    and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-
--- Usage: - linking of similar named places to boundaries
---        - linking of place nodes with same type to boundaries
-CREATE INDEX idx_placex_geometry_placenode ON placex
-  USING SPGIST (geometry) {{db.tablespace.address_index}}
-  WHERE osm_type = 'N' and rank_search < 26 and class = 'place';
-
--- Usage: - is node part of a way?
---        - find parent of interpolation spatially
-CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
-  USING SPGIST (geometry) {{db.tablespace.address_index}}
-  WHERE osm_type = 'W' and rank_search >= 26;
-
--- Usage: - linking place nodes by wikidata tag to boundaries
-CREATE INDEX idx_placex_wikidata on placex
-  USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
-  WHERE extratags ? 'wikidata' and class = 'place'
-    and osm_type = 'N' and rank_search < 26;
-
--- The following two indexes function as a todo list for indexing.
-
-CREATE INDEX idx_placex_rank_address_sector ON placex
-  USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
-  WHERE indexed_status > 0;
-
-CREATE INDEX idx_placex_rank_boundaries_sector ON placex
-  USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
-  WHERE class = 'boundary' and type = 'administrative'
-    and indexed_status > 0;
-
-
-DROP SEQUENCE IF EXISTS seq_place;
-CREATE SEQUENCE seq_place start 1;
-GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ;
-GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ;
-
--- Table for synthetic postcodes.
-DROP TABLE IF EXISTS location_postcodes;
-CREATE TABLE location_postcodes (
-  place_id BIGINT NOT NULL,
-  parent_place_id BIGINT,
-  osm_id BIGINT,
-  rank_search SMALLINT NOT NULL,
-  indexed_status SMALLINT NOT NULL,
-  indexed_date TIMESTAMP,
-  country_code varchar(2) NOT NULL,
-  postcode TEXT NOT NULL,
-  centroid GEOMETRY(Geometry, 4326) NOT NULL,
-  geometry GEOMETRY(Geometry, 4326) NOT NULL
-);
-CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
-  USING BTREE (place_id) {{db.tablespace.search_index}};
-CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
-  USING GIST (geometry) {{db.tablespace.search_index}};
-CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode
-  ON location_postcodes USING BTREE (postcode, country_code)
-  {{db.tablespace.search_index}};
-CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid
-  ON location_postcodes USING BTREE (osm_id) {{db.tablespace.search_index}};
-GRANT SELECT ON location_postcodes TO "{{config.DATABASE_WEBUSER}}" ;
-
--- Table to store location of entrance nodes
-DROP TABLE IF EXISTS placex_entrance;
-CREATE TABLE placex_entrance (
-  place_id BIGINT NOT NULL,
-  osm_id BIGINT NOT NULL,
-  type TEXT NOT NULL,
-  location GEOMETRY(Point, 4326) NOT NULL,
-  extratags HSTORE
-);
-CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
-  USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};
-GRANT SELECT ON placex_entrance TO "{{config.DATABASE_WEBUSER}}" ;
-
--- Create an index on the place table for lookups to populate the entrance
--- table
-CREATE INDEX IF NOT EXISTS idx_placex_entrance_lookup ON place
-  USING BTREE (osm_id)
-  WHERE class IN ('routing:entrance', 'entrance');
-
-DROP TABLE IF EXISTS import_polygon_error;
-CREATE TABLE import_polygon_error (
-  osm_id BIGINT,
-  osm_type CHAR(1),
-  class TEXT NOT NULL,
-  type TEXT NOT NULL,
-  name HSTORE,
-  country_code varchar(2),
-  updated timestamp,
-  errormessage text,
-  prevgeometry GEOMETRY(Geometry, 4326),
-  newgeometry GEOMETRY(Geometry, 4326)
-);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
-GRANT SELECT ON import_polygon_error TO "{{config.DATABASE_WEBUSER}}";
-
-DROP TABLE IF EXISTS import_polygon_delete;
-CREATE TABLE import_polygon_delete (
-  osm_id BIGINT,
-  osm_type CHAR(1),
-  class TEXT NOT NULL,
-  type TEXT NOT NULL
-);
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
-GRANT SELECT ON import_polygon_delete TO "{{config.DATABASE_WEBUSER}}";
-
-DROP SEQUENCE IF EXISTS file;
-CREATE SEQUENCE file start 1;
-
-{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
--- create dummy tables here, if nothing was imported
-CREATE TABLE wikimedia_importance (
-  language TEXT NOT NULL,
-  title TEXT NOT NULL,
-  importance double precision NOT NULL,
-  wikidata TEXT
-) {{db.tablespace.address_data}};
-{% endif %}
+{% include('tables/status.sql') %}
+{% include('tables/nominatim_properties.sql') %}
+{% include('tables/location_area.sql') %}
+{% include('tables/tiger.sql') %}
+{% include('tables/interpolation.sql') %}
+{% include('tables/search_name.sql') %}
+{% include('tables/addressline.sql') %}
+{% include('tables/placex.sql') %}
+{% include('tables/postcodes.sql') %}
+{% include('tables/entrance.sql') %}
+{% include('tables/import_reports.sql') %}
+{% include('tables/importance_tables.sql') %}

 -- osm2pgsql does not create indexes on the middle tables for Nominatim
 -- Add one for lookup of associated street relations.

@@ -308,5 +37,3 @@ CREATE INDEX planet_osm_rels_relation_members_idx ON planet_osm_rels USING gin(p
 CREATE INDEX IF NOT EXISTS idx_place_interpolations
   ON place USING gist(geometry) {{db.tablespace.address_index}}
   WHERE osm_type = 'W' and address ? 'interpolation';
-
-GRANT SELECT ON table country_osm_grid to "{{config.DATABASE_WEBUSER}}";
lib-sql/tables/addressline.sql (new file, 20 lines)

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS place_addressline;

CREATE TABLE place_addressline (
  place_id BIGINT NOT NULL,
  address_place_id BIGINT NOT NULL,
  distance FLOAT NOT NULL,
  cached_rank_address SMALLINT NOT NULL,
  fromarea boolean NOT NULL,
  isaddress boolean NOT NULL
) {{db.tablespace.search_data}};

CREATE INDEX idx_place_addressline_place_id ON place_addressline
  USING BTREE (place_id) {{db.tablespace.search_index}};
lib-sql/tables/entrance.sql (new file, 20 lines)

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

-- Table to store location of entrance nodes
DROP TABLE IF EXISTS placex_entrance;

CREATE TABLE placex_entrance (
  place_id BIGINT NOT NULL,
  osm_id BIGINT NOT NULL,
  type TEXT NOT NULL,
  location GEOMETRY(Point, 4326) NOT NULL,
  extratags HSTORE
);

CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
  USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};
lib-sql/tables/import_reports.sql (new file, 35 lines)

@@ -0,0 +1,35 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS import_polygon_error;
CREATE TABLE import_polygon_error (
  osm_id BIGINT,
  osm_type CHAR(1),
  class TEXT NOT NULL,
  type TEXT NOT NULL,
  name HSTORE,
  country_code varchar(2),
  updated timestamp,
  errormessage text,
  prevgeometry GEOMETRY(Geometry, 4326),
  newgeometry GEOMETRY(Geometry, 4326)
);

CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error
  USING BTREE (osm_type, osm_id);


DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
  osm_id BIGINT,
  osm_type CHAR(1),
  class TEXT NOT NULL,
  type TEXT NOT NULL
);

CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete
  USING BTREE (osm_type, osm_id);
lib-sql/tables/importance_tables.sql (new file, 16 lines)

@@ -0,0 +1,16 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
-- create dummy tables here if nothing was imported
CREATE TABLE wikimedia_importance (
  language TEXT NOT NULL,
  title TEXT NOT NULL,
  importance double precision NOT NULL,
  wikidata TEXT
) {{db.tablespace.address_data}};
{% endif %}
lib-sql/tables/interpolation.sql (new file, 34 lines)

@@ -0,0 +1,34 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS location_property_osmline;
CREATE TABLE location_property_osmline (
  place_id BIGINT NOT NULL,
  osm_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  geometry_sector INTEGER NOT NULL,
  indexed_date TIMESTAMP,
  startnumber INTEGER,
  endnumber INTEGER,
  step SMALLINT,
  partition SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,
  linegeo GEOMETRY NOT NULL,
  address HSTORE,
  token_info JSONB, -- custom column for tokenizer use only
  postcode TEXT,
  country_code VARCHAR(2)
){{db.tablespace.search_data}};

CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline
  USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline
  USING BTREE (geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_osmline_linegeo ON location_property_osmline
  USING GIST (linegeo) {{db.tablespace.search_index}}
  WHERE startnumber is not null;
lib-sql/tables/location_area.sql (new file, 32 lines)

@@ -0,0 +1,32 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
  place_id BIGINT NOT NULL,
  keywords INTEGER[] NOT NULL,
  partition SMALLINT NOT NULL,
  rank_search SMALLINT NOT NULL,
  rank_address SMALLINT NOT NULL,
  country_code VARCHAR(2),
  isguess BOOL NOT NULL,
  postcode TEXT,
  centroid GEOMETRY(Point, 4326) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
);

CREATE TABLE location_area_large () INHERITS (location_area);

DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
  place_id BIGINT NOT NULL,
  country_code varchar(2) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.address_data}};

CREATE INDEX idx_location_area_country_geometry ON location_area_country
  USING GIST (geometry) {{db.tablespace.address_index}};
lib-sql/tables/nominatim_properties.sql (new file, 12 lines)

@@ -0,0 +1,12 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS nominatim_properties;
CREATE TABLE nominatim_properties (
  property TEXT NOT NULL,
  value TEXT
);
lib-sql/tables/placex.sql (new file, 87 lines)

@@ -0,0 +1,87 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

-- placex - main table for searchable places

DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  linked_place_id BIGINT,
  importance FLOAT,
  indexed_date TIMESTAMP,
  geometry_sector INTEGER NOT NULL,
  rank_address SMALLINT NOT NULL,
  rank_search SMALLINT NOT NULL,
  partition SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,
  LIKE place INCLUDING CONSTRAINTS,
  wikipedia TEXT, -- calculated wikipedia article name (language:title)
  token_info JSONB, -- custom column for tokenizer use only
  country_code varchar(2),
  housenumber TEXT,
  postcode TEXT,
  centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};

CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
{% for osm_type in ('N', 'W', 'R') %}
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
  USING BTREE (osm_id) {{db.tablespace.search_index}}
  WHERE osm_type = '{{osm_type}}';
{% endfor %}

-- Usage: - removing linkage status on update
--        - lookup linked places for /details
CREATE INDEX idx_placex_linked_place_id ON placex
  USING BTREE (linked_place_id) {{db.tablespace.address_index}}
  WHERE linked_place_id IS NOT NULL;

-- Usage: - check that admin boundaries do not overtake each other rank-wise
--        - check that place node in a admin boundary with the same address level
--          - boundary is not completely contained in a place area
--        - parenting of large-area or unparentable features
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
  USING gist (geometry) {{db.tablespace.address_index}}
  WHERE rank_address between 1 and 25
    and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');

-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
  USING SPGIST (geometry) {{db.tablespace.address_index}}
  WHERE address is not null and rank_search = 30
    and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');

-- Usage: - linking of similar named places to boundaries
--        - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
  USING SPGIST (geometry) {{db.tablespace.address_index}}
  WHERE osm_type = 'N' and rank_search < 26 and class = 'place';

-- Usage: - is node part of a way?
--        - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
  USING SPGIST (geometry) {{db.tablespace.address_index}}
  WHERE osm_type = 'W' and rank_search >= 26;

-- Usage: - linking place nodes by wikidata tag to boundaries
CREATE INDEX idx_placex_wikidata on placex
  USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
  WHERE extratags ? 'wikidata' and class = 'place'
    and osm_type = 'N' and rank_search < 26;

-- The following two indexes function as a todo list for indexing.

CREATE INDEX idx_placex_rank_address_sector ON placex
  USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
  WHERE indexed_status > 0;

CREATE INDEX idx_placex_rank_boundaries_sector ON placex
  USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
  WHERE class = 'boundary' and type = 'administrative'
    and indexed_status > 0;
lib-sql/tables/postcodes.sql (new file, 30 lines)

@@ -0,0 +1,30 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS location_postcodes;
CREATE TABLE location_postcodes (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  osm_id BIGINT,
  rank_search SMALLINT NOT NULL,
  indexed_status SMALLINT NOT NULL,
  indexed_date TIMESTAMP,
  country_code varchar(2) NOT NULL,
  postcode TEXT NOT NULL,
  centroid GEOMETRY(Geometry, 4326) NOT NULL,
  geometry GEOMETRY(Geometry, 4326) NOT NULL
);

CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
  USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
  USING GIST (geometry) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode ON location_postcodes
  USING BTREE (postcode, country_code) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid ON location_postcodes
  USING BTREE (osm_id) {{db.tablespace.search_index}};
lib-sql/tables/search_name.sql (new file, 26 lines)

@@ -0,0 +1,26 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS search_name;

{% if not create_reverse_only %}

CREATE TABLE search_name (
  place_id BIGINT NOT NULL,
  importance FLOAT NOT NULL,
  search_rank SMALLINT NOT NULL,
  address_rank SMALLINT NOT NULL,
  name_vector integer[] NOT NULL,
  nameaddress_vector integer[] NOT NULL,
  country_code varchar(2),
  centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};

CREATE UNIQUE INDEX idx_search_name_place_id
  ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};

{% endif %}
lib-sql/tables/status.sql (new file, 23 lines)

@@ -0,0 +1,23 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS import_status;
CREATE TABLE import_status (
  lastimportdate TIMESTAMP WITH TIME ZONE NOT NULL,
  sequence_id INTEGER,
  indexed BOOLEAN
);

DROP TABLE IF EXISTS import_osmosis_log;
CREATE TABLE import_osmosis_log (
  batchend TIMESTAMP,
  batchseq INTEGER,
  batchsize BIGINT,
  starttime TIMESTAMP,
  endtime TIMESTAMP,
  event TEXT
);
lib-sql/tables/tiger.sql (new file, 17 lines)

@@ -0,0 +1,17 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.

DROP TABLE IF EXISTS location_property_tiger;
CREATE TABLE location_property_tiger (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  startnumber INTEGER NOT NULL,
  endnumber INTEGER NOT NULL,
  step SMALLINT NOT NULL,
  partition SMALLINT NOT NULL,
  linegeo GEOMETRY NOT NULL,
  postcode TEXT);
@@ -23,7 +23,7 @@ an ASGI-capable server like uvicorn. To install them from pypi run:
 You need to have a Nominatim database imported with the 'nominatim-db'
 package. Go to the project directory, then run uvicorn as:

-    uvicorn --factory nominatim.server.falcon.server:run_wsgi
+    uvicorn --factory nominatim_api.server.falcon.server:run_wsgi

 ## Documentation

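The `--factory` flag tells uvicorn to treat `run_wsgi` as a callable that returns the application rather than as the application object itself; the fix simply points it at the renamed `nominatim_api` package.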
@@ -119,6 +119,7 @@ class NominatimArgs:
     enable_debug_statements: bool
     data_object: Sequence[Tuple[str, int]]
     data_area: Sequence[Tuple[str, int]]
+    ro_access: bool

     # Arguments to 'replication'
     init: bool
@@ -65,6 +65,8 @@ class UpdateRefresh:
                            help='Update secondary importance raster data')
         group.add_argument('--importance', action='store_true',
                            help='Recompute place importances (expensive!)')
+        group.add_argument('--ro-access', action='store_true',
+                           help='Grant read-only access to web user for all tables')
         group.add_argument('--website', action='store_true',
                            help='DEPRECATED. This function has no function anymore'
                                 ' and will be removed in a future version.')
@@ -159,6 +161,11 @@ class UpdateRefresh:
             LOG.error('WARNING: Website setup is no longer required. '
                       'This function will be removed in future version of Nominatim.')

+        if args.ro_access:
+            from ..tools import admin
+            LOG.warning('Grant read-only access to web user')
+            admin.grant_ro_access(args.config.get_libpq_dsn(), args.config)
+
         if args.data_object or args.data_area:
             with connect(args.config.get_libpq_dsn()) as conn:
                 for obj in args.data_object or []:
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Nominatim configuration accessor.

@@ -12,6 +12,7 @@ import importlib.util
 import logging
 import os
 import sys
+import re
 from pathlib import Path
 import json
 import yaml
@@ -80,6 +81,10 @@ class Configuration:
         self.lib_dir = _LibDirs()
         self._private_plugins: Dict[str, object] = {}

+        if re.fullmatch(r'[\w-]+', self.DATABASE_WEBUSER) is None:
+            raise UsageError("Misconfigured DATABASE_WEBUSER. "
+                             "Only alphanumeric characters, - and _ are allowed.")
+
     def set_libdirs(self, **kwargs: StrPath) -> None:
         """ Set paths to library functions and data.
         """
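In practice, conventional role names such as `www-data` or `nominatim_web` pass this check, while anything containing spaces, quotes or semicolons is rejected with a usage error before the name can reach any of the templated GRANT statements.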
@@ -38,6 +38,7 @@ class QueryPool:
         """ Schedule a query for execution.
         """
         if self.is_cancelled:
+            self.clear_queue()
             await self.finish()
             return

@@ -47,6 +48,7 @@ class QueryPool:
             await asyncio.sleep(0)

             if self.is_cancelled:
+                self.clear_queue()
                 await self.finish()

     async def finish(self) -> None:
@@ -2,12 +2,13 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2024 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Preprocessing of SQL files.
 """
 from typing import Set, Dict, Any, cast
+import re

 import jinja2

@@ -34,7 +35,9 @@ def _get_tables(conn: Connection) -> Set[str]:
     with conn.cursor() as cur:
         cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'")

-        return set((row[0] for row in list(cur)))
+        # paranoia check: make sure we don't get table names that cause
+        # an SQL injection later
+        return {row[0] for row in list(cur) if re.fullmatch(r'\w+', row[0])}


 def _get_middle_db_format(conn: Connection, tables: Set[str]) -> str:
@@ -56,10 +56,10 @@ class Indexer:
                 cur.execute('ANALYZE')

         while True:
-            if await self.index_by_rank(0, 4) > 0:
+            if await self.index_by_rank(1, 4) > 0:
                 _analyze()

-            if await self.index_boundaries(0, 30) > 100:
+            if await self.index_boundaries() > 100:
                 _analyze()

             if await self.index_by_rank(5, 25) > 100:

@@ -68,13 +68,16 @@ class Indexer:
             if await self.index_by_rank(26, 30) > 1000:
                 _analyze()

+            # Special case: rank zero depends on the previously-indexed [1..30] ranks
+            await self.index_by_rank(0, 0)
+
             if await self.index_postcodes() > 100:
                 _analyze()

             if not self.has_pending():
                 break

-    async def index_boundaries(self, minrank: int, maxrank: int) -> int:
+    async def index_boundaries(self, minrank: int = 0, maxrank: int = 30) -> int:
         """ Index only administrative boundaries within the given rank range.
         """
         total = 0

@@ -147,8 +150,11 @@ class Indexer:
             total += await self._index(runners.RankRunner(rank, analyzer),
                                        batch=batch, total_tuples=total_tuples.get(rank, 0))

-        if maxrank == 30:
+        # Special case: rank zero depends on ranks [1..30]
+        if minrank == 0:
             total += await self._index(runners.RankRunner(0, analyzer))
+
+        if maxrank == 30:
             total += await self._index(runners.InterpolationRunner(analyzer), batch=20)

         return total
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tokenizer implementing normalisation as used before Nominatim 4 but using

@@ -294,13 +294,12 @@ class ICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             drop_tables(conn, 'word')
             with conn.cursor() as cur:
-                cur.execute(f"ALTER TABLE {old} RENAME TO word")
-                for idx in ('word_token', 'word_id'):
-                    cur.execute(f"""ALTER INDEX idx_{old}_{idx}
-                                    RENAME TO idx_word_{idx}""")
-                for name, _ in WORD_TYPES:
-                    cur.execute(f"""ALTER INDEX idx_{old}_{name}
-                                    RENAME TO idx_word_{name}""")
+                cur.execute(pysql.SQL("ALTER TABLE {} RENAME TO word")
+                                 .format(pysql.Identifier(old)))
+                for idx in ['word_token', 'word_id'] + [n[0] for n in WORD_TYPES]:
+                    cur.execute(pysql.SQL("ALTER INDEX {} RENAME TO {}")
+                                     .format(pysql.Identifier(f"idx_{old}_{idx}"),
+                                             pysql.Identifier(f"idx_word_{idx}")))
             conn.commit()
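The move from f-strings to `psycopg.sql` composition (imported here as `pysql`) has the same motivation as the checks above: `pysql.Identifier` renders names as properly double-quoted identifiers, so an unexpected character in `old` can no longer alter the meaning of the ALTER statements.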
@@ -16,6 +16,7 @@ from psycopg.types.json import Json
 from ..typing import DictCursorResult
 from ..config import Configuration
 from ..db.connection import connect, Cursor, register_hstore
+from ..db.sql_preprocessor import SQLPreprocessor
 from ..errors import UsageError
 from ..tokenizer import factory as tokenizer_factory
 from ..data.place_info import PlaceInfo

@@ -105,3 +106,12 @@ def clean_deleted_relations(config: Configuration, age: str) -> None:
     except psycopg.DataError as exc:
         raise UsageError('Invalid PostgreSQL time interval format') from exc
     conn.commit()
+
+
+def grant_ro_access(dsn: str, config: Configuration) -> None:
+    """ Grant read-only access to the web user for all Nominatim tables.
+        This can be used to grant access to a different user after import.
+    """
+    with connect(dsn) as conn:
+        sql = SQLPreprocessor(conn, config)
+        sql.run_sql_file(conn, 'grants.sql')
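A sketch of how this helper is reached in practice, using the `--ro-access` flag added above, for example after switching `NOMINATIM_DATABASE_WEBUSER` to a different role:

``` sh
# Re-runs the GRANT statements from grants.sql for the
# currently configured web user.
nominatim refresh --ro-access
```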
@@ -152,10 +152,11 @@ def create_tables(conn: Connection, config: Configuration, reverse_only: bool =
         When `reverse_only` is True, then the main table for searching will
         be skipped and only reverse search is possible.
     """
-    sql = SQLPreprocessor(conn, config)
-    sql.env.globals['db']['reverse_only'] = reverse_only
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'tables.sql',
+                                               create_reverse_only=reverse_only)

-    sql.run_sql_file(conn, 'tables.sql')
+    # reinitiate the preprocessor to get all the newly created tables
+    SQLPreprocessor(conn, config).run_sql_file(conn, 'grants.sql')


 def create_table_triggers(conn: Connection, config: Configuration) -> None:

@@ -193,7 +194,7 @@ def truncate_data_tables(conn: Connection) -> None:
                        WHERE tablename LIKE 'location_road_%'""")

         for table in [r[0] for r in list(cur)]:
-            cur.execute('TRUNCATE ' + table)
+            cur.execute(pysql.SQL('TRUNCATE {}').format(pysql.Identifier(table)))

     conn.commit()
@@ -18,7 +18,6 @@ UPDATE_TABLES = [
     'address_levels',
     'gb_postcode',
-    'import_osmosis_log',
     'import_polygon_%',
     'location_area%',
     'location_road%',
     'place',
@@ -159,7 +159,7 @@ class _PostcodeCollector:

         if fname.is_file():
             LOG.info("Using external postcode file '%s'.", fname)
-            return gzip.open(fname, 'rt')
+            return gzip.open(fname, 'rt', encoding='utf-8')

         return None
@@ -141,7 +141,9 @@ def import_importance_csv(dsn: str, data_file: Path) -> int:

         copy_cmd = """COPY wikimedia_importance(language, title, importance, wikidata)
                       FROM STDIN"""
-        with gzip.open(str(data_file), 'rt') as fd, cur.copy(copy_cmd) as copy:
+        with gzip.open(
+                str(data_file), 'rt', encoding='utf-8') as fd, \
+             cur.copy(copy_cmd) as copy:
             for row in csv.DictReader(fd, delimiter='\t', quotechar='|'):
                 wd_id = int(row['wikidata_id'][1:])
                 copy.write_row((row['language'],
@@ -297,9 +297,8 @@ Feature: Linking of places
             | R1     | LabelPlace |


-    @skip
     Scenario: Linked places expand default language names
-        Given the grid
+        Given the grid with origin CO
             | 1 |   | 2 |
             |   | 9 |   |
             | 4 |   | 3 |
@@ -43,7 +43,7 @@ def opl_writer(tmp_path, node_grid):
     def _write(data):
         fname = tmp_path / f"test_osm_{nr[0]}.opl"
         nr[0] += 1
-        with fname.open('wt') as fd:
+        with fname.open('wt', encoding='utf-8') as fd:
             for line in data.split('\n'):
                 if line.startswith('n') and ' x' not in line:
                     coord = node_grid.get(line[1:].split(' ')[0]) \

@@ -59,7 +59,7 @@ def opl_writer(tmp_path, node_grid):
 @given('the lua style file', target_fixture='osm2pgsql_options')
 def set_lua_style_file(osm2pgsql_options, docstring, tmp_path):
     style = tmp_path / 'custom.lua'
-    style.write_text(docstring)
+    style.write_text(docstring, encoding='utf-8')
     osm2pgsql_options['osm2pgsql_style'] = str(style)

     return osm2pgsql_options
@@ -58,7 +58,7 @@ gb:
             pattern: "(l?ld[A-Z0-9]?) ?(dll)"
             output: \1 \2

-        """)
+        """, encoding='utf-8')

     return project_env
@@ -91,8 +91,9 @@ class TestCliWithDb:
         postcode_mock = async_mock_func_factory(nominatim_db.indexer.indexer.Indexer,
                                                 'index_postcodes')

+        has_pending_retvals = [True, False]
         monkeypatch.setattr(nominatim_db.indexer.indexer.Indexer, 'has_pending',
-                            [False, True].pop)
+                            lambda *args, **kwargs: has_pending_retvals.pop(0))

         assert self.call_nominatim('index', *params) == 0
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for loading dotenv configuration.

@@ -48,7 +48,7 @@ def test_no_project_dir(make_config):
 @pytest.mark.parametrize("val", ('apache', '"apache"'))
 def test_prefer_project_setting_over_default(make_config, val, tmp_path):
     envfile = tmp_path / '.env'
-    envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val))
+    envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val), encoding='utf-8')

     config = make_config(tmp_path)

@@ -57,7 +57,7 @@ def test_prefer_project_setting_over_default(make_config, val, tmp_path):

 def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_path):
     envfile = tmp_path / '.env'
-    envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
+    envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n', encoding='utf-8')

     monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody')

@@ -68,13 +68,13 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa

 def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
     envfile = tmp_path / '.env'
-    envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
+    envfile.write_text('NOMINATIM_OSM2PGSQL_BINARY=osm2pgsql\n', encoding='utf-8')

-    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')
+    monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '')

     config = make_config(tmp_path)

-    assert config.DATABASE_WEBUSER == ''
+    assert config.OSM2PGSQL_BINARY == ''


 def test_get_os_env_add_defaults(make_config, monkeypatch):

@@ -232,7 +232,7 @@ def test_get_import_style_intern(make_config, src_dir, monkeypatch):

 def test_get_import_style_extern_relative(make_config_path, monkeypatch):
     config = make_config_path()
-    (config.project_dir / 'custom.style').write_text('x')
+    (config.project_dir / 'custom.style').write_text('x', encoding='utf-8')

     monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', 'custom.style')

@@ -243,7 +243,7 @@ def test_get_import_style_extern_absolute(make_config, tmp_path, monkeypatch):
     config = make_config()
     cfgfile = tmp_path / 'test.style'

-    cfgfile.write_text('x')
+    cfgfile.write_text('x', encoding='utf-8')

     monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', str(cfgfile))

@@ -254,10 +254,10 @@ def test_load_subconf_from_project_dir(make_config_path):
     config = make_config_path()

     testfile = config.project_dir / 'test.yaml'
-    testfile.write_text('cow: muh\ncat: miau\n')
+    testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')

     testfile = config.config_dir / 'test.yaml'
-    testfile.write_text('cow: miau\ncat: muh\n')
+    testfile.write_text('cow: miau\ncat: muh\n', encoding='utf-8')

     rules = config.load_sub_configuration('test.yaml')

@@ -268,7 +268,7 @@ def test_load_subconf_from_settings_dir(make_config_path):
     config = make_config_path()

     testfile = config.config_dir / 'test.yaml'
-    testfile.write_text('cow: muh\ncat: miau\n')
+    testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')

     rules = config.load_sub_configuration('test.yaml')

@@ -280,7 +280,7 @@ def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
     config = make_config_path()

     testfile = config.config_dir / 'test.yaml'
-    testfile.write_text('cow: muh\ncat: miau\n')
+    testfile.write_text('cow: muh\ncat: miau\n', encoding='utf-8')

     rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

@@ -291,8 +291,8 @@ def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path
     monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
     config = make_config_path()

-    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
-    (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n')
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
+    (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n', encoding='utf-8')

     rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

@@ -303,7 +303,7 @@ def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_
     monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
     config = make_config_path()

-    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')

     with pytest.raises(UsageError, match='Config file not found.'):
         config.load_sub_configuration('test.yaml', config='MY_CONFIG')

@@ -314,8 +314,8 @@ def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location
     monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
     config = make_config_path()

-    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
-    (getattr(config, location) / 'other.yaml').write_text('dog: bark\n')
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')
+    (getattr(config, location) / 'other.yaml').write_text('dog: bark\n', encoding='utf-8')

     rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')

@@ -326,7 +326,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
     monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
     config = make_config_path()

-    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n', encoding='utf-8')

     with pytest.raises(UsageError, match='Config file not found.'):
         config.load_sub_configuration('test.yaml', config='MY_CONFIG')
|
||||
@@ -335,7 +335,7 @@ def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
|
||||
def test_load_subconf_json(make_config_path):
|
||||
config = make_config_path()
|
||||
|
||||
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}')
|
||||
(config.project_dir / 'test.json').write_text('{"cow": "muh", "cat": "miau"}', encoding='utf-8')
|
||||
|
||||
rules = config.load_sub_configuration('test.json')
|
||||
|
||||
@@ -352,7 +352,7 @@ def test_load_subconf_not_found(make_config_path):
|
||||
def test_load_subconf_env_unknown_format(make_config_path):
|
||||
config = make_config_path()
|
||||
|
||||
(config.project_dir / 'test.xml').write_text('<html></html>')
|
||||
(config.project_dir / 'test.xml').write_text('<html></html>', encoding='utf-8')
|
||||
|
||||
with pytest.raises(UsageError, match='unknown format'):
|
||||
config.load_sub_configuration('test.xml')
|
||||
@@ -362,8 +362,8 @@ def test_load_subconf_include_absolute(make_config_path, tmp_path):
|
||||
config = make_config_path()
|
||||
|
||||
testfile = config.config_dir / 'test.yaml'
|
||||
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n')
|
||||
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
|
||||
testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n', encoding='utf-8')
|
||||
(tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
|
||||
|
||||
rules = config.load_sub_configuration('test.yaml')
|
||||
|
||||
@@ -375,8 +375,8 @@ def test_load_subconf_include_relative(make_config_path, tmp_path, location):
|
||||
config = make_config_path()
|
||||
|
||||
testfile = config.config_dir / 'test.yaml'
|
||||
testfile.write_text('base: !include inc.yaml\n')
|
||||
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
|
||||
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
|
||||
(getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
|
||||
|
||||
rules = config.load_sub_configuration('test.yaml')
|
||||
|
||||
@@ -387,8 +387,8 @@ def test_load_subconf_include_bad_format(make_config_path):
|
||||
config = make_config_path()
|
||||
|
||||
testfile = config.config_dir / 'test.yaml'
|
||||
testfile.write_text('base: !include inc.txt\n')
|
||||
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n')
|
||||
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
|
||||
(config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n', encoding='utf-8')
|
||||
|
||||
with pytest.raises(UsageError, match='Cannot handle config file format.'):
|
||||
config.load_sub_configuration('test.yaml')
|
||||
@@ -398,7 +398,7 @@ def test_load_subconf_include_not_found(make_config_path):
|
||||
config = make_config_path()
|
||||
|
||||
testfile = config.config_dir / 'test.yaml'
|
||||
testfile.write_text('base: !include inc.txt\n')
|
||||
testfile.write_text('base: !include inc.txt\n', encoding='utf-8')
|
||||
|
||||
with pytest.raises(UsageError, match='Config file not found.'):
|
||||
config.load_sub_configuration('test.yaml')
|
||||
@@ -408,9 +408,9 @@ def test_load_subconf_include_recursive(make_config_path):
|
||||
config = make_config_path()
|
||||
|
||||
testfile = config.config_dir / 'test.yaml'
|
||||
testfile.write_text('base: !include inc.yaml\n')
|
||||
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n')
|
||||
(config.config_dir / 'more.yaml').write_text('- the end\n')
|
||||
testfile.write_text('base: !include inc.yaml\n', encoding='utf-8')
|
||||
(config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n', encoding='utf-8')
|
||||
(config.config_dir / 'more.yaml').write_text('- the end\n', encoding='utf-8')
|
||||
|
||||
rules = config.load_sub_configuration('test.yaml')
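
These tests exercise the `!include` tag that Nominatim accepts in its YAML sub-configurations, including relative lookup and recursive includes. The general mechanism behind such a tag in PyYAML looks roughly like this (a simplified sketch under stated assumptions, not Nominatim's actual loader):

``` python
from pathlib import Path
import yaml

def make_include_loader(base_dir: Path):
    class IncludeLoader(yaml.SafeLoader):
        pass

    def _include(loader, node):
        # Resolve the included file relative to the configuration
        # directory and parse it with the same loader class, so that
        # included files may themselves contain !include tags.
        target = base_dir / loader.construct_scalar(node)
        return yaml.load(target.read_text(encoding='utf-8'), IncludeLoader)

    IncludeLoader.add_constructor('!include', _include)
    return IncludeLoader
```

A caller would then parse the top-level file with `yaml.load(text, make_include_loader(cfg_dir))` and get the included documents spliced in place of the tags.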

@@ -41,7 +41,7 @@ def test_load_default_module_with_hyphen(test_config):
def test_load_plugin_module(test_config, tmp_path):
(tmp_path / 'project' / 'testpath').mkdir()
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')

module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')

@@ -49,7 +49,7 @@ def test_load_plugin_module(test_config, tmp_path):

# also test reloading module
(tmp_path / 'project' / 'testpath' / 'mymod.py')\
.write_text("def my_test_function():\n return 'hjothjorhj'")
.write_text("def my_test_function():\n return 'hjothjorhj'", encoding='utf-8')

module = test_config.load_plugin_module('testpath/mymod.py', 'private.something')

@@ -61,9 +61,9 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):
pythonpath = tmp_path / 'priv-python'
pythonpath.mkdir()
(pythonpath / MODULE_NAME).mkdir()
(pythonpath / MODULE_NAME / '__init__.py').write_text('')
(pythonpath / MODULE_NAME / '__init__.py').write_text('', encoding='utf-8')
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'")
.write_text("def my_test_function():\n return 'gjwitlsSG42TG%'", encoding='utf-8')

monkeypatch.syspath_prepend(pythonpath)

@@ -73,7 +73,7 @@ def test_load_external_library_module(test_config, tmp_path, monkeypatch):

# also test reloading module
(pythonpath / MODULE_NAME / 'tester.py')\
.write_text("def my_test_function():\n return 'dfigjreigj'")
.write_text("def my_test_function():\n return 'dfigjreigj'", encoding='utf-8')

module = test_config.load_plugin_module(f'{MODULE_NAME}.tester', 'private.something')
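
These tests cover loading a plugin module from a file path and re-loading it after the file changed. The standard way to implement that is `importlib`; here is a self-contained sketch of the mechanism under test (not the exact `load_plugin_module` implementation):

``` python
import importlib.util
import sys

def load_plugin(path, module_name):
    # Build a module object from an explicit file location and register
    # it under the given name, replacing any previously loaded version,
    # so that a second call effectively reloads the plugin.
    spec = importlib.util.spec_from_file_location(module_name, path)
    module = importlib.util.module_from_spec(spec)
    sys.modules[module_name] = module
    spec.loader.exec_module(module)
    return module
```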

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
import itertools
import sys
@@ -17,12 +17,11 @@ SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
sys.path.insert(0, str(SRC_DIR / 'src'))

from nominatim_db.config import Configuration
from nominatim_db.db import connection
from nominatim_db.db import connection, properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
import nominatim_db.tokenizer.factory

import dummy_tokenizer
import mocks
from cursor import CursorForTesting


@@ -60,7 +59,7 @@ def temp_db(monkeypatch):

with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute('DROP DATABASE IF EXISTS {}'.format(name))
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS') + pysql.Identifier(name))


@pytest.fixture
@@ -104,7 +103,9 @@ def table_factory(temp_db_conn):
"""
def mk_table(name, definition='id INT', content=None):
with psycopg.ClientCursor(temp_db_conn) as cur:
cur.execute('CREATE TABLE {} ({})'.format(name, definition))
cur.execute(pysql.SQL("CREATE TABLE {} ({})")
.format(pysql.Identifier(name),
pysql.SQL(definition)))
if content:
sql = pysql.SQL("INSERT INTO {} VALUES ({})")\
.format(pysql.Identifier(name),
@@ -130,28 +131,49 @@ def project_env(tmp_path):


@pytest.fixture
def property_table(table_factory, temp_db_conn):
table_factory('nominatim_properties', 'property TEXT, value TEXT')

return mocks.MockPropertyTable(temp_db_conn)
def country_table(table_factory):
table_factory('country_name', 'partition INT, country_code varchar(2), name hstore')


@pytest.fixture
def status_table(table_factory):
def country_row(country_table, temp_db_cursor):
def _add(partition=None, country=None, names=None):
temp_db_cursor.insert_row('country_name', partition=partition,
country_code=country, name=names)

return _add


@pytest.fixture
def load_sql(temp_db_conn, country_row):
proc = SQLPreprocessor(temp_db_conn, Configuration(None))

def _run(filename, **kwargs):
proc.run_sql_file(temp_db_conn, filename, **kwargs)

return _run


@pytest.fixture
def property_table(load_sql, temp_db_conn):
load_sql('tables/nominatim_properties.sql')

class _PropTable:
def set(self, name, value):
properties.set_property(temp_db_conn, name, value)

def get(self, name):
return properties.get_property(temp_db_conn, name)

return _PropTable()


@pytest.fixture
def status_table(load_sql):
""" Create an empty version of the status table and
the status logging table.
"""
table_factory('import_status',
"""lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean""")
table_factory('import_osmosis_log',
"""batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text""")
load_sql('tables/status.sql')


@pytest.fixture
@@ -176,12 +198,14 @@ def place_row(place_table, temp_db_cursor):
prerequisite to the fixture.
"""
idseq = itertools.count(1001)

def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom=None):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
(osm_id or next(idseq), osm_type, cls, typ, names,
admin_level, address, extratags,
geom or 'SRID=4326;POINT(0 0)'))
admin_level=None, address=None, extratags=None, geom='POINT(0 0)'):
args = {'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
'address': address, 'extratags': extratags,
'geometry': _with_srid(geom)}
temp_db_cursor.insert_row('place', **args)

return _insert

@@ -201,50 +225,104 @@ def place_postcode_table(temp_db_with_extensions, table_factory):

@pytest.fixture
def place_postcode_row(place_postcode_table, temp_db_cursor):
""" A factory for rows in the place table. The table is created as a
""" A factory for rows in the place_postcode table. The table is created as a
prerequisite to the fixture.
"""
idseq = itertools.count(5001)

def _insert(osm_type='N', osm_id=None, postcode=None, country=None,
centroid=None, geom=None):
temp_db_cursor.execute("INSERT INTO place_postcode VALUES (%s, %s, %s, %s, %s, %s)",
(osm_type, osm_id or next(idseq),
postcode, country,
_with_srid(centroid, 'POINT(12.0 4.0)'),
_with_srid(geom)))
centroid='POINT(12.0 4.0)', geom=None):
temp_db_cursor.insert_row('place_postcode',
osm_type=osm_type, osm_id=osm_id or next(idseq),
postcode=postcode, country_code=country,
centroid=_with_srid(centroid),
geometry=_with_srid(geom))

return _insert


@pytest.fixture
def placex_table(temp_db_with_extensions, temp_db_conn):
""" Create an empty version of the place table.
def placex_table(temp_db_with_extensions, temp_db_conn, load_sql, place_table):
""" Create an empty version of the placex table.
"""
return mocks.MockPlacexTable(temp_db_conn)
load_sql('tables/placex.sql')
temp_db_conn.execute("CREATE SEQUENCE IF NOT EXISTS seq_place START 1")


@pytest.fixture
def osmline_table(temp_db_with_extensions, table_factory):
table_factory('location_property_osmline',
"""place_id BIGINT,
osm_id BIGINT,
parent_place_id BIGINT,
geometry_sector INTEGER,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
partition SMALLINT,
indexed_status SMALLINT,
linegeo GEOMETRY,
interpolationtype TEXT,
address HSTORE,
postcode TEXT,
country_code VARCHAR(2)""")
def placex_row(placex_table, temp_db_cursor):
""" A factory for rows in the placex table. The table is created as a
prerequisite to the fixture.
"""
idseq = itertools.count(1001)

def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, rank_address=30,
centroid='POINT(10 4)', indexed_status=0, indexed_date=None):
args = {'place_id': pysql.SQL("nextval('seq_place')"),
'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
'address': address, 'housenumber': housenumber,
'rank_search': rank_search, 'rank_address': rank_address,
'extratags': extratags,
'centroid': _with_srid(centroid), 'geometry': _with_srid(geom),
'country_code': country,
'indexed_status': indexed_status, 'indexed_date': indexed_date,
'partition': pysql.Literal(0), 'geometry_sector': pysql.Literal(1)}
return temp_db_cursor.insert_row('placex', **args)

return _add


@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
def osmline_table(temp_db_with_extensions, load_sql):
load_sql('tables/interpolation.sql')


@pytest.fixture
def osmline_row(osmline_table, temp_db_cursor):
idseq = itertools.count(20001)

def _add(osm_id=None, geom='LINESTRING(12.0 11.0, 12.003 11.0)'):
return temp_db_cursor.insert_row(
'location_property_osmline',
place_id=pysql.SQL("nextval('seq_place')"),
osm_id=osm_id or next(idseq),
geometry_sector=pysql.Literal(20),
partition=pysql.Literal(0),
indexed_status=1,
linegeo=_with_srid(geom))

return _add


@pytest.fixture
def postcode_table(temp_db_with_extensions, load_sql):
load_sql('tables/postcodes.sql')


@pytest.fixture
def postcode_row(postcode_table, temp_db_cursor):
def _add(country, postcode, x=34.5, y=-9.33):
geom = _with_srid(f"POINT({x} {y})")
return temp_db_cursor.insert_row(
'location_postcodes',
place_id=pysql.SQL("nextval('seq_place')"),
indexed_status=pysql.Literal(1),
country_code=country, postcode=postcode,
centroid=geom,
rank_search=pysql.Literal(16),
geometry=('ST_Expand(%s::geometry, 0.005)', geom))

return _add


@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions, country_row):
for part in range(3):
country_row(partition=part)

cfg = Configuration(None)
cfg.set_libdirs(sql=tmp_path)
return cfg
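
The conftest rework above retires the hand-written mock tables (the deleted `mocks.py` appears further down) in favour of fixtures that build the real table layout from the project's SQL files via `load_sql` and insert rows through the new `CursorForTesting.insert_row`. A test would compose them roughly like this (an illustrative test, not part of the diff):

``` python
def test_placex_row_defaults(placex_row, temp_db_cursor):
    # placex_row fills in sensible defaults and returns the place_id
    # generated by the seq_place sequence.
    place_id = placex_row(osm_type='N', housenumber='7')
    assert place_id is not None
    assert temp_db_cursor.table_rows('placex') == 1
```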

@@ -2,12 +2,13 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialised psycopg cursor with shortcut functions useful for testing.
"""
import psycopg
from psycopg import sql as pysql


class CursorForTesting(psycopg.Cursor):
@@ -52,7 +53,49 @@ class CursorForTesting(psycopg.Cursor):
def table_rows(self, table, where=None):
""" Return the number of rows in the given table.
"""
if where is None:
return self.scalar('SELECT count(*) FROM ' + table)
sql = pysql.SQL('SELECT count(*) FROM') + pysql.Identifier(table)
if where is not None:
sql += pysql.SQL('WHERE') + pysql.SQL(where)

return self.scalar('SELECT count(*) FROM {} WHERE {}'.format(table, where))
return self.scalar(sql)

def insert_row(self, table, **data):
""" Insert a row into the given table.

'data' is a dictionary of column names and associated values.
When the value is a pysql.Literal or pysql.SQL, then the expression
will be inserted as is instead of loading the value. When the
value is a tuple, then the first element will be added as an
SQL expression for the value and the second element is treated
as the actual value to insert. The SQL expression must contain
a %s placeholder in that case.

If data contains a 'place_id' column, then the value of the
place_id column after insert is returned. Otherwise the function
returns nothing.
"""
columns = []
placeholders = []
values = []
for k, v in data.items():
columns.append(pysql.Identifier(k))
if isinstance(v, tuple):
placeholders.append(pysql.SQL(v[0]))
values.append(v[1])
elif isinstance(v, (pysql.Literal, pysql.SQL)):
placeholders.append(v)
else:
placeholders.append(pysql.Placeholder())
values.append(v)

sql = pysql.SQL("INSERT INTO {table} ({columns}) VALUES({values})")\
.format(table=pysql.Identifier(table),
columns=pysql.SQL(',').join(columns),
values=pysql.SQL(',').join(placeholders))

if 'place_id' in data:
sql += pysql.SQL('RETURNING place_id')

self.execute(sql, values)

return self.fetchone()[0] if 'place_id' in data else None
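
The docstring above describes three kinds of values `insert_row` accepts; a call mixing all of them would look like this (an illustrative invocation, assuming `cur` is a `CursorForTesting` on an open test database):

``` python
from psycopg import sql as pysql

place_id = cur.insert_row(
    'placex',
    place_id=pysql.SQL("nextval('seq_place')"),   # raw SQL, inserted verbatim
    partition=pysql.Literal(0),                   # literal, no placeholder
    # tuple form: an SQL wrapper with a %s placeholder plus the bound value
    geometry=('ST_Expand(%s::geometry, 0.005)', 'SRID=4326;POINT(0 0)'),
    postcode='99999')                             # ordinary bound value
# place_id is returned because a 'place_id' column was part of the data.
```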

@@ -22,7 +22,8 @@ def loaded_country(def_config):
def env_with_country_config(project_env):

def _mk_config(cfg):
(project_env.project_dir / 'country_settings.yaml').write_text(dedent(cfg))
(project_env.project_dir / 'country_settings.yaml').write_text(
dedent(cfg), encoding='utf-8')

return project_env

@@ -52,11 +53,10 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur

@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
table_factory, tokenizer_mock, languages, loaded_country):

table_factory('country_name', 'country_code varchar(2), name hstore',
content=(('us', '"name"=>"us1","name:af"=>"us2"'),
('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
country_row, tokenizer_mock, languages, loaded_country):
temp_db_cursor.execute('TRUNCATE country_name')
country_row(country='us', names={"name": "us1", "name:af": "us2"})
country_row(country='fr', names={"name": "Fra", "name:en": "Fren"})

assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2


@@ -22,7 +22,7 @@ def sql_factory(tmp_path):
BEGIN
{}
END;
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body))
$$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body), encoding='utf-8')
return 'test.sql'

return _mk_sql
@@ -63,7 +63,7 @@ def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp
async def test_load_parallel_file(dsn, sql_preprocessor, tmp_path, temp_db_cursor):
(tmp_path / 'test.sql').write_text("""
CREATE TABLE foo (a TEXT);
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);")
CREATE TABLE foo2(a TEXT);""" + "\n---\nCREATE TABLE bar (b INT);", encoding='utf-8')

await sql_preprocessor.run_parallel_sql_file(dsn, 'test.sql', num_threads=4)


@@ -15,7 +15,8 @@ from nominatim_db.errors import UsageError

def test_execute_file_success(dsn, temp_db_cursor, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);')
tmpfile.write_text(
'CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);', encoding='utf-8')

db_utils.execute_file(dsn, tmpfile)

@@ -29,7 +30,7 @@ def test_execute_file_bad_file(dsn, tmp_path):

def test_execute_file_bad_sql(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')

with pytest.raises(UsageError):
db_utils.execute_file(dsn, tmpfile)
@@ -37,14 +38,14 @@ def test_execute_file_bad_sql(dsn, tmp_path):

def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE STABLE test (id INT)')
tmpfile.write_text('CREATE STABLE test (id INT)', encoding='utf-8')

db_utils.execute_file(dsn, tmpfile, ignore_errors=True)


def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('INSERT INTO test VALUES(4)')
tmpfile.write_text('INSERT INTO test VALUES(4)', encoding='utf-8')

db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)')

@@ -53,7 +54,7 @@ def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor):

def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor):
tmpfile = tmp_path / 'test.sql'
tmpfile.write_text('CREATE TABLE test (id INT)')
tmpfile.write_text('CREATE TABLE test (id INT)', encoding='utf-8')

db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)')


@@ -1,13 +1,12 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for running the indexing.
"""
import itertools

import pytest
import pytest_asyncio # noqa

@@ -15,129 +14,57 @@ from nominatim_db.indexer import indexer
from nominatim_db.tokenizer import factory


class IndexerTestDB:
class TestIndexing:
@pytest.fixture(autouse=True)
def setup(self, temp_db_conn, project_env, tokenizer_mock,
placex_table, postcode_table, osmline_table):
self.conn = temp_db_conn
temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;

def __init__(self, conn):
self.placex_id = itertools.count(100000)
self.osmline_id = itertools.count(500000)
self.postcode_id = itertools.count(700000)
DROP TYPE IF EXISTS prepare_update_info CASCADE;
CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
);
CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info) AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END; $$ LANGUAGE plpgsql STABLE;

self.conn = conn
self.conn.autocimmit = True
with self.conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT,
type TEXT,
linked_place_id BIGINT,
rank_address SMALLINT,
rank_search SMALLINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
partition SMALLINT,
admin_level SMALLINT,
country_code TEXT,
address HSTORE,
token_info JSONB,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
osm_id BIGINT,
address HSTORE,
token_info JSONB,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_postcodes (
place_id BIGINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
country_code varchar(2),
postcode TEXT)""")
cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
cur.execute("""CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
cur.execute("""CREATE OR REPLACE FUNCTION
get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$
BEGIN
RETURN in_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$ SELECT in_address $$ LANGUAGE sql STABLE;
""")

for table in ('placex', 'location_property_osmline', 'location_postcodes'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table))
for table in ('placex', 'location_property_osmline', 'location_postcodes'):
temp_db_conn.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table))

self.tokenizer = factory.create_tokenizer(project_env)

def scalar(self, query):
with self.conn.cursor() as cur:
cur.execute(query)
return cur.fetchone()[0]

def add_place(self, cls='place', typ='locality',
rank_search=30, rank_address=30, sector=20):
next_id = next(self.placex_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex
(place_id, class, type, rank_search, rank_address,
indexed_status, geometry_sector)
VALUES (%s, %s, %s, %s, %s, 1, %s)""",
(next_id, cls, typ, rank_search, rank_address, sector))
return next_id

def add_admin(self, **kwargs):
kwargs['cls'] = 'boundary'
kwargs['typ'] = 'administrative'
return self.add_place(**kwargs)

def add_osmline(self, sector=20):
next_id = next(self.osmline_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_property_osmline
(place_id, osm_id, indexed_status, geometry_sector)
VALUES (%s, %s, 1, %s)""",
(next_id, next_id, sector))
return next_id

def add_postcode(self, country, postcode):
next_id = next(self.postcode_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_postcodes
(place_id, indexed_status, country_code, postcode)
VALUES (%s, 1, %s, %s)""",
(next_id, country, postcode))
return next_id

def placex_unindexed(self):
return self.scalar('SELECT count(*) from placex where indexed_status > 0')

@@ -145,148 +72,133 @@ class IndexerTestDB:
return self.scalar("""SELECT count(*) from location_property_osmline
WHERE indexed_status > 0""")

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_all_by_rank(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()

@pytest.fixture
def test_db(temp_db_conn):
yield IndexerTestDB(temp_db_conn)
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1

idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(0, 30)

@pytest.fixture
def test_tokenizer(tokenizer_mock, project_env):
return factory.create_tokenizer(project_env)
assert self.placex_unindexed() == 0
assert self.osmline_unindexed() == 0

assert self.scalar("""SELECT count(*) from placex
WHERE indexed_status = 0 and indexed_date is null""") == 0
# ranks come in order of rank address
assert self.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_all_by_rank(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_without_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()

assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1

idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_by_rank(0, 30)
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(4, 15)

assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert self.placex_unindexed() == 19
assert self.osmline_unindexed() == 1

assert test_db.scalar("""SELECT count(*) from placex
WHERE indexed_status = 0 and indexed_date is null""") == 0
# ranks come in order of rank address
assert test_db.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_with_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1

assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(28, 30)

idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
test_tokenizer, threads)
await idx.index_by_rank(4, 15)
assert self.placex_unindexed() == 28
assert self.osmline_unindexed() == 0

assert test_db.placex_unindexed() == 19
assert test_db.osmline_unindexed() == 1
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0

assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_boundaries(self, dsn, threads, placex_row, osmline_row):
for rank in range(4, 10):
placex_row(cls='boundary', typ='administrative',
rank_address=rank, rank_search=rank, indexed_status=1)
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()

assert self.placex_unindexed() == 37
assert self.osmline_unindexed() == 1

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_boundaries()

assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1

idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_by_rank(28, 30)
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0

assert test_db.placex_unindexed() == 27
assert test_db.osmline_unindexed() == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_postcodes(self, dsn, threads, postcode_row):
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)
for postcode in range(32000, 33000):
postcode_row(country='us', postcode=postcode)

assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND rank_address between 1 and 27""") == 0
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_postcodes()

assert self.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0

@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_boundaries(test_db, threads, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio
async def test_index_full(self, dsn, analyse, placex_row, osmline_row, postcode_row):
for rank in range(4, 10):
placex_row(cls='boundary', typ='administrative',
rank_address=rank, rank_search=rank, indexed_status=1)
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)

assert test_db.placex_unindexed() == 37
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer(dsn, self.tokenizer, 4)
await idx.index_full(analyse=analyse)

idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_boundaries(0, 30)

assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1

assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0


@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)

idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_postcodes()

assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0


@pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio
async def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
for postcode in range(1000):
test_db.add_postcode('de', postcode)

idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
await idx.index_full(analyse=analyse)

assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
assert self.placex_unindexed() == 0
assert self.osmline_unindexed() == 0
assert self.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0

@@ -1,85 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom mocks for testing.
"""
import itertools

from nominatim_db.db import properties


class MockPlacexTable:
""" A placex table for testing.
"""
def __init__(self, conn):
self.idseq = itertools.count(10000)
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER,
rank_address SMALLINT,
rank_search SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
osm_id int8,
osm_type char(1),
class text,
type text,
name hstore,
admin_level smallint,
address hstore,
extratags hstore,
token_info jsonb,
geometry Geometry(Geometry,4326),
wikipedia TEXT,
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))""")
cur.execute("CREATE SEQUENCE IF NOT EXISTS seq_place")
conn.commit()

def add(self, osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, centroid=None):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class,
type, name, admin_level, address,
housenumber, rank_search,
extratags, centroid, geometry, country_code)
VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s)
RETURNING place_id""",
(osm_type, osm_id or next(self.idseq), cls, typ, names,
admin_level, address, housenumber, rank_search,
extratags, centroid, 'SRID=4326;' + geom,
country))
place_id = cur.fetchone()[0]
self.conn.commit()
return place_id


class MockPropertyTable:
""" A property table for testing.
"""
def __init__(self, conn):
self.conn = conn

def set(self, name, value):
""" Set a property in the table to the given value.
"""
properties.set_property(self.conn, name, value)

def get(self, name):
""" Set a property in the table to the given value.
"""
return properties.get_property(self.conn, name)
@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for ICU tokenizer.
@@ -15,7 +15,6 @@ import pytest
from nominatim_db.tokenizer import icu_tokenizer
import nominatim_db.tokenizer.icu_rule_loader
from nominatim_db.db import properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
from nominatim_db.data.place_info import PlaceInfo

from mock_icu_word_table import MockIcuWordTable
@@ -31,7 +30,7 @@ def test_config(project_env, tmp_path):
sqldir = tmp_path / 'sql'
sqldir.mkdir()
(sqldir / 'tokenizer').mkdir()
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'")
(sqldir / 'tokenizer' / 'icu_tokenizer.sql').write_text("SELECT 'a'", encoding='utf-8')

project_env.lib_dir.sql = sqldir

@@ -58,7 +57,7 @@ def db_prop(temp_db_conn):
def analyzer(tokenizer_factory, test_config, monkeypatch,
temp_db_with_extensions, tmp_path):
sql = tmp_path / 'sql' / 'tokenizer' / 'icu_tokenizer.sql'
sql.write_text("SELECT 'a';")
sql.write_text("SELECT 'a';", encoding='utf-8')

monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', ':: lower();')
tok = tokenizer_factory()
@@ -80,7 +79,8 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
if with_postcode:
cfgstr['token-analysis'].append({'id': '@postcode',
'analyzer': 'postcodes'})
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(yaml.dump(cfgstr))
(test_config.project_dir / 'icu_tokenizer.yaml').write_text(
yaml.dump(cfgstr), encoding='utf-8')
tok.loader = nominatim_db.tokenizer.icu_rule_loader.ICURuleLoader(test_config)

return tok.name_analyzer()
@@ -89,13 +89,9 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,


@pytest.fixture
def sql_functions(temp_db_conn, def_config, src_dir):
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
sqlproc.run_sql_file(temp_db_conn, 'tokenizer/icu_tokenizer.sql')
def_config.lib_dir.sql = orig_sql
def sql_functions(load_sql):
load_sql('functions/utils.sql')
load_sql('tokenizer/icu_tokenizer.sql')


@pytest.fixture
@@ -190,7 +186,7 @@ def test_update_sql_functions(db_prop, temp_db_cursor,
table_factory('test', 'txt TEXT')

func_file = test_config.lib_dir.sql / 'tokenizer' / 'icu_tokenizer.sql'
func_file.write_text("""INSERT INTO test VALUES (1133)""")
func_file.write_text("""INSERT INTO test VALUES (1133)""", encoding='utf-8')

tok.update_sql_functions(test_config)

@@ -652,22 +648,21 @@ class TestUpdateWordTokens:
self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1

def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table,
placex_table):
def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table, placex_row):
add_housenumber(9999, '5432a')
add_housenumber(9990, '34z')
placex_table.add(housenumber='34z')
placex_table.add(housenumber='25432a')
placex_row(housenumber='34z')
placex_row(housenumber='25432a')

assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1

def test_keep_housenumbers_from_placex_table_hnr_list(self, add_housenumber,
word_table, placex_table):
word_table, placex_row):
add_housenumber(9991, '9 b')
add_housenumber(9990, '34z')
placex_table.add(housenumber='9 a;9 b;9 c')
placex_row(housenumber='9 a;9 b;9 c')

assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens()

@@ -27,7 +27,8 @@ class TestIcuRuleLoader:
self.project_env = project_env

def write_config(self, content):
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(dedent(content))
(self.project_env.project_dir / 'icu_tokenizer.yaml').write_text(
dedent(content), encoding='utf-8')

def config_rules(self, *variants):
content = dedent("""\
@@ -119,7 +120,7 @@ class TestIcuRuleLoader:
variants:
""")
transpath = self.project_env.project_dir / ('transliteration.yaml')
transpath.write_text('- "x > y"')
transpath.write_text('- "x > y"', encoding='utf-8')

loader = ICURuleLoader(self.project_env)
rules = loader.get_transliteration_rules()

@@ -21,7 +21,7 @@ if [ "$*" = "--version" ]; then
else
echo "$@"
fi
""")
""", encoding='utf-8')
osm2pgsql_exec.chmod(0o777)

return dict(osm2pgsql=str(osm2pgsql_exec),

@@ -2,17 +2,17 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for maintenance and analysis functions.
"""
import pytest
import datetime as dt

from nominatim_db.errors import UsageError
from nominatim_db.tools import admin
from nominatim_db.tokenizer import factory
from nominatim_db.db.sql_preprocessor import SQLPreprocessor


@pytest.fixture(autouse=True)
@@ -61,15 +61,14 @@ def test_analyse_indexing_unknown_osmid(project_env):
admin.analyse_indexing(project_env, osm_id='W12345674')


def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
def test_analyse_indexing_with_place_id(project_env, placex_row):
place_id = placex_row()

admin.analyse_indexing(project_env, place_id=12345)
admin.analyse_indexing(project_env, place_id=place_id)


def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
VALUES(9988, 'N', 10000)""")
def test_analyse_indexing_with_osm_id(project_env, placex_row):
placex_row(osm_type='N', osm_id=10000)

admin.analyse_indexing(project_env, osm_id='N10000')

@@ -77,8 +76,8 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
class TestAdminCleanDeleted:

@pytest.fixture(autouse=True)
def setup_polygon_delete(self, project_env, table_factory, place_table,
osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
def setup_polygon_delete(self, project_env, table_factory, place_table, placex_row,
osmline_table, temp_db_cursor, load_sql):
""" Set up place_force_delete function and related tables
"""
self.project_env = project_env
@@ -91,12 +90,15 @@ class TestAdminCleanDeleted:
((100, 'N', 'boundary', 'administrative'),
(145, 'N', 'boundary', 'administrative'),
(175, 'R', 'landcover', 'grass')))
temp_db_cursor.execute("""
INSERT INTO placex (place_id, osm_id, osm_type, class, type,
indexed_date, indexed_status)
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")

now = dt.datetime.now()
placex_row(osm_type='N', osm_id=100, cls='boundary', typ='administrative',
indexed_status=1, indexed_date=now - dt.timedelta(days=30))
placex_row(osm_type='N', osm_id=145, cls='boundary', typ='administrative',
indexed_status=1, indexed_date=now - dt.timedelta(days=90))
placex_row(osm_type='R', osm_id=175, cls='landcover', typ='grass',
indexed_status=1, indexed_date=now - dt.timedelta(days=90))

# set up tables and triggers for utils function
table_factory('place_to_be_deleted',
"""osm_id BIGINT,
@@ -104,7 +106,6 @@ class TestAdminCleanDeleted:
class TEXT NOT NULL,
type TEXT NOT NULL,
deferred BOOLEAN""")
table_factory('country_name', 'partition INT')
table_factory('import_polygon_error', """osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
@@ -115,11 +116,7 @@ class TestAdminCleanDeleted:
$$ LANGUAGE plpgsql;""")
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
def_config.lib_dir.sql = orig_sql
load_sql('functions/utils.sql')

def test_admin_clean_deleted_no_records(self):
admin.clean_deleted_relations(self.project_env, age='1 year')

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for functions to import a new database.
@@ -25,12 +25,14 @@ class TestDatabaseSetup:
def setup_nonexistant_db(self):
with psycopg.connect(dbname='postgres', autocommit=True) as conn:
with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))

yield True

with conn.cursor() as cur:
cur.execute(f'DROP DATABASE IF EXISTS {self.DBNAME}')
cur.execute(pysql.SQL('DROP DATABASE IF EXISTS ')
+ pysql.Identifier(self.DBNAME))

@pytest.fixture
def cursor(self):
@@ -62,7 +64,7 @@ class TestDatabaseSetup:
def test_create_db_missing_ro_user(self):
with pytest.raises(UsageError, match='Missing read-only user.'):
database_import.setup_database_skeleton(f'dbname={self.DBNAME}',
rouser='sdfwkjkjgdugu2;jgsafkljas;')
rouser='sdfwkjkjgdugu2jgsafkljas')

def test_setup_extensions_old_postgis(self, monkeypatch):
monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
@@ -96,7 +98,7 @@ def test_import_osm_data_multifile(table_factory, tmp_path, osm2pgsql_options, c

files = [tmp_path / 'file1.osm', tmp_path / 'file2.osm']
for f in files:
f.write_text('test')
f.write_text('test', encoding='utf-8')

database_import.import_osm_data(files, osm2pgsql_options)
captured = capfd.readouterr()
@@ -124,7 +126,7 @@ def test_import_osm_data_drop(table_factory, temp_db_cursor, tmp_path, osm2pgsql
table_factory('planet_osm_nodes')

flatfile = tmp_path / 'flatfile'
flatfile.write_text('touch')
flatfile.write_text('touch', encoding='utf-8')

osm2pgsql_options['flatnode_file'] = str(flatfile.resolve())

@@ -168,14 +170,41 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
@pytest.mark.asyncio
async def test_load_data(dsn, place_row, placex_table, osmline_table,
temp_db_cursor, threads):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute(pysql.SQL("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""").format(pysql.Identifier(func)))
for oid in range(100, 130):
place_row(osm_id=oid)
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)')
geom='LINESTRING(0 0, 10 10)')

temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION placex_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1;
NEW.centroid := ST_Centroid(NEW.geometry);
NEW.partition := 0;
NEW.geometry_sector := 2424;
NEW.rank_address := 30;
NEW.rank_search := 30;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

CREATE OR REPLACE FUNCTION osmline_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
IF NEW.indexed_status IS NULL THEN
NEW.indexed_status := 1;
NEW.partition := 0;
NEW.geometry_sector := 2424;
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;

CREATE TRIGGER placex_before_insert BEFORE INSERT ON placex
FOR EACH ROW EXECUTE PROCEDURE placex_insert();

CREATE TRIGGER osmline_before_insert BEFORE INSERT ON location_property_osmline
FOR EACH ROW EXECUTE PROCEDURE osmline_insert();
""")

await database_import.load_data(dsn, threads)
|
||||
|
||||
@@ -193,7 +222,7 @@ class TestSetupSQL:
|
||||
self.config = def_config
|
||||
|
||||
def write_sql(self, fname, content):
|
||||
(self.config.lib_dir.sql / fname).write_text(content)
|
||||
(self.config.lib_dir.sql / fname).write_text(content, encoding='utf-8')
|
||||
|
||||
@pytest.mark.parametrize("reverse", [True, False])
|
||||
def test_create_tables(self, temp_db_conn, temp_db_cursor, reverse):
|
||||
@@ -201,6 +230,8 @@ class TestSetupSQL:
|
||||
"""CREATE FUNCTION test() RETURNS bool
|
||||
AS $$ SELECT {{db.reverse_only}} $$ LANGUAGE SQL""")
|
||||
|
||||
self.write_sql('grants.sql', "-- Mock grants file for testing\n")
|
||||
|
||||
database_import.create_tables(temp_db_conn, self.config, reverse)
|
||||
|
||||
temp_db_cursor.scalar('SELECT test()') == reverse
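The switch from an f-string to `pysql.SQL(...) + pysql.Identifier(...)` above uses psycopg's SQL composition so that the database name is quoted as an identifier instead of being spliced into the statement as raw text (`pysql` is apparently the test file's alias for `psycopg.sql`). A self-contained sketch of the same pattern, with a placeholder database name:

```python
from psycopg import connect, sql

dbname = 'test import database'  # placeholder; may contain spaces, quotes etc.

# Identifier() quotes the name safely; SQL() marks the static part as trusted.
stmt = sql.SQL('DROP DATABASE IF EXISTS ') + sql.Identifier(dbname)

with connect(dbname='postgres', autocommit=True) as conn:
    with conn.cursor() as cur:
        cur.execute(stmt)  # runs: DROP DATABASE IF EXISTS "test import database"
```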
@@ -54,7 +54,7 @@ def test_drop_flatnode_file_file_already_gone(tmp_path):

 def test_drop_flatnode_file_delete(tmp_path):
     flatfile = tmp_path / 'flatnode.store'
-    flatfile.write_text('Some content')
+    flatfile.write_text('Some content', encoding="utf-8")

     freeze.drop_flatnode_file(flatfile)
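The recurring `encoding='utf-8'` additions throughout this changeset pin the text encoding down explicitly: without the argument, `pathlib.Path.write_text()` and `read_text()` fall back to the platform's locale encoding. A minimal illustration:

```python
from pathlib import Path

p = Path('flatnode.store')
p.write_text('Some content')                    # encoding depends on the locale
p.write_text('Some content', encoding='utf-8')  # explicit and portable
```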
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for import special phrases methods
@@ -30,7 +30,7 @@ def xml_wiki_content(src_dir):
     return the content of the static xml test file.
     """
     xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
-    return xml_test_content.read_text()
+    return xml_test_content.read_text(encoding='utf-8')


 @pytest.fixture
@@ -125,9 +125,8 @@ def test_grant_access_to_web_user(temp_db_conn, temp_db_cursor, table_factory,
                                       phrase_class, phrase_type)


-def test_create_place_classtype_table_and_indexes(
-        temp_db_cursor, def_config, placex_table,
-        sp_importer, temp_db_conn, monkeypatch):
+def test_create_place_classtype_table_and_indexes(temp_db_cursor, def_config, placex_row,
+                                                  sp_importer, temp_db_conn, monkeypatch):
     """
     Test that _create_place_classtype_table_and_indexes()
     create the right place_classtype tables and place_id indexes
@@ -136,7 +135,7 @@ def test_create_place_classtype_table_and_indexes(
     """
     pairs = set([('class1', 'type1'), ('class2', 'type2')])
     for pair in pairs:
-        placex_table.add(cls=pair[0], typ=pair[1])  # adding to db
+        placex_row(cls=pair[0], typ=pair[1])  # adding to db
     sp_importer._create_classtype_table_and_indexes(pairs)
     temp_db_conn.commit()

@@ -178,7 +177,7 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,

 @pytest.mark.parametrize("should_replace", [(True), (False)])
 def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
-                        placex_table, table_factory, tokenizer_mock,
+                        placex_row, table_factory, tokenizer_mock,
                         xml_wiki_content, should_replace):
     """
     Check that the main import_phrases() method is well executed.
@@ -199,8 +198,8 @@ def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
     type_test = 'zip_line'

     tokenizer = tokenizer_mock()
-    placex_table.add(cls=class_test, typ=type_test)  # in db for special phrase filtering
-    placex_table.add(cls='amenity', typ='animal_shelter')  # in db for special phrase filtering
+    placex_row(cls=class_test, typ=type_test)  # in db for special phrase filtering
+    placex_row(cls='amenity', typ='animal_shelter')  # in db for special phrase filtering
     sp_importer.import_phrases(tokenizer, should_replace)

     assert len(tokenizer.analyser_cache['special_phrases']) == 19
@@ -257,7 +256,7 @@ def check_placeid_and_centroid_indexes(temp_db_cursor, phrase_class, phrase_type

 @pytest.mark.parametrize("should_replace", [(True), (False)])
 def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, def_config,
-                                                 sp_importer, placex_table, tokenizer_mock,
+                                                 sp_importer, placex_row, tokenizer_mock,
                                                  xml_wiki_content, should_replace):

     monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
@@ -266,7 +265,7 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
     class_test = 'aerialway'
     type_test = 'zip_line'

-    placex_table.add(cls=class_test, typ=type_test)  # add to the database to make valid
+    placex_row(cls=class_test, typ=type_test)  # add to the database to make valid
     tokenizer = tokenizer_mock()
     sp_importer.import_phrases(tokenizer, should_replace)

@@ -276,11 +275,11 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
     assert check_grant_access(temp_db_cursor, def_config.DATABASE_WEBUSER, class_test, type_test)


-def test_get_classtype_pairs_directly(placex_table, temp_db_conn, sp_importer):
+def test_get_classtype_pairs_directly(placex_row, temp_db_conn, sp_importer):
     for _ in range(101):
-        placex_table.add(cls='highway', typ='residential')
+        placex_row(cls='highway', typ='residential')
     for _ in range(99):
-        placex_table.add(cls='amenity', typ='toilet')
+        placex_row(cls='amenity', typ='toilet')

     temp_db_conn.commit()
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for functions to maintain the artificial postcode table.
@@ -75,20 +75,18 @@ class MockPostcodeTable:
 @pytest.fixture
 def postcode_table(def_config, temp_db_conn, placex_table, table_factory):
     country_info.setup_country_config(def_config)
     table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
     return MockPostcodeTable(temp_db_conn, def_config)


 @pytest.fixture
-def insert_implicit_postcode(placex_table, place_postcode_row):
+def insert_implicit_postcode(placex_row, place_postcode_row):
     """ Insert data into the placex and place table
         which can then be used to compute one postcode.
     """
     def _insert_implicit_postcode(osm_id, country, geometry, postcode, in_placex=False):
         if in_placex:
-            placex_table.add(osm_id=osm_id, country=country, geom=geometry,
-                             centroid=f'SRID=4326;{geometry}',
-                             address={'postcode': postcode})
+            placex_row(osm_id=osm_id, country=country, geom=geometry,
+                       centroid=geometry, address={'postcode': postcode})
         else:
             place_postcode_row(osm_id=osm_id, centroid=geometry,
                                country=country, postcode=postcode)
@@ -245,7 +243,7 @@ def test_postcodes_extern(postcode_update, postcode_table, tmp_path,
     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

     extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
+    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')

     if gzipped:
         subprocess.run(['gzip', str(extfile)])
@@ -262,7 +260,7 @@ def test_postcodes_extern_bad_column(postcode_update, postcode_table, tmp_path,
     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

     extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")
+    extfile.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10", encoding='utf-8')

     postcode_update(tmp_path)

@@ -274,7 +272,8 @@ def test_postcodes_extern_bad_number(postcode_update, insert_implicit_postcode,
     insert_implicit_postcode(1, 'xx', 'POINT(10 12)', 'AB 4511')

     extfile = tmp_path / 'xx_postcodes.csv'
-    extfile.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")
+    extfile.write_text(
+        "postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0", encoding='utf-8')

     postcode_update(tmp_path)
@@ -42,8 +42,8 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r


 @pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
-def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor):
-    placex_table.add(osm_type=osm_type, osm_id=57283)
+def test_invalidate_osm_object_simple(placex_row, osm_type, temp_db_conn, temp_db_cursor):
+    placex_row(osm_type=osm_type, osm_id=57283)

     refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False)
     temp_db_conn.commit()
@@ -53,8 +53,8 @@ def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp
                            (osm_type, 57283))


-def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor):
-    placex_table.add(osm_type='W', osm_id=57283)
+def test_invalidate_osm_object_nonexisting_simple(placex_row, temp_db_conn, temp_db_cursor):
+    placex_row(osm_type='W', osm_id=57283)

     refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False)
     temp_db_conn.commit()
@@ -64,8 +64,8 @@ def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, te


 @pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
-def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor):
-    placex_table.add(osm_type=osm_type, osm_id=57283)
+def test_invalidate_osm_object_recursive(placex_row, osm_type, temp_db_conn, temp_db_cursor):
+    placex_row(osm_type=osm_type, osm_id=57283)

     temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT)
                               RETURNS BOOLEAN AS $$
@@ -22,7 +22,7 @@ def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config):

 def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):
     test_file = project_env.project_dir / 'address-levels.json'
-    test_file.write_text('[{"tags":{"place":{"sea":2}}}]')
+    test_file.write_text('[{"tags":{"place":{"sea":2}}}]', encoding='utf-8')

     load_address_levels_from_config(temp_db_conn, project_env)

@@ -31,7 +31,7 @@ def test_load_ranks_from_project_dir(project_env, temp_db_conn, temp_db_cursor):

 def test_load_ranks_from_broken_file(project_env, temp_db_conn):
     test_file = project_env.project_dir / 'address-levels.json'
-    test_file.write_text('[{"tags":"place":{"sea":2}}}]')
+    test_file.write_text('[{"tags":"place":{"sea":2}}}]', encoding='utf-8')

     with pytest.raises(json.decoder.JSONDecodeError):
         load_address_levels_from_config(temp_db_conn, project_env)
@@ -21,7 +21,7 @@ class TestCreateFunctions:

     def write_functions(self, content):
         sqlfile = self.config.lib_dir.sql / 'functions.sql'
-        sqlfile.write_text(content)
+        sqlfile.write_text(content, encoding='utf-8')

     def test_create_functions(self, temp_db_cursor):
         self.write_functions("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for correctly assigning wikipedia pages to places.
@@ -20,7 +20,7 @@ from nominatim_db.tools.refresh import (import_wikipedia_articles,
 @pytest.fixture
 def wiki_csv(tmp_path, sql_preprocessor):
     def _import(data):
-        with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
+        with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
             writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
                                                     'importance', 'wikidata_id'],
                                     delimiter='\t', quotechar='|')
@@ -38,7 +38,7 @@ def wiki_csv(tmp_path, sql_preprocessor):
                          {'wikipedia': 'en:Test'},
                          {'wikidata': 'Q123'}])
 def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
-                   def_config, wiki_csv, placex_table, extra):
+                   def_config, wiki_csv, placex_row, extra):
     import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')]))
     create_functions(temp_db_conn, def_config)

@@ -46,7 +46,7 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
                           'SELECT language, title, importance, wikidata FROM wikimedia_importance')
     assert content == set([('en', 'Test', 0.3, 'Q123')])

-    place_id = placex_table.add(osm_id=12, extratags=extra)
+    place_id = placex_row(osm_id=12, extratags=extra)
     table_factory('search_name',
                   'place_id BIGINT, importance FLOAT',
                   [(place_id, 0.2)])
@@ -61,11 +61,11 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,


 def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv,
-                            placex_table, table_factory):
+                            placex_row, table_factory):
     import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')]))
     create_functions(temp_db_conn, def_config)

-    place_id = placex_table.add(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
+    place_id = placex_row(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
     table_factory('search_name',
                   'place_id BIGINT, importance FLOAT',
                   [(place_id, 0.2)])
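The `wiki_csv` fixture above relies on `gzip.open()` in text mode: with `'wt'` the compressed stream is wrapped in a text layer, so it accepts an `encoding` argument just like `open()`, and the handle can be passed straight to `csv.DictWriter`. A standalone sketch of that pattern:

```python
import csv
import gzip

with gzip.open('wikimedia-importance.csv.gz', mode='wt', encoding='utf-8') as fd:
    writer = csv.DictWriter(fd, fieldnames=['language', 'title', 'importance'],
                            delimiter='\t', quotechar='|')
    writer.writeheader()
    writer.writerow({'language': 'en', 'title': 'Test', 'importance': 0.3})
```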
@@ -1,69 +1,70 @@
 # SPDX-License-Identifier: GPL-3.0-or-later
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
 # Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.

 from nominatim_db.tools.special_phrases.sp_importer import SPImporter


 # Testing Database Class Pair Retrieval using Conftest.py and placex
-def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
+def test_get_classtype_pair_data(placex_row, def_config, temp_db_conn):
     for _ in range(100):
-        placex_table.add(cls='highway', typ='motorway')  # edge case 100
+        placex_row(cls='highway', typ='motorway')  # edge case 100

     for _ in range(99):
-        placex_table.add(cls='amenity', typ='prison')  # edge case 99
+        placex_row(cls='amenity', typ='prison')  # edge case 99

     for _ in range(150):
-        placex_table.add(cls='tourism', typ='hotel')
+        placex_row(cls='tourism', typ='hotel')

     importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)

     result = importer.get_classtype_pairs(min=100)

-    expected = {
+    assert result == {
         ("highway", "motorway"),
         ("tourism", "hotel")
     }

-    assert result == expected, f"Expected {expected}, got {result}"
-

-def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
+def test_get_classtype_pair_data_more(placex_row, def_config, temp_db_conn):
     for _ in range(99):
-        placex_table.add(cls='emergency', typ='firehydrant')  # edge case 99, not included
+        placex_row(cls='emergency', typ='firehydrant')  # edge case 99, not included

     for _ in range(199):
-        placex_table.add(cls='amenity', typ='prison')
+        placex_row(cls='amenity', typ='prison')

     for _ in range(3478):
-        placex_table.add(cls='tourism', typ='hotel')
+        placex_row(cls='tourism', typ='hotel')

     importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)

     result = importer.get_classtype_pairs(min=100)

-    expected = {
+    assert result == {
         ("amenity", "prison"),
         ("tourism", "hotel")
     }

-    assert result == expected, f"Expected {expected}, got {result}"
-

-def test_get_classtype_pair_data_default(placex_table, def_config, temp_db_conn):
+def test_get_classtype_pair_data_default(placex_row, def_config, temp_db_conn):
     for _ in range(1):
-        placex_table.add(cls='emergency', typ='firehydrant')
+        placex_row(cls='emergency', typ='firehydrant')

     for _ in range(199):
-        placex_table.add(cls='amenity', typ='prison')
+        placex_row(cls='amenity', typ='prison')

     for _ in range(3478):
-        placex_table.add(cls='tourism', typ='hotel')
+        placex_row(cls='tourism', typ='hotel')

     importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)

     result = importer.get_classtype_pairs()

-    expected = {
+    assert result == {
         ("amenity", "prison"),
         ("tourism", "hotel"),
         ("emergency", "firehydrant")
     }

-    assert result == expected, f"Expected {expected}, got {result}"
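These tests pin down the contract of `get_classtype_pairs(min=...)`: a (class, type) pair is returned once it occurs at least `min` times, so 100 occurrences are included while 99 are not, and the default threshold evidently keeps even a single occurrence. The implementation itself is not part of this diff; a sketch of a query that would satisfy that contract, under those assumptions:

```python
def get_classtype_pairs(cur, min=1):
    """ Illustrative only: count (class, type) pairs in placex and keep
        those at or above the threshold. The parameter is named `min`
        to match the call sites above, although it shadows the builtin.
    """
    cur.execute("""SELECT class, type FROM placex
                   GROUP BY class, type
                   HAVING count(*) >= %s""", (min,))
    return set(cur.fetchall())
```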
@@ -21,7 +21,7 @@ def sp_wiki_loader(src_dir, monkeypatch, def_config):

     def _mock_wiki_content(lang):
         xml_test_content = src_dir / 'test' / 'testdata' / 'special_phrases_test_content.txt'
-        return xml_test_content.read_text()
+        return xml_test_content.read_text(encoding='utf-8')

     monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
                         _mock_wiki_content)
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Test for tiger data function
@@ -13,59 +13,10 @@ from textwrap import dedent
 import pytest
 import pytest_asyncio  # noqa: F401

-from nominatim_db.db.connection import execute_scalar
-from nominatim_db.tools import tiger_data, freeze
+from nominatim_db.tools import tiger_data
 from nominatim_db.errors import UsageError


-class MockTigerTable:
-
-    def __init__(self, conn):
-        self.conn = conn
-        with conn.cursor() as cur:
-            cur.execute("""CREATE TABLE tiger (linegeo GEOMETRY,
-                                               start INTEGER,
-                                               stop INTEGER,
-                                               interpol TEXT,
-                                               token_info JSONB,
-                                               postcode TEXT)""")
-
-            # We need this table to determine if the database is frozen or not
-            cur.execute("CREATE TABLE place (number INTEGER)")
-            # We need this table to determine if the database is in reverse-only mode
-            cur.execute("CREATE TABLE search_name (place_id BIGINT)")
-
-    def count(self):
-        return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
-
-    def row(self):
-        with self.conn.cursor() as cur:
-            cur.execute("SELECT * FROM tiger LIMIT 1")
-            return cur.fetchone()
-
-
-@pytest.fixture
-def tiger_table(def_config, temp_db_conn, sql_preprocessor,
-                temp_db_with_extensions, tmp_path):
-    def_config.lib_dir.sql = tmp_path / 'sql'
-    def_config.lib_dir.sql.mkdir()
-
-    (def_config.lib_dir.sql / 'tiger_import_start.sql').write_text(
-        """CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, start INTEGER,
-                                                        stop INTEGER, interpol TEXT,
-                                                        token_info JSONB, postcode TEXT)
-           RETURNS INTEGER AS $$
-            INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
-            RETURNING 1
-           $$ LANGUAGE SQL;""")
-    (def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
-        """DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
-                                            in_endnumber INTEGER, interpolationtype TEXT,
-                                            token_info JSONB, in_postcode TEXT);""")
-
-    return MockTigerTable(temp_db_conn)
-
-
 @pytest.fixture
 def csv_factory(tmp_path):
     def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St',
@@ -75,112 +26,115 @@ def csv_factory(tmp_path):
         from;to;interpolation;street;city;state;postcode;geometry
         {};{};{};{};{};{};{};{}
         """.format(hnr_from, hnr_to, interpol, street, city, state,
-                   postcode, geometry)))
+                   postcode, geometry)), encoding='utf-8')

     return _mk_file


-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
-
-    assert tiger_table.count() == 6213
+class TestTiger:

+    @pytest.fixture(autouse=True)
+    def setup(self, temp_db_conn, placex_row, load_sql):
+        load_sql('tables/search_name.sql', create_reverse_only=False)
+        load_sql('tables/tiger.sql')
+
+        # fake parent roads
+        for x in range(-870, -863):
+            for y in range(323, 328):
+                placex_row(rank_search=26, rank_address=26,
+                           geom=f"LINESTRING({x/10 - 0.1} {y/10}, {x/10 + 0.1} {y/10})")
+
+        temp_db_conn.execute("""
+            CREATE OR REPLACE FUNCTION get_partition(cc VARCHAR(10)) RETURNS INTEGER AS $$
+              SELECT 0;
+            $$ LANGUAGE sql;
+            CREATE OR REPLACE FUNCTION token_matches_street(i JSONB, s INT[]) RETURNS BOOLEAN AS $$
+              SELECT false
+            $$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
+            """)

-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
-                                              tokenizer_mock, threads):
-    freeze.drop_update_tables(temp_db_conn)
-
-    await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                    def_config, threads, tokenizer_mock())
-
-    assert tiger_table.count() == 6213
-
-
-@pytest.mark.asyncio
-async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
-                                           tokenizer_mock):
-    with temp_db_conn.cursor() as cur:
-        cur.execute("DROP TABLE search_name")
-    temp_db_conn.commit()
-
-    with pytest.raises(UsageError,
-                       match="Cannot perform tiger import: required tables are missing. "
-                             "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
-        await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
-                                        def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_database_frozen(self, def_config, src_dir, temp_db_cursor,
+                                                  tokenizer_mock, threads):
+        await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                        def_config, threads, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209
+
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_reverse_only(self, def_config, src_dir, temp_db_cursor,
+                                               tokenizer_mock):
+        temp_db_cursor.execute("DROP TABLE search_name")
+
+        with pytest.raises(UsageError,
+                           match="Cannot perform tiger import: required tables are missing. "
+                                 "See https://github.com/osm-search/Nominatim/issues/2463 for details."):
+            await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
+                                            def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0

-@pytest.mark.asyncio
-async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_no_files(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0

-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
-                                       tmp_path):
-    sqlfile = tmp_path / '1010.csv'
-    sqlfile.write_text("""Random text""")
-
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 0
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_file(self, def_config, temp_db_cursor, tokenizer_mock,
+                                           tmp_path):
+        sqlfile = tmp_path / '1010.csv'
+        sqlfile.write_text('Random text', encoding='utf-8')
+
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0

-@pytest.mark.asyncio
-async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
-                                      csv_factory, tmp_path):
-    csv_factory('file1', hnr_from=99)
-    csv_factory('file2', hnr_from='L12')
-    csv_factory('file3', hnr_to='12.4')
-
-    await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
-
-    assert tiger_table.count() == 1
-    assert tiger_table.row().start == 99
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_hnr_nan(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          csv_factory, tmp_path):
+        csv_factory('file1', hnr_to=99)
+        csv_factory('file2', hnr_from='L12')
+        csv_factory('file3', hnr_to='12.4')
+
+        await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
+
+        rows = temp_db_cursor.row_set("""
+            SELECT startnumber, endnumber FROM location_property_tiger""")
+
+        assert rows == {(1, 99)}

-@pytest.mark.parametrize("threads", (1, 5))
-@pytest.mark.asyncio
-async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
-                                      tmp_path, src_dir, threads):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
-    tar.close()
-
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
-                                    tokenizer_mock())
-
-    assert tiger_table.count() == 6213
+    @pytest.mark.parametrize("threads", (1, 5))
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                          tmp_path, src_dir, threads):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
+        tar.close()
+
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
+                                        tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 6209

-@pytest.mark.asyncio
-async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
-                                          tmp_path):
-    tarfile = tmp_path / 'sample.tar.gz'
-    tarfile.write_text("""Random text""")
-
-    with pytest.raises(UsageError):
-        await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_bad_tarfile(self, def_config, tokenizer_mock, tmp_path):
+        tarfile = tmp_path / 'sample.tar.gz'
+        tarfile.write_text("""Random text""", encoding='utf-8')
+
+        with pytest.raises(UsageError):
+            await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())

-@pytest.mark.asyncio
-async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
-                                            tmp_path):
-    tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
-    tar.add(__file__)
-    tar.close()
-
-    await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
-                                    tokenizer_mock())
-
-    assert tiger_table.count() == 0
+    @pytest.mark.asyncio
+    async def test_add_tiger_data_empty_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
+                                                tmp_path):
+        tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
+        tar.add(__file__)
+        tar.close()
+
+        await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
+                                        tokenizer_mock())
+
+        assert temp_db_cursor.table_rows('location_property_tiger') == 0
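The tiger tests are regrouped into a `TestTiger` class so that a single `autouse` fixture can build the shared schema and fake parent roads once per test; pytest runs an autouse fixture automatically before every test in the class, without the tests having to request it. A minimal standalone illustration of that pattern (names here are invented for the example):

```python
import pytest


class TestExample:

    @pytest.fixture(autouse=True)
    def setup(self):
        # Runs before every test method in this class; no explicit
        # fixture argument is needed in the tests themselves.
        self.data = [1, 2, 3]

    def test_uses_setup(self):
        assert self.data == [1, 2, 3]
```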