Compare commits

...

18 Commits

Author SHA1 Message Date
Sarah Hoffmann
2ddb19c0b0 Merge pull request #3986 from lonvia/rework-tiger-tests
Rework unit tests for import of tiger data
2026-02-13 14:11:04 +01:00
Sarah Hoffmann
3f14f89bdf Merge pull request #3985 from lonvia/rework-indexing-test
Rework unit tests for indexing
2026-02-13 12:06:51 +01:00
Sarah Hoffmann
8ed7a3875a rework tiger unit tests to use production pgsql functions 2026-02-13 11:16:40 +01:00
Sarah Hoffmann
70b9140f13 pass reverse-only as a separate parameter
... instead of hacking the internal structures of SQLPreprocessor
2026-02-13 11:12:23 +01:00
Sarah Hoffmann
3285948130 rewrite indexing tests to use standard table fixtures 2026-02-13 11:03:18 +01:00
Sarah Hoffmann
9d0732a941 add fixtures for postcode, interpolation table creation and filling 2026-02-13 10:57:59 +01:00
Sarah Hoffmann
5314e6c881 Merge pull request #3984 from lonvia/avoid-custom-table-definition-in-tests
Reuse table creation SQL in unittest
2026-02-13 09:16:44 +01:00
Sarah Hoffmann
2750d66470 use load_sql fixture instead of explicit SQLPreprocessor 2026-02-12 22:42:58 +01:00
Sarah Hoffmann
0d423ad7a7 reorganise fixtures for placex table
Now follows the same pattern as fixtures for other tables and
uses the production SQL for table creation.
2026-02-12 22:14:15 +01:00
Sarah Hoffmann
dd332caa4d simplify property test table implementation 2026-02-12 21:15:03 +01:00
Sarah Hoffmann
d691cfc35d switch table definitions in conftest to use production SQL 2026-02-12 21:12:10 +01:00
Sarah Hoffmann
d274a5aecc add fixtures for country table 2026-02-12 20:55:59 +01:00
Sarah Hoffmann
35a023d133 add function for inserting data to testing cursor 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
79682a94ce use better SQL quoting in test cursor implementation 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
aa42dc8a93 fix potential cancelling race with full queue 2026-02-12 20:44:04 +01:00
Sarah Hoffmann
29fcd0b763 Merge pull request #3982 from lonvia/split-table-creation
Split up table creation SQL
2026-02-12 17:42:45 +01:00
Sarah Hoffmann
2237ce7124 split up table creation SQL into separate files 2026-02-12 16:36:10 +01:00
Sarah Hoffmann
58295e0643 remove unused indexes and sequences 2026-02-12 16:33:45 +01:00
29 changed files with 890 additions and 877 deletions

View File

@@ -5,276 +5,21 @@
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
drop table if exists import_status;
CREATE TABLE import_status (
lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean
);
drop table if exists import_osmosis_log;
CREATE TABLE import_osmosis_log (
batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text
);
DROP TABLE IF EXISTS nominatim_properties;
CREATE TABLE nominatim_properties (
property TEXT NOT NULL,
value TEXT
);
drop table IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
place_id BIGINT NOT NULL,
keywords INTEGER[] NOT NULL,
partition SMALLINT NOT NULL,
rank_search SMALLINT NOT NULL,
rank_address SMALLINT NOT NULL,
country_code VARCHAR(2),
isguess BOOL NOT NULL,
postcode TEXT,
centroid GEOMETRY(Point, 4326) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
);
CREATE TABLE location_area_large () INHERITS (location_area);
DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
place_id BIGINT NOT NULL,
country_code varchar(2) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.address_data}};
CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}};
CREATE TABLE location_property_tiger (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
startnumber INTEGER NOT NULL,
endnumber INTEGER NOT NULL,
step SMALLINT NOT NULL,
partition SMALLINT NOT NULL,
linegeo GEOMETRY NOT NULL,
postcode TEXT);
drop table if exists location_property_osmline;
CREATE TABLE location_property_osmline (
place_id BIGINT NOT NULL,
osm_id BIGINT NOT NULL,
parent_place_id BIGINT,
geometry_sector INTEGER NOT NULL,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
step SMALLINT,
partition SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
linegeo GEOMETRY NOT NULL,
address HSTORE,
token_info JSONB, -- custom column for tokenizer use only
postcode TEXT,
country_code VARCHAR(2)
){{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {{db.tablespace.search_index}}
WHERE startnumber is not null;
drop table IF EXISTS search_name;
{% if not db.reverse_only %}
CREATE TABLE search_name (
place_id BIGINT NOT NULL,
importance FLOAT NOT NULL,
search_rank SMALLINT NOT NULL,
address_rank SMALLINT NOT NULL,
name_vector integer[] NOT NULL,
nameaddress_vector integer[] NOT NULL,
country_code varchar(2),
centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_search_name_place_id
ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};
{% endif %}
drop table IF EXISTS place_addressline;
CREATE TABLE place_addressline (
place_id BIGINT NOT NULL,
address_place_id BIGINT NOT NULL,
distance FLOAT NOT NULL,
cached_rank_address SMALLINT NOT NULL,
fromarea boolean NOT NULL,
isaddress boolean NOT NULL
) {{db.tablespace.search_data}};
CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}};
--------- PLACEX - storage for all indexed places -----------------
DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER NOT NULL,
rank_address SMALLINT NOT NULL,
rank_search SMALLINT NOT NULL,
partition SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
LIKE place INCLUDING CONSTRAINTS,
wikipedia TEXT, -- calculated wikipedia article name (language:title)
token_info JSONB, -- custom column for tokenizer use only
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
{% for osm_type in ('N', 'W', 'R') %}
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
USING BTREE (osm_id) {{db.tablespace.search_index}}
WHERE osm_type = '{{osm_type}}';
{% endfor %}
-- Usage: - removing linkage status on update
-- - lookup linked places for /details
CREATE INDEX idx_placex_linked_place_id ON placex
USING BTREE (linked_place_id) {{db.tablespace.address_index}}
WHERE linked_place_id IS NOT NULL;
-- Usage: - check that admin boundaries do not overtake each other rank-wise
-- - check that place node in a admin boundary with the same address level
-- - boundary is not completely contained in a place area
-- - parenting of large-area or unparentable features
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
USING gist (geometry) {{db.tablespace.address_index}}
WHERE rank_address between 1 and 25
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26 and class = 'place';
-- Usage: - is node part of a way?
-- - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;
-- Usage: - linking place nodes by wikidata tag to boundaries
CREATE INDEX idx_placex_wikidata on placex
USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
WHERE extratags ? 'wikidata' and class = 'place'
and osm_type = 'N' and rank_search < 26;
-- The following two indexes function as a todo list for indexing.
CREATE INDEX idx_placex_rank_address_sector ON placex
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
WHERE class = 'boundary' and type = 'administrative'
and indexed_status > 0;
DROP SEQUENCE IF EXISTS seq_place;
CREATE SEQUENCE seq_place start 1;
-- Table for synthetic postcodes.
DROP TABLE IF EXISTS location_postcodes;
CREATE TABLE location_postcodes (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
osm_id BIGINT,
rank_search SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
indexed_date TIMESTAMP,
country_code varchar(2) NOT NULL,
postcode TEXT NOT NULL,
centroid GEOMETRY(Geometry, 4326) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
);
CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
USING GIST (geometry) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode
ON location_postcodes USING BTREE (postcode, country_code)
{{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid
ON location_postcodes USING BTREE (osm_id) {{db.tablespace.search_index}};
-- Table to store location of entrance nodes
DROP TABLE IF EXISTS placex_entrance;
CREATE TABLE placex_entrance (
place_id BIGINT NOT NULL,
osm_id BIGINT NOT NULL,
type TEXT NOT NULL,
location GEOMETRY(Point, 4326) NOT NULL,
extratags HSTORE
);
CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};
-- Create an index on the place table for lookups to populate the entrance
-- table
CREATE INDEX IF NOT EXISTS idx_placex_entrance_lookup ON place
USING BTREE (osm_id)
WHERE class IN ('routing:entrance', 'entrance');
DROP TABLE IF EXISTS import_polygon_error;
CREATE TABLE import_polygon_error (
osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL,
name HSTORE,
country_code varchar(2),
updated timestamp,
errormessage text,
prevgeometry GEOMETRY(Geometry, 4326),
newgeometry GEOMETRY(Geometry, 4326)
);
CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL
);
CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
DROP SEQUENCE IF EXISTS file;
CREATE SEQUENCE file start 1;
{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
-- create dummy tables here, if nothing was imported
CREATE TABLE wikimedia_importance (
language TEXT NOT NULL,
title TEXT NOT NULL,
importance double precision NOT NULL,
wikidata TEXT
) {{db.tablespace.address_data}};
{% endif %}
{% include('tables/status.sql') %}
{% include('tables/nominatim_properties.sql') %}
{% include('tables/location_area.sql') %}
{% include('tables/tiger.sql') %}
{% include('tables/interpolation.sql') %}
{% include('tables/search_name.sql') %}
{% include('tables/addressline.sql') %}
{% include('tables/placex.sql') %}
{% include('tables/postcodes.sql') %}
{% include('tables/entrance.sql') %}
{% include('tables/import_reports.sql') %}
{% include('tables/importance_tables.sql') %}
-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.

View File

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS place_addressline;
CREATE TABLE place_addressline (
place_id BIGINT NOT NULL,
address_place_id BIGINT NOT NULL,
distance FLOAT NOT NULL,
cached_rank_address SMALLINT NOT NULL,
fromarea boolean NOT NULL,
isaddress boolean NOT NULL
) {{db.tablespace.search_data}};
CREATE INDEX idx_place_addressline_place_id ON place_addressline
USING BTREE (place_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,20 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- Table to store location of entrance nodes
DROP TABLE IF EXISTS placex_entrance;
CREATE TABLE placex_entrance (
place_id BIGINT NOT NULL,
osm_id BIGINT NOT NULL,
type TEXT NOT NULL,
location GEOMETRY(Point, 4326) NOT NULL,
extratags HSTORE
);
CREATE UNIQUE INDEX idx_placex_entrance_place_id_osm_id ON placex_entrance
USING BTREE (place_id, osm_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,35 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS import_polygon_error;
CREATE TABLE import_polygon_error (
osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL,
name HSTORE,
country_code varchar(2),
updated timestamp,
errormessage text,
prevgeometry GEOMETRY(Geometry, 4326),
newgeometry GEOMETRY(Geometry, 4326)
);
CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error
USING BTREE (osm_type, osm_id);
DROP TABLE IF EXISTS import_polygon_delete;
CREATE TABLE import_polygon_delete (
osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
type TEXT NOT NULL
);
CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete
USING BTREE (osm_type, osm_id);

View File

@@ -0,0 +1,16 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
{% if 'wikimedia_importance' not in db.tables and 'wikipedia_article' not in db.tables %}
-- create dummy tables here if nothing was imported
CREATE TABLE wikimedia_importance (
language TEXT NOT NULL,
title TEXT NOT NULL,
importance double precision NOT NULL,
wikidata TEXT
) {{db.tablespace.address_data}};
{% endif %}

View File

@@ -0,0 +1,34 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS location_property_osmline;
CREATE TABLE location_property_osmline (
place_id BIGINT NOT NULL,
osm_id BIGINT NOT NULL,
parent_place_id BIGINT,
geometry_sector INTEGER NOT NULL,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
step SMALLINT,
partition SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
linegeo GEOMETRY NOT NULL,
address HSTORE,
token_info JSONB, -- custom column for tokenizer use only
postcode TEXT,
country_code VARCHAR(2)
){{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline
USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline
USING BTREE (geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_osmline_linegeo ON location_property_osmline
USING GIST (linegeo) {{db.tablespace.search_index}}
WHERE startnumber is not null;

View File

@@ -0,0 +1,32 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS location_area CASCADE;
CREATE TABLE location_area (
place_id BIGINT NOT NULL,
keywords INTEGER[] NOT NULL,
partition SMALLINT NOT NULL,
rank_search SMALLINT NOT NULL,
rank_address SMALLINT NOT NULL,
country_code VARCHAR(2),
isguess BOOL NOT NULL,
postcode TEXT,
centroid GEOMETRY(Point, 4326) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
);
CREATE TABLE location_area_large () INHERITS (location_area);
DROP TABLE IF EXISTS location_area_country;
CREATE TABLE location_area_country (
place_id BIGINT NOT NULL,
country_code varchar(2) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.address_data}};
CREATE INDEX idx_location_area_country_geometry ON location_area_country
USING GIST (geometry) {{db.tablespace.address_index}};

View File

@@ -0,0 +1,12 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS nominatim_properties;
CREATE TABLE nominatim_properties (
property TEXT NOT NULL,
value TEXT
);

87
lib-sql/tables/placex.sql Normal file
View File

@@ -0,0 +1,87 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
-- placex - main table for searchable places
DROP TABLE IF EXISTS placex;
CREATE TABLE placex (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER NOT NULL,
rank_address SMALLINT NOT NULL,
rank_search SMALLINT NOT NULL,
partition SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
LIKE place INCLUDING CONSTRAINTS,
wikipedia TEXT, -- calculated wikipedia article name (language:title)
token_info JSONB, -- custom column for tokenizer use only
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}};
{% for osm_type in ('N', 'W', 'R') %}
CREATE INDEX idx_placex_osmid_{{osm_type | lower}} ON placex
USING BTREE (osm_id) {{db.tablespace.search_index}}
WHERE osm_type = '{{osm_type}}';
{% endfor %}
-- Usage: - removing linkage status on update
-- - lookup linked places for /details
CREATE INDEX idx_placex_linked_place_id ON placex
USING BTREE (linked_place_id) {{db.tablespace.address_index}}
WHERE linked_place_id IS NOT NULL;
-- Usage: - check that admin boundaries do not overtake each other rank-wise
-- - check that place node in a admin boundary with the same address level
-- - boundary is not completely contained in a place area
-- - parenting of large-area or unparentable features
CREATE INDEX idx_placex_geometry_address_area_candidates ON placex
USING gist (geometry) {{db.tablespace.address_index}}
WHERE rank_address between 1 and 25
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - POI is within building with housenumber
CREATE INDEX idx_placex_geometry_buildings ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE address is not null and rank_search = 30
and ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon');
-- Usage: - linking of similar named places to boundaries
-- - linking of place nodes with same type to boundaries
CREATE INDEX idx_placex_geometry_placenode ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'N' and rank_search < 26 and class = 'place';
-- Usage: - is node part of a way?
-- - find parent of interpolation spatially
CREATE INDEX idx_placex_geometry_lower_rank_ways ON placex
USING SPGIST (geometry) {{db.tablespace.address_index}}
WHERE osm_type = 'W' and rank_search >= 26;
-- Usage: - linking place nodes by wikidata tag to boundaries
CREATE INDEX idx_placex_wikidata on placex
USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}}
WHERE extratags ? 'wikidata' and class = 'place'
and osm_type = 'N' and rank_search < 26;
-- The following two indexes function as a todo list for indexing.
CREATE INDEX idx_placex_rank_address_sector ON placex
USING BTREE (rank_address, geometry_sector) {{db.tablespace.address_index}}
WHERE indexed_status > 0;
CREATE INDEX idx_placex_rank_boundaries_sector ON placex
USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}
WHERE class = 'boundary' and type = 'administrative'
and indexed_status > 0;

View File

@@ -0,0 +1,30 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS location_postcodes;
CREATE TABLE location_postcodes (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
osm_id BIGINT,
rank_search SMALLINT NOT NULL,
indexed_status SMALLINT NOT NULL,
indexed_date TIMESTAMP,
country_code varchar(2) NOT NULL,
postcode TEXT NOT NULL,
centroid GEOMETRY(Geometry, 4326) NOT NULL,
geometry GEOMETRY(Geometry, 4326) NOT NULL
);
CREATE UNIQUE INDEX idx_location_postcodes_id ON location_postcodes
USING BTREE (place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_location_postcodes_geometry ON location_postcodes
USING GIST (geometry) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_postcode ON location_postcodes
USING BTREE (postcode, country_code) {{db.tablespace.search_index}};
CREATE INDEX IF NOT EXISTS idx_location_postcodes_osmid ON location_postcodes
USING BTREE (osm_id) {{db.tablespace.search_index}};

View File

@@ -0,0 +1,26 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS search_name;
{% if not create_reverse_only %}
CREATE TABLE search_name (
place_id BIGINT NOT NULL,
importance FLOAT NOT NULL,
search_rank SMALLINT NOT NULL,
address_rank SMALLINT NOT NULL,
name_vector integer[] NOT NULL,
nameaddress_vector integer[] NOT NULL,
country_code varchar(2),
centroid GEOMETRY(Geometry, 4326) NOT NULL
) {{db.tablespace.search_data}};
CREATE UNIQUE INDEX idx_search_name_place_id
ON search_name USING BTREE (place_id) {{db.tablespace.search_index}};
{% endif %}

23
lib-sql/tables/status.sql Normal file
View File

@@ -0,0 +1,23 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS import_status;
CREATE TABLE import_status (
lastimportdate TIMESTAMP WITH TIME ZONE NOT NULL,
sequence_id INTEGER,
indexed BOOLEAN
);
DROP TABLE IF EXISTS import_osmosis_log;
CREATE TABLE import_osmosis_log (
batchend TIMESTAMP,
batchseq INTEGER,
batchsize BIGINT,
starttime TIMESTAMP,
endtime TIMESTAMP,
event TEXT
);

17
lib-sql/tables/tiger.sql Normal file
View File

@@ -0,0 +1,17 @@
-- SPDX-License-Identifier: GPL-2.0-only
--
-- This file is part of Nominatim. (https://nominatim.org)
--
-- Copyright (C) 2026 by the Nominatim developer community.
-- For a full list of authors see the git log.
DROP TABLE IF EXISTS location_property_tiger;
CREATE TABLE location_property_tiger (
place_id BIGINT NOT NULL,
parent_place_id BIGINT,
startnumber INTEGER NOT NULL,
endnumber INTEGER NOT NULL,
step SMALLINT NOT NULL,
partition SMALLINT NOT NULL,
linegeo GEOMETRY NOT NULL,
postcode TEXT);

View File

@@ -38,6 +38,7 @@ class QueryPool:
""" Schedule a query for execution.
"""
if self.is_cancelled:
self.clear_queue()
await self.finish()
return
@@ -47,6 +48,7 @@ class QueryPool:
await asyncio.sleep(0)
if self.is_cancelled:
self.clear_queue()
await self.finish()
async def finish(self) -> None:

View File

@@ -152,12 +152,11 @@ def create_tables(conn: Connection, config: Configuration, reverse_only: bool =
When `reverse_only` is True, then the main table for searching will
be skipped and only reverse search is possible.
"""
sql = SQLPreprocessor(conn, config)
sql.env.globals['db']['reverse_only'] = reverse_only
SQLPreprocessor(conn, config).run_sql_file(conn, 'tables.sql',
create_reverse_only=reverse_only)
sql.run_sql_file(conn, 'tables.sql')
sql.run_sql_file(conn, 'grants.sql')
# reinitiate the preprocessor to get all the newly created tables
SQLPreprocessor(conn, config).run_sql_file(conn, 'grants.sql')
def create_table_triggers(conn: Connection, config: Configuration) -> None:

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
import itertools
import sys
@@ -17,12 +17,11 @@ SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
sys.path.insert(0, str(SRC_DIR / 'src'))
from nominatim_db.config import Configuration
from nominatim_db.db import connection
from nominatim_db.db import connection, properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
import nominatim_db.tokenizer.factory
import dummy_tokenizer
import mocks
from cursor import CursorForTesting
@@ -132,28 +131,49 @@ def project_env(tmp_path):
@pytest.fixture
def property_table(table_factory, temp_db_conn):
table_factory('nominatim_properties', 'property TEXT, value TEXT')
return mocks.MockPropertyTable(temp_db_conn)
def country_table(table_factory):
table_factory('country_name', 'partition INT, country_code varchar(2), name hstore')
@pytest.fixture
def status_table(table_factory):
def country_row(country_table, temp_db_cursor):
def _add(partition=None, country=None, names=None):
temp_db_cursor.insert_row('country_name', partition=partition,
country_code=country, name=names)
return _add
@pytest.fixture
def load_sql(temp_db_conn, country_row):
proc = SQLPreprocessor(temp_db_conn, Configuration(None))
def _run(filename, **kwargs):
proc.run_sql_file(temp_db_conn, filename, **kwargs)
return _run
@pytest.fixture
def property_table(load_sql, temp_db_conn):
load_sql('tables/nominatim_properties.sql')
class _PropTable:
def set(self, name, value):
properties.set_property(temp_db_conn, name, value)
def get(self, name):
return properties.get_property(temp_db_conn, name)
return _PropTable()
@pytest.fixture
def status_table(load_sql):
""" Create an empty version of the status table and
the status logging table.
"""
table_factory('import_status',
"""lastimportdate timestamp with time zone NOT NULL,
sequence_id integer,
indexed boolean""")
table_factory('import_osmosis_log',
"""batchend timestamp,
batchseq integer,
batchsize bigint,
starttime timestamp,
endtime timestamp,
event text""")
load_sql('tables/status.sql')
@pytest.fixture
@@ -178,12 +198,14 @@ def place_row(place_table, temp_db_cursor):
prerequisite to the fixture.
"""
idseq = itertools.count(1001)
def _insert(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom=None):
temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)",
(osm_id or next(idseq), osm_type, cls, typ, names,
admin_level, address, extratags,
geom or 'SRID=4326;POINT(0 0)'))
admin_level=None, address=None, extratags=None, geom='POINT(0 0)'):
args = {'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
'address': address, 'extratags': extratags,
'geometry': _with_srid(geom)}
temp_db_cursor.insert_row('place', **args)
return _insert
@@ -203,50 +225,104 @@ def place_postcode_table(temp_db_with_extensions, table_factory):
@pytest.fixture
def place_postcode_row(place_postcode_table, temp_db_cursor):
""" A factory for rows in the place table. The table is created as a
""" A factory for rows in the place_postcode table. The table is created as a
prerequisite to the fixture.
"""
idseq = itertools.count(5001)
def _insert(osm_type='N', osm_id=None, postcode=None, country=None,
centroid=None, geom=None):
temp_db_cursor.execute("INSERT INTO place_postcode VALUES (%s, %s, %s, %s, %s, %s)",
(osm_type, osm_id or next(idseq),
postcode, country,
_with_srid(centroid, 'POINT(12.0 4.0)'),
_with_srid(geom)))
centroid='POINT(12.0 4.0)', geom=None):
temp_db_cursor.insert_row('place_postcode',
osm_type=osm_type, osm_id=osm_id or next(idseq),
postcode=postcode, country_code=country,
centroid=_with_srid(centroid),
geometry=_with_srid(geom))
return _insert
@pytest.fixture
def placex_table(temp_db_with_extensions, temp_db_conn):
""" Create an empty version of the place table.
def placex_table(temp_db_with_extensions, temp_db_conn, load_sql, place_table):
""" Create an empty version of the placex table.
"""
return mocks.MockPlacexTable(temp_db_conn)
load_sql('tables/placex.sql')
temp_db_conn.execute("CREATE SEQUENCE IF NOT EXISTS seq_place START 1")
@pytest.fixture
def osmline_table(temp_db_with_extensions, table_factory):
table_factory('location_property_osmline',
"""place_id BIGINT,
osm_id BIGINT,
parent_place_id BIGINT,
geometry_sector INTEGER,
indexed_date TIMESTAMP,
startnumber INTEGER,
endnumber INTEGER,
partition SMALLINT,
indexed_status SMALLINT,
linegeo GEOMETRY,
interpolationtype TEXT,
address HSTORE,
postcode TEXT,
country_code VARCHAR(2)""")
def placex_row(placex_table, temp_db_cursor):
""" A factory for rows in the placex table. The table is created as a
prerequisite to the fixture.
"""
idseq = itertools.count(1001)
def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, rank_address=30,
centroid='POINT(10 4)', indexed_status=0, indexed_date=None):
args = {'place_id': pysql.SQL("nextval('seq_place')"),
'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
'address': address, 'housenumber': housenumber,
'rank_search': rank_search, 'rank_address': rank_address,
'extratags': extratags,
'centroid': _with_srid(centroid), 'geometry': _with_srid(geom),
'country_code': country,
'indexed_status': indexed_status, 'indexed_date': indexed_date,
'partition': pysql.Literal(0), 'geometry_sector': pysql.Literal(1)}
return temp_db_cursor.insert_row('placex', **args)
return _add
@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions):
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
def osmline_table(temp_db_with_extensions, load_sql):
load_sql('tables/interpolation.sql')
@pytest.fixture
def osmline_row(osmline_table, temp_db_cursor):
idseq = itertools.count(20001)
def _add(osm_id=None, geom='LINESTRING(12.0 11.0, 12.003 11.0)'):
return temp_db_cursor.insert_row(
'location_property_osmline',
place_id=pysql.SQL("nextval('seq_place')"),
osm_id=osm_id or next(idseq),
geometry_sector=pysql.Literal(20),
partition=pysql.Literal(0),
indexed_status=1,
linegeo=_with_srid(geom))
return _add
@pytest.fixture
def postcode_table(temp_db_with_extensions, load_sql):
load_sql('tables/postcodes.sql')
@pytest.fixture
def postcode_row(postcode_table, temp_db_cursor):
def _add(country, postcode, x=34.5, y=-9.33):
geom = _with_srid(f"POINT({x} {y})")
return temp_db_cursor.insert_row(
'location_postcodes',
place_id=pysql.SQL("nextval('seq_place')"),
indexed_status=pysql.Literal(1),
country_code=country, postcode=postcode,
centroid=geom,
rank_search=pysql.Literal(16),
geometry=('ST_Expand(%s::geometry, 0.005)', geom))
return _add
@pytest.fixture
def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions, country_row):
for part in range(3):
country_row(partition=part)
cfg = Configuration(None)
cfg.set_libdirs(sql=tmp_path)
return cfg

View File

@@ -2,12 +2,13 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Specialised psycopg cursor with shortcut functions useful for testing.
"""
import psycopg
from psycopg import sql as pysql
class CursorForTesting(psycopg.Cursor):
@@ -52,7 +53,49 @@ class CursorForTesting(psycopg.Cursor):
def table_rows(self, table, where=None):
""" Return the number of rows in the given table.
"""
if where is None:
return self.scalar('SELECT count(*) FROM ' + table)
sql = pysql.SQL('SELECT count(*) FROM') + pysql.Identifier(table)
if where is not None:
sql += pysql.SQL('WHERE') + pysql.SQL(where)
return self.scalar('SELECT count(*) FROM {} WHERE {}'.format(table, where))
return self.scalar(sql)
def insert_row(self, table, **data):
""" Insert a row into the given table.
'data' is a dictionary of column names and associated values.
When the value is a pysql.Literal or pysql.SQL, then the expression
will be inserted as is instead of loading the value. When the
value is a tuple, then the first element will be added as an
SQL expression for the value and the second element is treated
as the actual value to insert. The SQL expression must contain
a %s placeholder in that case.
If data contains a 'place_id' column, then the value of the
place_id column after insert is returned. Otherwise the function
returns nothing.
"""
columns = []
placeholders = []
values = []
for k, v in data.items():
columns.append(pysql.Identifier(k))
if isinstance(v, tuple):
placeholders.append(pysql.SQL(v[0]))
values.append(v[1])
elif isinstance(v, (pysql.Literal, pysql.SQL)):
placeholders.append(v)
else:
placeholders.append(pysql.Placeholder())
values.append(v)
sql = pysql.SQL("INSERT INTO {table} ({columns}) VALUES({values})")\
.format(table=pysql.Identifier(table),
columns=pysql.SQL(',').join(columns),
values=pysql.SQL(',').join(placeholders))
if 'place_id' in data:
sql += pysql.SQL('RETURNING place_id')
self.execute(sql, values)
return self.fetchone()[0] if 'place_id' in data else None

View File

@@ -53,11 +53,10 @@ def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cur
@pytest.mark.parametrize("languages", (None, ['fr', 'en']))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
table_factory, tokenizer_mock, languages, loaded_country):
table_factory('country_name', 'country_code varchar(2), name hstore',
content=(('us', '"name"=>"us1","name:af"=>"us2"'),
('fr', '"name"=>"Fra", "name:en"=>"Fren"')))
country_row, tokenizer_mock, languages, loaded_country):
temp_db_cursor.execute('TRUNCATE country_name')
country_row(country='us', names={"name": "us1", "name:af": "us2"})
country_row(country='fr', names={"name": "Fra", "name:en": "Fren"})
assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

View File

@@ -1,13 +1,12 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for running the indexing.
"""
import itertools
import pytest
import pytest_asyncio # noqa
@@ -15,129 +14,57 @@ from nominatim_db.indexer import indexer
from nominatim_db.tokenizer import factory
class IndexerTestDB:
class TestIndexing:
@pytest.fixture(autouse=True)
def setup(self, temp_db_conn, project_env, tokenizer_mock,
placex_table, postcode_table, osmline_table):
self.conn = temp_db_conn
temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;
def __init__(self, conn):
self.placex_id = itertools.count(100000)
self.osmline_id = itertools.count(500000)
self.postcode_id = itertools.count(700000)
DROP TYPE IF EXISTS prepare_update_info CASCADE;
CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
);
CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info) AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END; $$ LANGUAGE plpgsql STABLE;
self.conn = conn
self.conn.autocimmit = True
with self.conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT,
type TEXT,
linked_place_id BIGINT,
rank_address SMALLINT,
rank_search SMALLINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
partition SMALLINT,
admin_level SMALLINT,
country_code TEXT,
address HSTORE,
token_info JSONB,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_property_osmline (
place_id BIGINT,
osm_id BIGINT,
address HSTORE,
token_info JSONB,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
geometry_sector INTEGER)""")
cur.execute("""CREATE TABLE location_postcodes (
place_id BIGINT,
indexed_status SMALLINT,
indexed_date TIMESTAMP,
country_code varchar(2),
postcode TEXT)""")
cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
AS $$
BEGIN
IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
NEW.indexed_date = now();
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql;""")
cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE")
cur.execute("""CREATE TYPE prepare_update_info AS (
name HSTORE,
address HSTORE,
rank_address SMALLINT,
country_code TEXT,
class TEXT,
type TEXT,
linked_place_id BIGINT
)""")
cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex,
OUT result prepare_update_info)
AS $$
BEGIN
result.address := p.address;
result.name := p.name;
result.class := p.class;
result.type := p.type;
result.country_code := p.country_code;
result.rank_address := p.rank_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
cur.execute("""CREATE OR REPLACE FUNCTION
get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$
BEGIN
RETURN in_address;
END;
$$ LANGUAGE plpgsql STABLE;
""")
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
RETURNS HSTORE AS $$ SELECT in_address $$ LANGUAGE sql STABLE;
""")
for table in ('placex', 'location_property_osmline', 'location_postcodes'):
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table))
for table in ('placex', 'location_property_osmline', 'location_postcodes'):
temp_db_conn.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
FOR EACH ROW EXECUTE PROCEDURE date_update()
""".format(table))
self.tokenizer = factory.create_tokenizer(project_env)
def scalar(self, query):
with self.conn.cursor() as cur:
cur.execute(query)
return cur.fetchone()[0]
def add_place(self, cls='place', typ='locality',
rank_search=30, rank_address=30, sector=20):
next_id = next(self.placex_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex
(place_id, class, type, rank_search, rank_address,
indexed_status, geometry_sector)
VALUES (%s, %s, %s, %s, %s, 1, %s)""",
(next_id, cls, typ, rank_search, rank_address, sector))
return next_id
def add_admin(self, **kwargs):
kwargs['cls'] = 'boundary'
kwargs['typ'] = 'administrative'
return self.add_place(**kwargs)
def add_osmline(self, sector=20):
next_id = next(self.osmline_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_property_osmline
(place_id, osm_id, indexed_status, geometry_sector)
VALUES (%s, %s, 1, %s)""",
(next_id, next_id, sector))
return next_id
def add_postcode(self, country, postcode):
next_id = next(self.postcode_id)
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO location_postcodes
(place_id, indexed_status, country_code, postcode)
VALUES (%s, 1, %s, %s)""",
(next_id, country, postcode))
return next_id
def placex_unindexed(self):
return self.scalar('SELECT count(*) from placex where indexed_status > 0')
@@ -145,148 +72,133 @@ class IndexerTestDB:
return self.scalar("""SELECT count(*) from location_property_osmline
WHERE indexed_status > 0""")
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_all_by_rank(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
@pytest.fixture
def test_db(temp_db_conn):
yield IndexerTestDB(temp_db_conn)
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(0, 30)
@pytest.fixture
def test_tokenizer(tokenizer_mock, project_env):
return factory.create_tokenizer(project_env)
assert self.placex_unindexed() == 0
assert self.osmline_unindexed() == 0
assert self.scalar("""SELECT count(*) from placex
WHERE indexed_status = 0 and indexed_date is null""") == 0
# ranks come in order of rank address
assert self.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert self.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_all_by_rank(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_without_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_by_rank(0, 30)
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(4, 15)
assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert self.placex_unindexed() == 19
assert self.osmline_unindexed() == 1
assert test_db.scalar("""SELECT count(*) from placex
WHERE indexed_status = 0 and indexed_date is null""") == 0
# ranks come in order of rank address
assert test_db.scalar("""
SELECT count(*) FROM placex p WHERE rank_address > 0
AND indexed_date >= (SELECT min(indexed_date) FROM placex o
WHERE p.rank_address < o.rank_address)""") == 0
# placex address ranked objects come before interpolations
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM location_property_osmline)""") == 0
# rank 0 comes after all other placex objects
assert test_db.scalar(
"""SELECT count(*) FROM placex WHERE rank_address > 0
AND indexed_date >
(SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_with_30(self, dsn, threads, placex_row, osmline_row):
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_without_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_by_rank(28, 30)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
test_tokenizer, threads)
await idx.index_by_rank(4, 15)
assert self.placex_unindexed() == 28
assert self.osmline_unindexed() == 0
assert test_db.placex_unindexed() == 19
assert test_db.osmline_unindexed() == 1
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0
assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_boundaries(self, dsn, threads, placex_row, osmline_row):
for rank in range(4, 10):
placex_row(cls='boundary', typ='administrative',
rank_address=rank, rank_search=rank, indexed_status=1)
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
assert self.placex_unindexed() == 37
assert self.osmline_unindexed() == 1
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_partial_with_30(test_db, threads, test_tokenizer):
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_boundaries()
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
assert self.placex_unindexed() == 31
assert self.osmline_unindexed() == 1
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_by_rank(28, 30)
assert self.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0
assert test_db.placex_unindexed() == 28
assert test_db.osmline_unindexed() == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_postcodes(self, dsn, threads, postcode_row):
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)
for postcode in range(32000, 33000):
postcode_row(country='us', postcode=postcode)
assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0
idx = indexer.Indexer(dsn, self.tokenizer, threads)
await idx.index_postcodes()
assert self.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_boundaries(test_db, threads, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
@pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio
async def test_index_full(self, dsn, analyse, placex_row, osmline_row, postcode_row):
for rank in range(4, 10):
placex_row(cls='boundary', typ='administrative',
rank_address=rank, rank_search=rank, indexed_status=1)
for rank in range(31):
placex_row(rank_address=rank, rank_search=rank, indexed_status=1)
osmline_row()
for postcode in range(1000):
postcode_row(country='de', postcode=postcode)
assert test_db.placex_unindexed() == 37
assert test_db.osmline_unindexed() == 1
idx = indexer.Indexer(dsn, self.tokenizer, 4)
await idx.index_full(analyse=analyse)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_boundaries()
assert test_db.placex_unindexed() == 31
assert test_db.osmline_unindexed() == 1
assert test_db.scalar("""
SELECT count(*) FROM placex
WHERE indexed_status = 0 AND class != 'boundary'""") == 0
@pytest.mark.parametrize("threads", [1, 15])
@pytest.mark.asyncio
async def test_index_postcodes(test_db, threads, test_tokenizer):
for postcode in range(1000):
test_db.add_postcode('de', postcode)
for postcode in range(32000, 33000):
test_db.add_postcode('us', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
await idx.index_postcodes()
assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
@pytest.mark.parametrize("analyse", [True, False])
@pytest.mark.asyncio
async def test_index_full(test_db, analyse, test_tokenizer):
for rank in range(4, 10):
test_db.add_admin(rank_address=rank, rank_search=rank)
for rank in range(31):
test_db.add_place(rank_address=rank, rank_search=rank)
test_db.add_osmline()
for postcode in range(1000):
test_db.add_postcode('de', postcode)
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
await idx.index_full(analyse=analyse)
assert test_db.placex_unindexed() == 0
assert test_db.osmline_unindexed() == 0
assert test_db.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0
assert self.placex_unindexed() == 0
assert self.osmline_unindexed() == 0
assert self.scalar("""SELECT count(*) FROM location_postcodes
WHERE indexed_status != 0""") == 0

View File

@@ -1,85 +0,0 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Custom mocks for testing.
"""
import itertools
from nominatim_db.db import properties
class MockPlacexTable:
""" A placex table for testing.
"""
def __init__(self, conn):
self.idseq = itertools.count(10000)
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE placex (
place_id BIGINT,
parent_place_id BIGINT,
linked_place_id BIGINT,
importance FLOAT,
indexed_date TIMESTAMP,
geometry_sector INTEGER,
rank_address SMALLINT,
rank_search SMALLINT,
partition SMALLINT,
indexed_status SMALLINT,
osm_id int8,
osm_type char(1),
class text,
type text,
name hstore,
admin_level smallint,
address hstore,
extratags hstore,
token_info jsonb,
geometry Geometry(Geometry,4326),
wikipedia TEXT,
country_code varchar(2),
housenumber TEXT,
postcode TEXT,
centroid GEOMETRY(Geometry, 4326))""")
cur.execute("CREATE SEQUENCE IF NOT EXISTS seq_place")
conn.commit()
def add(self, osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, centroid=None):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class,
type, name, admin_level, address,
housenumber, rank_search,
extratags, centroid, geometry, country_code)
VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s,
%s, %s, %s, %s, %s, %s, %s)
RETURNING place_id""",
(osm_type, osm_id or next(self.idseq), cls, typ, names,
admin_level, address, housenumber, rank_search,
extratags, centroid, 'SRID=4326;' + geom,
country))
place_id = cur.fetchone()[0]
self.conn.commit()
return place_id
class MockPropertyTable:
""" A property table for testing.
"""
def __init__(self, conn):
self.conn = conn
def set(self, name, value):
""" Set a property in the table to the given value.
"""
properties.set_property(self.conn, name, value)
def get(self, name):
""" Set a property in the table to the given value.
"""
return properties.get_property(self.conn, name)

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for ICU tokenizer.
@@ -15,7 +15,6 @@ import pytest
from nominatim_db.tokenizer import icu_tokenizer
import nominatim_db.tokenizer.icu_rule_loader
from nominatim_db.db import properties
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
from nominatim_db.data.place_info import PlaceInfo
from mock_icu_word_table import MockIcuWordTable
@@ -90,13 +89,9 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
@pytest.fixture
def sql_functions(temp_db_conn, def_config, src_dir):
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
sqlproc.run_sql_file(temp_db_conn, 'tokenizer/icu_tokenizer.sql')
def_config.lib_dir.sql = orig_sql
def sql_functions(load_sql):
load_sql('functions/utils.sql')
load_sql('tokenizer/icu_tokenizer.sql')
@pytest.fixture
@@ -653,22 +648,21 @@ class TestUpdateWordTokens:
self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1
def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table,
placex_table):
def test_keep_housenumbers_from_placex_table(self, add_housenumber, word_table, placex_row):
add_housenumber(9999, '5432a')
add_housenumber(9990, '34z')
placex_table.add(housenumber='34z')
placex_table.add(housenumber='25432a')
placex_row(housenumber='34z')
placex_row(housenumber='25432a')
assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens()
assert word_table.count_housenumbers() == 1
def test_keep_housenumbers_from_placex_table_hnr_list(self, add_housenumber,
word_table, placex_table):
word_table, placex_row):
add_housenumber(9991, '9 b')
add_housenumber(9990, '34z')
placex_table.add(housenumber='9 a;9 b;9 c')
placex_row(housenumber='9 a;9 b;9 c')
assert word_table.count_housenumbers() == 2
self.tok.update_word_tokens()

View File

@@ -2,17 +2,17 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for maintenance and analysis functions.
"""
import pytest
import datetime as dt
from nominatim_db.errors import UsageError
from nominatim_db.tools import admin
from nominatim_db.tokenizer import factory
from nominatim_db.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True)
@@ -61,15 +61,14 @@ def test_analyse_indexing_unknown_osmid(project_env):
admin.analyse_indexing(project_env, osm_id='W12345674')
def test_analyse_indexing_with_place_id(project_env, temp_db_cursor):
temp_db_cursor.execute("INSERT INTO placex (place_id) VALUES(12345)")
def test_analyse_indexing_with_place_id(project_env, placex_row):
place_id = placex_row()
admin.analyse_indexing(project_env, place_id=12345)
admin.analyse_indexing(project_env, place_id=place_id)
def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_type, osm_id)
VALUES(9988, 'N', 10000)""")
def test_analyse_indexing_with_osm_id(project_env, placex_row):
placex_row(osm_type='N', osm_id=10000)
admin.analyse_indexing(project_env, osm_id='N10000')
@@ -77,8 +76,8 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
class TestAdminCleanDeleted:
@pytest.fixture(autouse=True)
def setup_polygon_delete(self, project_env, table_factory, place_table,
osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
def setup_polygon_delete(self, project_env, table_factory, place_table, placex_row,
osmline_table, temp_db_cursor, load_sql):
""" Set up place_force_delete function and related tables
"""
self.project_env = project_env
@@ -91,12 +90,15 @@ class TestAdminCleanDeleted:
((100, 'N', 'boundary', 'administrative'),
(145, 'N', 'boundary', 'administrative'),
(175, 'R', 'landcover', 'grass')))
temp_db_cursor.execute("""
INSERT INTO placex (place_id, osm_id, osm_type, class, type,
indexed_date, indexed_status)
VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
(2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
(3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
now = dt.datetime.now()
placex_row(osm_type='N', osm_id=100, cls='boundary', typ='administrative',
indexed_status=1, indexed_date=now - dt.timedelta(days=30))
placex_row(osm_type='N', osm_id=145, cls='boundary', typ='administrative',
indexed_status=1, indexed_date=now - dt.timedelta(days=90))
placex_row(osm_type='R', osm_id=175, cls='landcover', typ='grass',
indexed_status=1, indexed_date=now - dt.timedelta(days=90))
# set up tables and triggers for utils function
table_factory('place_to_be_deleted',
"""osm_id BIGINT,
@@ -104,7 +106,6 @@ class TestAdminCleanDeleted:
class TEXT NOT NULL,
type TEXT NOT NULL,
deferred BOOLEAN""")
table_factory('country_name', 'partition INT')
table_factory('import_polygon_error', """osm_id BIGINT,
osm_type CHAR(1),
class TEXT NOT NULL,
@@ -115,11 +116,7 @@ class TestAdminCleanDeleted:
$$ LANGUAGE plpgsql;""")
temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
orig_sql = def_config.lib_dir.sql
def_config.lib_dir.sql = src_dir / 'lib-sql'
sqlproc = SQLPreprocessor(temp_db_conn, def_config)
sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
def_config.lib_dir.sql = orig_sql
load_sql('functions/utils.sql')
def test_admin_clean_deleted_no_records(self):
admin.clean_deleted_relations(self.project_env, age='1 year')

View File

@@ -170,14 +170,41 @@ def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory, w
@pytest.mark.asyncio
async def test_load_data(dsn, place_row, placex_table, osmline_table,
temp_db_cursor, threads):
for func in ('precompute_words', 'getorcreate_housenumber_id', 'make_standard_name'):
temp_db_cursor.execute(pysql.SQL("""CREATE FUNCTION {} (src TEXT)
RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL
""").format(pysql.Identifier(func)))
for oid in range(100, 130):
place_row(osm_id=oid)
place_row(osm_type='W', osm_id=342, cls='place', typ='houses',
geom='SRID=4326;LINESTRING(0 0, 10 10)')
geom='LINESTRING(0 0, 10 10)')
temp_db_cursor.execute("""
CREATE OR REPLACE FUNCTION placex_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1;
NEW.centroid := ST_Centroid(NEW.geometry);
NEW.partition := 0;
NEW.geometry_sector := 2424;
NEW.rank_address := 30;
NEW.rank_search := 30;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;
CREATE OR REPLACE FUNCTION osmline_insert() RETURNS TRIGGER AS $$
BEGIN
NEW.place_id := nextval('seq_place');
IF NEW.indexed_status IS NULL THEN
NEW.indexed_status := 1;
NEW.partition := 0;
NEW.geometry_sector := 2424;
END IF;
RETURN NEW;
END; $$ LANGUAGE plpgsql STABLE PARALLEL SAFE;
CREATE TRIGGER placex_before_insert BEFORE INSERT ON placex
FOR EACH ROW EXECUTE PROCEDURE placex_insert();
CREATE TRIGGER osmline_before_insert BEFORE INSERT ON location_property_osmline
FOR EACH ROW EXECUTE PROCEDURE osmline_insert();
""")
await database_import.load_data(dsn, threads)

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for import special phrases methods
@@ -125,9 +125,8 @@ def test_grant_access_to_web_user(temp_db_conn, temp_db_cursor, table_factory,
phrase_class, phrase_type)
def test_create_place_classtype_table_and_indexes(
temp_db_cursor, def_config, placex_table,
sp_importer, temp_db_conn, monkeypatch):
def test_create_place_classtype_table_and_indexes(temp_db_cursor, def_config, placex_row,
sp_importer, temp_db_conn, monkeypatch):
"""
Test that _create_place_classtype_table_and_indexes()
create the right place_classtype tables and place_id indexes
@@ -136,7 +135,7 @@ def test_create_place_classtype_table_and_indexes(
"""
pairs = set([('class1', 'type1'), ('class2', 'type2')])
for pair in pairs:
placex_table.add(cls=pair[0], typ=pair[1]) # adding to db
placex_row(cls=pair[0], typ=pair[1]) # adding to db
sp_importer._create_classtype_table_and_indexes(pairs)
temp_db_conn.commit()
@@ -178,7 +177,7 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
@pytest.mark.parametrize("should_replace", [(True), (False)])
def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
placex_table, table_factory, tokenizer_mock,
placex_row, table_factory, tokenizer_mock,
xml_wiki_content, should_replace):
"""
Check that the main import_phrases() method is well executed.
@@ -199,8 +198,8 @@ def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
type_test = 'zip_line'
tokenizer = tokenizer_mock()
placex_table.add(cls=class_test, typ=type_test) # in db for special phrase filtering
placex_table.add(cls='amenity', typ='animal_shelter') # in db for special phrase filtering
placex_row(cls=class_test, typ=type_test) # in db for special phrase filtering
placex_row(cls='amenity', typ='animal_shelter') # in db for special phrase filtering
sp_importer.import_phrases(tokenizer, should_replace)
assert len(tokenizer.analyser_cache['special_phrases']) == 19
@@ -257,7 +256,7 @@ def check_placeid_and_centroid_indexes(temp_db_cursor, phrase_class, phrase_type
@pytest.mark.parametrize("should_replace", [(True), (False)])
def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, def_config,
sp_importer, placex_table, tokenizer_mock,
sp_importer, placex_row, tokenizer_mock,
xml_wiki_content, should_replace):
monkeypatch.setattr('nominatim_db.tools.special_phrases.sp_wiki_loader._get_wiki_content',
@@ -266,7 +265,7 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
class_test = 'aerialway'
type_test = 'zip_line'
placex_table.add(cls=class_test, typ=type_test) # add to the database to make valid
placex_row(cls=class_test, typ=type_test) # add to the database to make valid
tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer, should_replace)
@@ -276,11 +275,11 @@ def test_import_phrases_special_phrase_filtering(monkeypatch, temp_db_cursor, de
assert check_grant_access(temp_db_cursor, def_config.DATABASE_WEBUSER, class_test, type_test)
def test_get_classtype_pairs_directly(placex_table, temp_db_conn, sp_importer):
def test_get_classtype_pairs_directly(placex_row, temp_db_conn, sp_importer):
for _ in range(101):
placex_table.add(cls='highway', typ='residential')
placex_row(cls='highway', typ='residential')
for _ in range(99):
placex_table.add(cls='amenity', typ='toilet')
placex_row(cls='amenity', typ='toilet')
temp_db_conn.commit()

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for functions to maintain the artificial postcode table.
@@ -75,20 +75,18 @@ class MockPostcodeTable:
@pytest.fixture
def postcode_table(def_config, temp_db_conn, placex_table, table_factory):
country_info.setup_country_config(def_config)
table_factory('country_name', 'partition INT', ((0, ), (1, ), (2, )))
return MockPostcodeTable(temp_db_conn, def_config)
@pytest.fixture
def insert_implicit_postcode(placex_table, place_postcode_row):
def insert_implicit_postcode(placex_row, place_postcode_row):
""" Insert data into the placex and place table
which can then be used to compute one postcode.
"""
def _insert_implicit_postcode(osm_id, country, geometry, postcode, in_placex=False):
if in_placex:
placex_table.add(osm_id=osm_id, country=country, geom=geometry,
centroid=f'SRID=4326;{geometry}',
address={'postcode': postcode})
placex_row(osm_id=osm_id, country=country, geom=geometry,
centroid=geometry, address={'postcode': postcode})
else:
place_postcode_row(osm_id=osm_id, centroid=geometry,
country=country, postcode=postcode)

View File

@@ -42,8 +42,8 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283)
def test_invalidate_osm_object_simple(placex_row, osm_type, temp_db_conn, temp_db_cursor):
placex_row(osm_type=osm_type, osm_id=57283)
refresh.invalidate_osm_object(osm_type, 57283, temp_db_conn, recursive=False)
temp_db_conn.commit()
@@ -53,8 +53,8 @@ def test_invalidate_osm_object_simple(placex_table, osm_type, temp_db_conn, temp
(osm_type, 57283))
def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type='W', osm_id=57283)
def test_invalidate_osm_object_nonexisting_simple(placex_row, temp_db_conn, temp_db_cursor):
placex_row(osm_type='W', osm_id=57283)
refresh.invalidate_osm_object('N', 57283, temp_db_conn, recursive=False)
temp_db_conn.commit()
@@ -64,8 +64,8 @@ def test_invalidate_osm_object_nonexisting_simple(placex_table, temp_db_conn, te
@pytest.mark.parametrize('osm_type', ('N', 'W', 'R'))
def test_invalidate_osm_object_recursive(placex_table, osm_type, temp_db_conn, temp_db_cursor):
placex_table.add(osm_type=osm_type, osm_id=57283)
def test_invalidate_osm_object_recursive(placex_row, osm_type, temp_db_conn, temp_db_cursor):
placex_row(osm_type=osm_type, osm_id=57283)
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_force_update(placeid BIGINT)
RETURNS BOOLEAN AS $$

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for correctly assigning wikipedia pages to places.
@@ -38,7 +38,7 @@ def wiki_csv(tmp_path, sql_preprocessor):
{'wikipedia': 'en:Test'},
{'wikidata': 'Q123'}])
def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
def_config, wiki_csv, placex_table, extra):
def_config, wiki_csv, placex_row, extra):
import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')]))
create_functions(temp_db_conn, def_config)
@@ -46,7 +46,7 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
'SELECT language, title, importance, wikidata FROM wikimedia_importance')
assert content == set([('en', 'Test', 0.3, 'Q123')])
place_id = placex_table.add(osm_id=12, extratags=extra)
place_id = placex_row(osm_id=12, extratags=extra)
table_factory('search_name',
'place_id BIGINT, importance FLOAT',
[(place_id, 0.2)])
@@ -61,11 +61,11 @@ def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, table_factory,
def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv,
placex_table, table_factory):
placex_row, table_factory):
import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')]))
create_functions(temp_db_conn, def_config)
place_id = placex_table.add(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
place_id = placex_row(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
table_factory('search_name',
'place_id BIGINT, importance FLOAT',
[(place_id, 0.2)])

View File

@@ -1,69 +1,70 @@
# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
from nominatim_db.tools.special_phrases.sp_importer import SPImporter
# Testing Database Class Pair Retrival using Conftest.py and placex
def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
def test_get_classtype_pair_data(placex_row, def_config, temp_db_conn):
for _ in range(100):
placex_table.add(cls='highway', typ='motorway') # edge case 100
placex_row(cls='highway', typ='motorway') # edge case 100
for _ in range(99):
placex_table.add(cls='amenity', typ='prison') # edge case 99
placex_row(cls='amenity', typ='prison') # edge case 99
for _ in range(150):
placex_table.add(cls='tourism', typ='hotel')
placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs(min=100)
expected = {
assert result == {
("highway", "motorway"),
("tourism", "hotel")
}
assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
def test_get_classtype_pair_data_more(placex_row, def_config, temp_db_conn):
for _ in range(99):
placex_table.add(cls='emergency', typ='firehydrant') # edge case 99, not included
placex_row(cls='emergency', typ='firehydrant') # edge case 99, not included
for _ in range(199):
placex_table.add(cls='amenity', typ='prison')
placex_row(cls='amenity', typ='prison')
for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel')
placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs(min=100)
expected = {
assert result == {
("amenity", "prison"),
("tourism", "hotel")
}
assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_default(placex_table, def_config, temp_db_conn):
def test_get_classtype_pair_data_default(placex_row, def_config, temp_db_conn):
for _ in range(1):
placex_table.add(cls='emergency', typ='firehydrant')
placex_row(cls='emergency', typ='firehydrant')
for _ in range(199):
placex_table.add(cls='amenity', typ='prison')
placex_row(cls='amenity', typ='prison')
for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel')
placex_row(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs()
expected = {
assert result == {
("amenity", "prison"),
("tourism", "hotel"),
("emergency", "firehydrant")
}
assert result == expected, f"Expected {expected}, got {result}"

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Test for tiger data function
@@ -13,59 +13,10 @@ from textwrap import dedent
import pytest
import pytest_asyncio # noqa: F401
from nominatim_db.db.connection import execute_scalar
from nominatim_db.tools import tiger_data, freeze
from nominatim_db.tools import tiger_data
from nominatim_db.errors import UsageError
class MockTigerTable:
def __init__(self, conn):
self.conn = conn
with conn.cursor() as cur:
cur.execute("""CREATE TABLE tiger (linegeo GEOMETRY,
start INTEGER,
stop INTEGER,
interpol TEXT,
token_info JSONB,
postcode TEXT)""")
# We need this table to determine if the database is frozen or not
cur.execute("CREATE TABLE place (number INTEGER)")
# We need this table to determine if the database is in reverse-only mode
cur.execute("CREATE TABLE search_name (place_id BIGINT)")
def count(self):
return execute_scalar(self.conn, "SELECT count(*) FROM tiger")
def row(self):
with self.conn.cursor() as cur:
cur.execute("SELECT * FROM tiger LIMIT 1")
return cur.fetchone()
@pytest.fixture
def tiger_table(def_config, temp_db_conn, sql_preprocessor,
temp_db_with_extensions, tmp_path):
def_config.lib_dir.sql = tmp_path / 'sql'
def_config.lib_dir.sql.mkdir()
(def_config.lib_dir.sql / 'tiger_import_start.sql').write_text(
"""CREATE OR REPLACE FUNCTION tiger_line_import(linegeo GEOMETRY, start INTEGER,
stop INTEGER, interpol TEXT,
token_info JSONB, postcode TEXT)
RETURNS INTEGER AS $$
INSERT INTO tiger VALUES(linegeo, start, stop, interpol, token_info, postcode)
RETURNING 1
$$ LANGUAGE SQL;""", encoding='utf-8')
(def_config.lib_dir.sql / 'tiger_import_finish.sql').write_text(
"""DROP FUNCTION tiger_line_import (linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
token_info JSONB, in_postcode TEXT);""", encoding='utf-8')
return MockTigerTable(temp_db_conn)
@pytest.fixture
def csv_factory(tmp_path):
def _mk_file(fname, hnr_from=1, hnr_to=9, interpol='odd', street='Main St',
@@ -80,107 +31,110 @@ def csv_factory(tmp_path):
return _mk_file
@pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data(def_config, src_dir, tiger_table, tokenizer_mock, threads):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
class TestTiger:
assert tiger_table.count() == 6213
@pytest.fixture(autouse=True)
def setup(self, temp_db_conn, placex_row, load_sql):
load_sql('tables/search_name.sql', create_reverse_only=False)
load_sql('tables/tiger.sql')
# fake parent roads
for x in range(-870, -863):
for y in range(323, 328):
placex_row(rank_search=26, rank_address=26,
geom=f"LINESTRING({x/10 - 0.1} {y/10}, {x/10 + 0.1} {y/10})")
@pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_database_frozen(def_config, src_dir, temp_db_conn, tiger_table,
tokenizer_mock, threads):
freeze.drop_update_tables(temp_db_conn)
temp_db_conn.execute("""
CREATE OR REPLACE FUNCTION get_partition(cc VARCHAR(10)) RETURNS INTEGER AS $$
SELECT 0;
$$ LANGUAGE sql;
CREATE OR REPLACE FUNCTION token_matches_street(i JSONB, s INT[]) RETURNS BOOLEAN AS $$
SELECT false
$$ LANGUAGE SQL IMMUTABLE STRICT PARALLEL SAFE;
""")
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, threads, tokenizer_mock())
assert tiger_table.count() == 6213
@pytest.mark.asyncio
async def test_add_tiger_data_reverse_only(def_config, src_dir, temp_db_conn, tiger_table,
tokenizer_mock):
with temp_db_conn.cursor() as cur:
cur.execute("DROP TABLE search_name")
temp_db_conn.commit()
with pytest.raises(UsageError,
match="Cannot perform tiger import: required tables are missing. "
"See https://github.com/osm-search/Nominatim/issues/2463 for details."):
@pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_database_frozen(self, def_config, src_dir, temp_db_cursor,
tokenizer_mock, threads):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, 1, tokenizer_mock())
def_config, threads, tokenizer_mock())
assert tiger_table.count() == 0
assert temp_db_cursor.table_rows('location_property_tiger') == 6209
@pytest.mark.asyncio
async def test_add_tiger_data_reverse_only(self, def_config, src_dir, temp_db_cursor,
tokenizer_mock):
temp_db_cursor.execute("DROP TABLE search_name")
@pytest.mark.asyncio
async def test_add_tiger_data_no_files(def_config, tiger_table, tokenizer_mock,
tmp_path):
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
with pytest.raises(UsageError,
match="Cannot perform tiger import: required tables are missing. "
"See https://github.com/osm-search/Nominatim/issues/2463 for details."):
await tiger_data.add_tiger_data(str(src_dir / 'test' / 'testdb' / 'tiger'),
def_config, 1, tokenizer_mock())
assert tiger_table.count() == 0
assert temp_db_cursor.table_rows('location_property_tiger') == 0
@pytest.mark.asyncio
async def test_add_tiger_data_no_files(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_bad_file(def_config, tiger_table, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text("""Random text""", encoding='utf-8')
assert temp_db_cursor.table_rows('location_property_tiger') == 0
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_bad_file(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
sqlfile = tmp_path / '1010.csv'
sqlfile.write_text('Random text', encoding='utf-8')
assert tiger_table.count() == 0
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert temp_db_cursor.table_rows('location_property_tiger') == 0
@pytest.mark.asyncio
async def test_add_tiger_data_hnr_nan(def_config, tiger_table, tokenizer_mock,
csv_factory, tmp_path):
csv_factory('file1', hnr_from=99)
csv_factory('file2', hnr_from='L12')
csv_factory('file3', hnr_to='12.4')
@pytest.mark.asyncio
async def test_add_tiger_data_hnr_nan(self, def_config, temp_db_cursor, tokenizer_mock,
csv_factory, tmp_path):
csv_factory('file1', hnr_to=99)
csv_factory('file2', hnr_from='L12')
csv_factory('file3', hnr_to='12.4')
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path), def_config, 1, tokenizer_mock())
assert tiger_table.count() == 1
assert tiger_table.row().start == 99
rows = temp_db_cursor.row_set("""
SELECT startnumber, endnumber FROM location_property_tiger""")
assert rows == {(1, 99)}
@pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path, src_dir, threads):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
tar.close()
@pytest.mark.parametrize("threads", (1, 5))
@pytest.mark.asyncio
async def test_add_tiger_data_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path, src_dir, threads):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(str(src_dir / 'test' / 'testdb' / 'tiger' / '01001.csv'))
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
tokenizer_mock())
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads,
tokenizer_mock())
assert tiger_table.count() == 6213
assert temp_db_cursor.table_rows('location_property_tiger') == 6209
@pytest.mark.asyncio
async def test_add_tiger_data_bad_tarfile(self, def_config, tokenizer_mock, tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""", encoding='utf-8')
@pytest.mark.asyncio
async def test_add_tiger_data_bad_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tarfile = tmp_path / 'sample.tar.gz'
tarfile.write_text("""Random text""", encoding='utf-8')
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
with pytest.raises(UsageError):
await tiger_data.add_tiger_data(str(tarfile), def_config, 1, tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_empty_tarfile(self, def_config, temp_db_cursor, tokenizer_mock,
tmp_path):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(__file__)
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
tokenizer_mock())
@pytest.mark.asyncio
async def test_add_tiger_data_empty_tarfile(def_config, tiger_table, tokenizer_mock,
tmp_path):
tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz")
tar.add(__file__)
tar.close()
await tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, 1,
tokenizer_mock())
assert tiger_table.count() == 0
assert temp_db_cursor.table_rows('location_property_tiger') == 0