From 9d0732a94187d5a7ddb8a6fe5ca62f0830873360 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 13 Feb 2026 10:57:59 +0100 Subject: [PATCH 1/2] add fixtures for postcode, interpolation table creation and filling --- test/python/conftest.py | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) diff --git a/test/python/conftest.py b/test/python/conftest.py index 0234dd05..891716d3 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -280,6 +280,44 @@ def osmline_table(temp_db_with_extensions, load_sql): load_sql('tables/interpolation.sql') +@pytest.fixture +def osmline_row(osmline_table, temp_db_cursor): + idseq = itertools.count(20001) + + def _add(osm_id=None, geom='LINESTRING(12.0 11.0, 12.003 11.0)'): + return temp_db_cursor.insert_row( + 'location_property_osmline', + place_id=pysql.SQL("nextval('seq_place')"), + osm_id=osm_id or next(idseq), + geometry_sector=pysql.Literal(20), + partition=pysql.Literal(0), + indexed_status=1, + linegeo=_with_srid(geom)) + + return _add + + +@pytest.fixture +def postcode_table(temp_db_with_extensions, load_sql): + load_sql('tables/postcodes.sql') + + +@pytest.fixture +def postcode_row(postcode_table, temp_db_cursor): + def _add(country, postcode, x=34.5, y=-9.33): + geom = _with_srid(f"POINT({x} {y})") + return temp_db_cursor.insert_row( + 'location_postcodes', + place_id=pysql.SQL("nextval('seq_place')"), + indexed_status=pysql.Literal(1), + country_code=country, postcode=postcode, + centroid=geom, + rank_search=pysql.Literal(16), + geometry=('ST_Expand(%s::geometry, 0.005)', geom)) + + return _add + + @pytest.fixture def sql_preprocessor_cfg(tmp_path, table_factory, temp_db_with_extensions, country_row): for part in range(3): From 3285948130026294634aa8032406fe31be94c5d7 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Fri, 13 Feb 2026 11:03:18 +0100 Subject: [PATCH 2/2] rewrite indexing tests to use standard table fixtures --- test/python/indexer/test_indexing.py | 386 +++++++++++---------------- 1 file changed, 149 insertions(+), 237 deletions(-) diff --git a/test/python/indexer/test_indexing.py b/test/python/indexer/test_indexing.py index e0ff4946..c9f19d8e 100644 --- a/test/python/indexer/test_indexing.py +++ b/test/python/indexer/test_indexing.py @@ -1,13 +1,12 @@ -# SPDX-License-Identifier: GPL-3.0-or-later # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2025 by the Nominatim developer community. +# Copyright (C) 2026 by the Nominatim developer community. # For a full list of authors see the git log. """ Tests for running the indexing. """ -import itertools + import pytest import pytest_asyncio # noqa @@ -15,129 +14,57 @@ from nominatim_db.indexer import indexer from nominatim_db.tokenizer import factory -class IndexerTestDB: +class TestIndexing: + @pytest.fixture(autouse=True) + def setup(self, temp_db_conn, project_env, tokenizer_mock, + placex_table, postcode_table, osmline_table): + self.conn = temp_db_conn + temp_db_conn.execute(""" + CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER AS $$ + BEGIN + IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN + NEW.indexed_date = now(); + END IF; + RETURN NEW; + END; $$ LANGUAGE plpgsql; - def __init__(self, conn): - self.placex_id = itertools.count(100000) - self.osmline_id = itertools.count(500000) - self.postcode_id = itertools.count(700000) + DROP TYPE IF EXISTS prepare_update_info CASCADE; + CREATE TYPE prepare_update_info AS ( + name HSTORE, + address HSTORE, + rank_address SMALLINT, + country_code TEXT, + class TEXT, + type TEXT, + linked_place_id BIGINT + ); + CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex, + OUT result prepare_update_info) AS $$ + BEGIN + result.address := p.address; + result.name := p.name; + result.class := p.class; + result.type := p.type; + result.country_code := p.country_code; + result.rank_address := p.rank_address; + END; $$ LANGUAGE plpgsql STABLE; - self.conn = conn - self.conn.autocimmit = True - with self.conn.cursor() as cur: - cur.execute("""CREATE TABLE placex (place_id BIGINT, - name HSTORE, - class TEXT, - type TEXT, - linked_place_id BIGINT, - rank_address SMALLINT, - rank_search SMALLINT, - indexed_status SMALLINT, - indexed_date TIMESTAMP, - partition SMALLINT, - admin_level SMALLINT, - country_code TEXT, - address HSTORE, - token_info JSONB, - geometry_sector INTEGER)""") - cur.execute("""CREATE TABLE location_property_osmline ( - place_id BIGINT, - osm_id BIGINT, - address HSTORE, - token_info JSONB, - indexed_status SMALLINT, - indexed_date TIMESTAMP, - geometry_sector INTEGER)""") - cur.execute("""CREATE TABLE location_postcodes ( - place_id BIGINT, - indexed_status SMALLINT, - indexed_date TIMESTAMP, - country_code varchar(2), - postcode TEXT)""") - cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER - AS $$ - BEGIN - IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN - NEW.indexed_date = now(); - END IF; - RETURN NEW; - END; $$ LANGUAGE plpgsql;""") - cur.execute("DROP TYPE IF EXISTS prepare_update_info CASCADE") - cur.execute("""CREATE TYPE prepare_update_info AS ( - name HSTORE, - address HSTORE, - rank_address SMALLINT, - country_code TEXT, - class TEXT, - type TEXT, - linked_place_id BIGINT - )""") - cur.execute("""CREATE OR REPLACE FUNCTION placex_indexing_prepare(p placex, - OUT result prepare_update_info) - AS $$ - BEGIN - result.address := p.address; - result.name := p.name; - result.class := p.class; - result.type := p.type; - result.country_code := p.country_code; - result.rank_address := p.rank_address; - END; - $$ LANGUAGE plpgsql STABLE; - """) - cur.execute("""CREATE OR REPLACE FUNCTION - get_interpolation_address(in_address HSTORE, wayid BIGINT) - RETURNS HSTORE AS $$ - BEGIN - RETURN in_address; - END; - $$ LANGUAGE plpgsql STABLE; - """) + CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT) + RETURNS HSTORE AS $$ SELECT in_address $$ LANGUAGE sql STABLE; + """) - for table in ('placex', 'location_property_osmline', 'location_postcodes'): - cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0} - FOR EACH ROW EXECUTE PROCEDURE date_update() - """.format(table)) + for table in ('placex', 'location_property_osmline', 'location_postcodes'): + temp_db_conn.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0} + FOR EACH ROW EXECUTE PROCEDURE date_update() + """.format(table)) + + self.tokenizer = factory.create_tokenizer(project_env) def scalar(self, query): with self.conn.cursor() as cur: cur.execute(query) return cur.fetchone()[0] - def add_place(self, cls='place', typ='locality', - rank_search=30, rank_address=30, sector=20): - next_id = next(self.placex_id) - with self.conn.cursor() as cur: - cur.execute("""INSERT INTO placex - (place_id, class, type, rank_search, rank_address, - indexed_status, geometry_sector) - VALUES (%s, %s, %s, %s, %s, 1, %s)""", - (next_id, cls, typ, rank_search, rank_address, sector)) - return next_id - - def add_admin(self, **kwargs): - kwargs['cls'] = 'boundary' - kwargs['typ'] = 'administrative' - return self.add_place(**kwargs) - - def add_osmline(self, sector=20): - next_id = next(self.osmline_id) - with self.conn.cursor() as cur: - cur.execute("""INSERT INTO location_property_osmline - (place_id, osm_id, indexed_status, geometry_sector) - VALUES (%s, %s, 1, %s)""", - (next_id, next_id, sector)) - return next_id - - def add_postcode(self, country, postcode): - next_id = next(self.postcode_id) - with self.conn.cursor() as cur: - cur.execute("""INSERT INTO location_postcodes - (place_id, indexed_status, country_code, postcode) - VALUES (%s, 1, %s, %s)""", - (next_id, country, postcode)) - return next_id - def placex_unindexed(self): return self.scalar('SELECT count(*) from placex where indexed_status > 0') @@ -145,148 +72,133 @@ class IndexerTestDB: return self.scalar("""SELECT count(*) from location_property_osmline WHERE indexed_status > 0""") + @pytest.mark.parametrize("threads", [1, 15]) + @pytest.mark.asyncio + async def test_index_all_by_rank(self, dsn, threads, placex_row, osmline_row): + for rank in range(31): + placex_row(rank_address=rank, rank_search=rank, indexed_status=1) + osmline_row() -@pytest.fixture -def test_db(temp_db_conn): - yield IndexerTestDB(temp_db_conn) + assert self.placex_unindexed() == 31 + assert self.osmline_unindexed() == 1 + idx = indexer.Indexer(dsn, self.tokenizer, threads) + await idx.index_by_rank(0, 30) -@pytest.fixture -def test_tokenizer(tokenizer_mock, project_env): - return factory.create_tokenizer(project_env) + assert self.placex_unindexed() == 0 + assert self.osmline_unindexed() == 0 + assert self.scalar("""SELECT count(*) from placex + WHERE indexed_status = 0 and indexed_date is null""") == 0 + # ranks come in order of rank address + assert self.scalar(""" + SELECT count(*) FROM placex p WHERE rank_address > 0 + AND indexed_date >= (SELECT min(indexed_date) FROM placex o + WHERE p.rank_address < o.rank_address)""") == 0 + # placex address ranked objects come before interpolations + assert self.scalar( + """SELECT count(*) FROM placex WHERE rank_address > 0 + AND indexed_date > + (SELECT min(indexed_date) FROM location_property_osmline)""") == 0 + # rank 0 comes after all other placex objects + assert self.scalar( + """SELECT count(*) FROM placex WHERE rank_address > 0 + AND indexed_date > + (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 -@pytest.mark.parametrize("threads", [1, 15]) -@pytest.mark.asyncio -async def test_index_all_by_rank(test_db, threads, test_tokenizer): - for rank in range(31): - test_db.add_place(rank_address=rank, rank_search=rank) - test_db.add_osmline() + @pytest.mark.parametrize("threads", [1, 15]) + @pytest.mark.asyncio + async def test_index_partial_without_30(self, dsn, threads, placex_row, osmline_row): + for rank in range(31): + placex_row(rank_address=rank, rank_search=rank, indexed_status=1) + osmline_row() - assert test_db.placex_unindexed() == 31 - assert test_db.osmline_unindexed() == 1 + assert self.placex_unindexed() == 31 + assert self.osmline_unindexed() == 1 - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - await idx.index_by_rank(0, 30) + idx = indexer.Indexer(dsn, self.tokenizer, threads) + await idx.index_by_rank(4, 15) - assert test_db.placex_unindexed() == 0 - assert test_db.osmline_unindexed() == 0 + assert self.placex_unindexed() == 19 + assert self.osmline_unindexed() == 1 - assert test_db.scalar("""SELECT count(*) from placex - WHERE indexed_status = 0 and indexed_date is null""") == 0 - # ranks come in order of rank address - assert test_db.scalar(""" - SELECT count(*) FROM placex p WHERE rank_address > 0 - AND indexed_date >= (SELECT min(indexed_date) FROM placex o - WHERE p.rank_address < o.rank_address)""") == 0 - # placex address ranked objects come before interpolations - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address > 0 - AND indexed_date > - (SELECT min(indexed_date) FROM location_property_osmline)""") == 0 - # rank 0 comes after all other placex objects - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address > 0 - AND indexed_date > - (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 + assert self.scalar(""" + SELECT count(*) FROM placex + WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0 + @pytest.mark.parametrize("threads", [1, 15]) + @pytest.mark.asyncio + async def test_index_partial_with_30(self, dsn, threads, placex_row, osmline_row): + for rank in range(31): + placex_row(rank_address=rank, rank_search=rank, indexed_status=1) + osmline_row() -@pytest.mark.parametrize("threads", [1, 15]) -@pytest.mark.asyncio -async def test_index_partial_without_30(test_db, threads, test_tokenizer): - for rank in range(31): - test_db.add_place(rank_address=rank, rank_search=rank) - test_db.add_osmline() + assert self.placex_unindexed() == 31 + assert self.osmline_unindexed() == 1 - assert test_db.placex_unindexed() == 31 - assert test_db.osmline_unindexed() == 1 + idx = indexer.Indexer(dsn, self.tokenizer, threads) + await idx.index_by_rank(28, 30) - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', - test_tokenizer, threads) - await idx.index_by_rank(4, 15) + assert self.placex_unindexed() == 28 + assert self.osmline_unindexed() == 0 - assert test_db.placex_unindexed() == 19 - assert test_db.osmline_unindexed() == 1 + assert self.scalar(""" + SELECT count(*) FROM placex + WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0 - assert test_db.scalar(""" - SELECT count(*) FROM placex - WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0 + @pytest.mark.parametrize("threads", [1, 15]) + @pytest.mark.asyncio + async def test_index_boundaries(self, dsn, threads, placex_row, osmline_row): + for rank in range(4, 10): + placex_row(cls='boundary', typ='administrative', + rank_address=rank, rank_search=rank, indexed_status=1) + for rank in range(31): + placex_row(rank_address=rank, rank_search=rank, indexed_status=1) + osmline_row() + assert self.placex_unindexed() == 37 + assert self.osmline_unindexed() == 1 -@pytest.mark.parametrize("threads", [1, 15]) -@pytest.mark.asyncio -async def test_index_partial_with_30(test_db, threads, test_tokenizer): - for rank in range(31): - test_db.add_place(rank_address=rank, rank_search=rank) - test_db.add_osmline() + idx = indexer.Indexer(dsn, self.tokenizer, threads) + await idx.index_boundaries() - assert test_db.placex_unindexed() == 31 - assert test_db.osmline_unindexed() == 1 + assert self.placex_unindexed() == 31 + assert self.osmline_unindexed() == 1 - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - await idx.index_by_rank(28, 30) + assert self.scalar(""" + SELECT count(*) FROM placex + WHERE indexed_status = 0 AND class != 'boundary'""") == 0 - assert test_db.placex_unindexed() == 28 - assert test_db.osmline_unindexed() == 0 + @pytest.mark.parametrize("threads", [1, 15]) + @pytest.mark.asyncio + async def test_index_postcodes(self, dsn, threads, postcode_row): + for postcode in range(1000): + postcode_row(country='de', postcode=postcode) + for postcode in range(32000, 33000): + postcode_row(country='us', postcode=postcode) - assert test_db.scalar(""" - SELECT count(*) FROM placex - WHERE indexed_status = 0 AND rank_address between 0 and 27""") == 0 + idx = indexer.Indexer(dsn, self.tokenizer, threads) + await idx.index_postcodes() + assert self.scalar("""SELECT count(*) FROM location_postcodes + WHERE indexed_status != 0""") == 0 -@pytest.mark.parametrize("threads", [1, 15]) -@pytest.mark.asyncio -async def test_index_boundaries(test_db, threads, test_tokenizer): - for rank in range(4, 10): - test_db.add_admin(rank_address=rank, rank_search=rank) - for rank in range(31): - test_db.add_place(rank_address=rank, rank_search=rank) - test_db.add_osmline() + @pytest.mark.parametrize("analyse", [True, False]) + @pytest.mark.asyncio + async def test_index_full(self, dsn, analyse, placex_row, osmline_row, postcode_row): + for rank in range(4, 10): + placex_row(cls='boundary', typ='administrative', + rank_address=rank, rank_search=rank, indexed_status=1) + for rank in range(31): + placex_row(rank_address=rank, rank_search=rank, indexed_status=1) + osmline_row() + for postcode in range(1000): + postcode_row(country='de', postcode=postcode) - assert test_db.placex_unindexed() == 37 - assert test_db.osmline_unindexed() == 1 + idx = indexer.Indexer(dsn, self.tokenizer, 4) + await idx.index_full(analyse=analyse) - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - await idx.index_boundaries() - - assert test_db.placex_unindexed() == 31 - assert test_db.osmline_unindexed() == 1 - - assert test_db.scalar(""" - SELECT count(*) FROM placex - WHERE indexed_status = 0 AND class != 'boundary'""") == 0 - - -@pytest.mark.parametrize("threads", [1, 15]) -@pytest.mark.asyncio -async def test_index_postcodes(test_db, threads, test_tokenizer): - for postcode in range(1000): - test_db.add_postcode('de', postcode) - for postcode in range(32000, 33000): - test_db.add_postcode('us', postcode) - - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - await idx.index_postcodes() - - assert test_db.scalar("""SELECT count(*) FROM location_postcodes - WHERE indexed_status != 0""") == 0 - - -@pytest.mark.parametrize("analyse", [True, False]) -@pytest.mark.asyncio -async def test_index_full(test_db, analyse, test_tokenizer): - for rank in range(4, 10): - test_db.add_admin(rank_address=rank, rank_search=rank) - for rank in range(31): - test_db.add_place(rank_address=rank, rank_search=rank) - test_db.add_osmline() - for postcode in range(1000): - test_db.add_postcode('de', postcode) - - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4) - await idx.index_full(analyse=analyse) - - assert test_db.placex_unindexed() == 0 - assert test_db.osmline_unindexed() == 0 - assert test_db.scalar("""SELECT count(*) FROM location_postcodes - WHERE indexed_status != 0""") == 0 + assert self.placex_unindexed() == 0 + assert self.osmline_unindexed() == 0 + assert self.scalar("""SELECT count(*) FROM location_postcodes + WHERE indexed_status != 0""") == 0