mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 21:34:06 +00:00
require tokenizer for indexer
This commit is contained in:
@@ -32,8 +32,11 @@ class UpdateIndex:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
def run(args):
|
def run(args):
|
||||||
from ..indexer.indexer import Indexer
|
from ..indexer.indexer import Indexer
|
||||||
|
from ..tokenizer import factory as tokenizer_factory
|
||||||
|
|
||||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||||
|
|
||||||
|
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||||
args.threads or psutil.cpu_count() or 1)
|
args.threads or psutil.cpu_count() or 1)
|
||||||
|
|
||||||
if not args.no_boundaries:
|
if not args.no_boundaries:
|
||||||
|
|||||||
@@ -83,6 +83,7 @@ class UpdateReplication:
|
|||||||
def _update(args):
|
def _update(args):
|
||||||
from ..tools import replication
|
from ..tools import replication
|
||||||
from ..indexer.indexer import Indexer
|
from ..indexer.indexer import Indexer
|
||||||
|
from ..tokenizer import factory as tokenizer_factory
|
||||||
|
|
||||||
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
|
params = args.osm2pgsql_options(default_cache=2000, default_threads=1)
|
||||||
params.update(base_url=args.config.REPLICATION_URL,
|
params.update(base_url=args.config.REPLICATION_URL,
|
||||||
@@ -106,6 +107,8 @@ class UpdateReplication:
|
|||||||
raise UsageError("Bad argument '--no-index'.")
|
raise UsageError("Bad argument '--no-index'.")
|
||||||
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL')
|
||||||
|
|
||||||
|
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
start = dt.datetime.now(dt.timezone.utc)
|
start = dt.datetime.now(dt.timezone.utc)
|
||||||
@@ -116,7 +119,7 @@ class UpdateReplication:
|
|||||||
|
|
||||||
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
|
if state is not replication.UpdateState.NO_CHANGES and args.do_index:
|
||||||
index_start = dt.datetime.now(dt.timezone.utc)
|
index_start = dt.datetime.now(dt.timezone.utc)
|
||||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||||
args.threads or 1)
|
args.threads or 1)
|
||||||
indexer.index_boundaries(0, 30)
|
indexer.index_boundaries(0, 30)
|
||||||
indexer.index_by_rank(0, 30)
|
indexer.index_by_rank(0, 30)
|
||||||
|
|||||||
@@ -123,7 +123,7 @@ class SetupAll:
|
|||||||
with connect(args.config.get_libpq_dsn()) as conn:
|
with connect(args.config.get_libpq_dsn()) as conn:
|
||||||
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
|
SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
|
||||||
LOG.warning('Indexing places')
|
LOG.warning('Indexing places')
|
||||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer,
|
||||||
args.threads or psutil.cpu_count() or 1)
|
args.threads or psutil.cpu_count() or 1)
|
||||||
indexer.index_full(analyse=not args.index_noanalyse)
|
indexer.index_full(analyse=not args.index_noanalyse)
|
||||||
|
|
||||||
|
|||||||
@@ -79,8 +79,9 @@ class Indexer:
|
|||||||
""" Main indexing routine.
|
""" Main indexing routine.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, dsn, num_threads):
|
def __init__(self, dsn, tokenizer, num_threads):
|
||||||
self.dsn = dsn
|
self.dsn = dsn
|
||||||
|
self.tokenizer = tokenizer
|
||||||
self.num_threads = num_threads
|
self.num_threads = num_threads
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -24,7 +24,7 @@ Feature: Creation of search terms
|
|||||||
When importing
|
When importing
|
||||||
Then search_name contains
|
Then search_name contains
|
||||||
| object | nameaddress_vector |
|
| object | nameaddress_vector |
|
||||||
| N1 | Rose, Street, Walltown |
|
| N1 | #Rose Street, Walltown |
|
||||||
When searching for "23 Rose Street, Walltown"
|
When searching for "23 Rose Street, Walltown"
|
||||||
Then results contain
|
Then results contain
|
||||||
| osm_type | osm_id | name |
|
| osm_type | osm_id | name |
|
||||||
@@ -248,7 +248,7 @@ Feature: Creation of search terms
|
|||||||
When importing
|
When importing
|
||||||
Then search_name contains
|
Then search_name contains
|
||||||
| object | name_vector | nameaddress_vector |
|
| object | name_vector | nameaddress_vector |
|
||||||
| N1 | #Green Moss | Rose, Street, Walltown |
|
| N1 | #Green Moss | #Rose Street, Walltown |
|
||||||
When searching for "Green Moss, Rose Street, Walltown"
|
When searching for "Green Moss, Rose Street, Walltown"
|
||||||
Then results contain
|
Then results contain
|
||||||
| osm_type | osm_id | name |
|
| osm_type | osm_id | name |
|
||||||
@@ -299,7 +299,7 @@ Feature: Creation of search terms
|
|||||||
When importing
|
When importing
|
||||||
Then search_name contains
|
Then search_name contains
|
||||||
| object | name_vector | nameaddress_vector |
|
| object | name_vector | nameaddress_vector |
|
||||||
| N1 | foo | the road |
|
| N1 | foo | #the road |
|
||||||
|
|
||||||
Scenario: Some addr: tags are added to address
|
Scenario: Some addr: tags are added to address
|
||||||
Given the scene roads-with-pois
|
Given the scene roads-with-pois
|
||||||
|
|||||||
@@ -109,7 +109,7 @@ def import_and_index_data_from_place_table(context):
|
|||||||
|
|
||||||
# Call directly as the refresh function does not include postcodes.
|
# Call directly as the refresh function does not include postcodes.
|
||||||
indexer.LOG.setLevel(logging.ERROR)
|
indexer.LOG.setLevel(logging.ERROR)
|
||||||
indexer.Indexer(context.nominatim.get_libpq_dsn(), 1).index_full(analyse=False)
|
indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
|
||||||
|
|
||||||
check_database_integrity(context)
|
check_database_integrity(context)
|
||||||
|
|
||||||
@@ -234,7 +234,7 @@ def check_search_name_contents(context, exclude):
|
|||||||
if exclude:
|
if exclude:
|
||||||
assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
|
assert not present, "Found term for {}/{}: {}".format(row['object'], name, wid[1])
|
||||||
else:
|
else:
|
||||||
assert present, "Missing term for {}/{}: {}".fromat(row['object'], name, wid[1])
|
assert present, "Missing term for {}/{}: {}".format(row['object'], name, wid[1])
|
||||||
elif name != 'object':
|
elif name != 'object':
|
||||||
assert db_row.contains(name, value), db_row.assert_msg(name, value)
|
assert db_row.contains(name, value), db_row.assert_msg(name, value)
|
||||||
|
|
||||||
|
|||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import importlib
|
||||||
import itertools
|
import itertools
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
@@ -15,6 +16,9 @@ sys.path.insert(0, str(SRC_DIR.resolve()))
|
|||||||
from nominatim.config import Configuration
|
from nominatim.config import Configuration
|
||||||
from nominatim.db import connection
|
from nominatim.db import connection
|
||||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||||
|
from nominatim.db import properties
|
||||||
|
|
||||||
|
import dummy_tokenizer
|
||||||
|
|
||||||
class _TestingCursor(psycopg2.extras.DictCursor):
|
class _TestingCursor(psycopg2.extras.DictCursor):
|
||||||
""" Extension to the DictCursor class that provides execution
|
""" Extension to the DictCursor class that provides execution
|
||||||
@@ -292,3 +296,17 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
|
|||||||
sql=tmp_path, data=SRC_DIR / 'data')
|
sql=tmp_path, data=SRC_DIR / 'data')
|
||||||
|
|
||||||
return SQLPreprocessor(temp_db_conn, cfg)
|
return SQLPreprocessor(temp_db_conn, cfg)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
|
||||||
|
""" Sets up the configuration so that the test dummy tokenizer will be
|
||||||
|
loaded.
|
||||||
|
"""
|
||||||
|
monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
|
||||||
|
|
||||||
|
def _import_dummy(module, *args, **kwargs):
|
||||||
|
return dummy_tokenizer
|
||||||
|
|
||||||
|
monkeypatch.setattr(importlib, "import_module", _import_dummy)
|
||||||
|
properties.set_property(temp_db_conn, 'tokenizer', 'dummy')
|
||||||
|
|||||||
@@ -57,6 +57,22 @@ def mock_func_factory(monkeypatch):
|
|||||||
return get_mock
|
return get_mock
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def tokenizer_mock(monkeypatch):
|
||||||
|
class DummyTokenizer:
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
self.update_sql_functions_called = False
|
||||||
|
|
||||||
|
def update_sql_functions(self, *args):
|
||||||
|
self.update_sql_functions_called = True
|
||||||
|
|
||||||
|
tok = DummyTokenizer()
|
||||||
|
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
||||||
|
lambda *args: tok)
|
||||||
|
|
||||||
|
return tok
|
||||||
|
|
||||||
|
|
||||||
def test_cli_help(capsys):
|
def test_cli_help(capsys):
|
||||||
""" Running nominatim tool without arguments prints help.
|
""" Running nominatim tool without arguments prints help.
|
||||||
"""
|
"""
|
||||||
@@ -221,7 +237,8 @@ def test_add_data_command(mock_run_legacy, name, oid):
|
|||||||
(['--boundaries-only'], 1, 0),
|
(['--boundaries-only'], 1, 0),
|
||||||
(['--no-boundaries'], 0, 1),
|
(['--no-boundaries'], 0, 1),
|
||||||
(['--boundaries-only', '--no-boundaries'], 0, 0)])
|
(['--boundaries-only', '--no-boundaries'], 0, 0)])
|
||||||
def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks):
|
def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
|
||||||
|
params, do_bnds, do_ranks):
|
||||||
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
|
temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)")
|
||||||
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
|
bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries')
|
||||||
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
|
rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank')
|
||||||
@@ -253,20 +270,12 @@ def test_refresh_command(mock_func_factory, temp_db, command, func):
|
|||||||
assert func_mock.called == 1
|
assert func_mock.called == 1
|
||||||
|
|
||||||
|
|
||||||
def test_refresh_create_functions(mock_func_factory, monkeypatch, temp_db):
|
def test_refresh_create_functions(mock_func_factory, temp_db, tokenizer_mock):
|
||||||
class DummyTokenizer:
|
|
||||||
def update_sql_functions(self, *args):
|
|
||||||
self.called = True
|
|
||||||
|
|
||||||
func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
func_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
|
||||||
tok = DummyTokenizer()
|
|
||||||
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
|
|
||||||
lambda *args: tok)
|
|
||||||
|
|
||||||
|
|
||||||
assert 0 == call_nominatim('refresh', '--functions')
|
assert 0 == call_nominatim('refresh', '--functions')
|
||||||
assert func_mock.called == 1
|
assert func_mock.called == 1
|
||||||
assert hasattr(tok, 'called')
|
assert tokenizer_mock.update_sql_functions_called
|
||||||
|
|
||||||
|
|
||||||
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
|
def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db):
|
||||||
|
|||||||
@@ -27,7 +27,7 @@ def call_nominatim(*args):
|
|||||||
cli_args=['replication'] + list(args))
|
cli_args=['replication'] + list(args))
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def index_mock(monkeypatch):
|
def index_mock(monkeypatch, tokenizer_mock):
|
||||||
mock = MockParamCapture()
|
mock = MockParamCapture()
|
||||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
|
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock)
|
||||||
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
|
monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock)
|
||||||
@@ -52,7 +52,7 @@ def init_status(temp_db_conn, status_table):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def update_mock(mock_func_factory, init_status):
|
def update_mock(mock_func_factory, init_status, tokenizer_mock):
|
||||||
return mock_func_factory(nominatim.tools.replication, 'update')
|
return mock_func_factory(nominatim.tools.replication, 'update')
|
||||||
|
|
||||||
@pytest.mark.parametrize("params,func", [
|
@pytest.mark.parametrize("params,func", [
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import psycopg2
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nominatim.indexer import indexer
|
from nominatim.indexer import indexer
|
||||||
|
from nominatim.tokenizer import factory
|
||||||
|
|
||||||
class IndexerTestDB:
|
class IndexerTestDB:
|
||||||
|
|
||||||
@@ -115,8 +116,14 @@ def test_db(temp_db_conn):
|
|||||||
yield IndexerTestDB(temp_db_conn)
|
yield IndexerTestDB(temp_db_conn)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
|
||||||
|
def_config.project_dir = tmp_path
|
||||||
|
return factory.create_tokenizer(def_config)
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_all_by_rank(test_db, threads):
|
def test_index_all_by_rank(test_db, threads, test_tokenizer):
|
||||||
for rank in range(31):
|
for rank in range(31):
|
||||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||||
test_db.add_osmline()
|
test_db.add_osmline()
|
||||||
@@ -124,7 +131,7 @@ def test_index_all_by_rank(test_db, threads):
|
|||||||
assert 31 == test_db.placex_unindexed()
|
assert 31 == test_db.placex_unindexed()
|
||||||
assert 1 == test_db.osmline_unindexed()
|
assert 1 == test_db.osmline_unindexed()
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||||
idx.index_by_rank(0, 30)
|
idx.index_by_rank(0, 30)
|
||||||
|
|
||||||
assert 0 == test_db.placex_unindexed()
|
assert 0 == test_db.placex_unindexed()
|
||||||
@@ -155,7 +162,7 @@ def test_index_all_by_rank(test_db, threads):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_partial_without_30(test_db, threads):
|
def test_index_partial_without_30(test_db, threads, test_tokenizer):
|
||||||
for rank in range(31):
|
for rank in range(31):
|
||||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||||
test_db.add_osmline()
|
test_db.add_osmline()
|
||||||
@@ -163,7 +170,8 @@ def test_index_partial_without_30(test_db, threads):
|
|||||||
assert 31 == test_db.placex_unindexed()
|
assert 31 == test_db.placex_unindexed()
|
||||||
assert 1 == test_db.osmline_unindexed()
|
assert 1 == test_db.osmline_unindexed()
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest',
|
||||||
|
test_tokenizer, threads)
|
||||||
idx.index_by_rank(4, 15)
|
idx.index_by_rank(4, 15)
|
||||||
|
|
||||||
assert 19 == test_db.placex_unindexed()
|
assert 19 == test_db.placex_unindexed()
|
||||||
@@ -175,7 +183,7 @@ def test_index_partial_without_30(test_db, threads):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_partial_with_30(test_db, threads):
|
def test_index_partial_with_30(test_db, threads, test_tokenizer):
|
||||||
for rank in range(31):
|
for rank in range(31):
|
||||||
test_db.add_place(rank_address=rank, rank_search=rank)
|
test_db.add_place(rank_address=rank, rank_search=rank)
|
||||||
test_db.add_osmline()
|
test_db.add_osmline()
|
||||||
@@ -183,7 +191,7 @@ def test_index_partial_with_30(test_db, threads):
|
|||||||
assert 31 == test_db.placex_unindexed()
|
assert 31 == test_db.placex_unindexed()
|
||||||
assert 1 == test_db.osmline_unindexed()
|
assert 1 == test_db.osmline_unindexed()
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||||
idx.index_by_rank(28, 30)
|
idx.index_by_rank(28, 30)
|
||||||
|
|
||||||
assert 27 == test_db.placex_unindexed()
|
assert 27 == test_db.placex_unindexed()
|
||||||
@@ -194,7 +202,7 @@ def test_index_partial_with_30(test_db, threads):
|
|||||||
WHERE indexed_status = 0 AND rank_address between 1 and 27""")
|
WHERE indexed_status = 0 AND rank_address between 1 and 27""")
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_boundaries(test_db, threads):
|
def test_index_boundaries(test_db, threads, test_tokenizer):
|
||||||
for rank in range(4, 10):
|
for rank in range(4, 10):
|
||||||
test_db.add_admin(rank_address=rank, rank_search=rank)
|
test_db.add_admin(rank_address=rank, rank_search=rank)
|
||||||
for rank in range(31):
|
for rank in range(31):
|
||||||
@@ -204,7 +212,7 @@ def test_index_boundaries(test_db, threads):
|
|||||||
assert 37 == test_db.placex_unindexed()
|
assert 37 == test_db.placex_unindexed()
|
||||||
assert 1 == test_db.osmline_unindexed()
|
assert 1 == test_db.osmline_unindexed()
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||||
idx.index_boundaries(0, 30)
|
idx.index_boundaries(0, 30)
|
||||||
|
|
||||||
assert 31 == test_db.placex_unindexed()
|
assert 31 == test_db.placex_unindexed()
|
||||||
@@ -216,13 +224,13 @@ def test_index_boundaries(test_db, threads):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_postcodes(test_db, threads):
|
def test_index_postcodes(test_db, threads, test_tokenizer):
|
||||||
for postcode in range(1000):
|
for postcode in range(1000):
|
||||||
test_db.add_postcode('de', postcode)
|
test_db.add_postcode('de', postcode)
|
||||||
for postcode in range(32000, 33000):
|
for postcode in range(32000, 33000):
|
||||||
test_db.add_postcode('us', postcode)
|
test_db.add_postcode('us', postcode)
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||||
idx.index_postcodes()
|
idx.index_postcodes()
|
||||||
|
|
||||||
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
|
assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
|
||||||
@@ -230,7 +238,7 @@ def test_index_postcodes(test_db, threads):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("analyse", [True, False])
|
@pytest.mark.parametrize("analyse", [True, False])
|
||||||
def test_index_full(test_db, analyse):
|
def test_index_full(test_db, analyse, test_tokenizer):
|
||||||
for rank in range(4, 10):
|
for rank in range(4, 10):
|
||||||
test_db.add_admin(rank_address=rank, rank_search=rank)
|
test_db.add_admin(rank_address=rank, rank_search=rank)
|
||||||
for rank in range(31):
|
for rank in range(31):
|
||||||
@@ -239,7 +247,7 @@ def test_index_full(test_db, analyse):
|
|||||||
for postcode in range(1000):
|
for postcode in range(1000):
|
||||||
test_db.add_postcode('de', postcode)
|
test_db.add_postcode('de', postcode)
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
|
||||||
idx.index_full(analyse=analyse)
|
idx.index_full(analyse=analyse)
|
||||||
|
|
||||||
assert 0 == test_db.placex_unindexed()
|
assert 0 == test_db.placex_unindexed()
|
||||||
@@ -249,13 +257,13 @@ def test_index_full(test_db, analyse):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("threads", [1, 15])
|
@pytest.mark.parametrize("threads", [1, 15])
|
||||||
def test_index_reopen_connection(test_db, threads, monkeypatch):
|
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
|
||||||
monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
|
monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
|
||||||
|
|
||||||
for _ in range(1000):
|
for _ in range(1000):
|
||||||
test_db.add_place(rank_address=30, rank_search=30)
|
test_db.add_place(rank_address=30, rank_search=30)
|
||||||
|
|
||||||
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
|
idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
|
||||||
idx.index_by_rank(28, 30)
|
idx.index_by_rank(28, 30)
|
||||||
|
|
||||||
assert 0 == test_db.placex_unindexed()
|
assert 0 == test_db.placex_unindexed()
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import pytest
|
|||||||
from nominatim.db import properties
|
from nominatim.db import properties
|
||||||
from nominatim.tokenizer import factory
|
from nominatim.tokenizer import factory
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
import dummy_tokenizer
|
from dummy_tokenizer import DummyTokenizer
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def test_config(def_config, tmp_path):
|
def test_config(def_config, tmp_path):
|
||||||
@@ -15,37 +15,27 @@ def test_config(def_config, tmp_path):
|
|||||||
return def_config
|
return def_config
|
||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
|
||||||
def tokenizer_import(monkeypatch):
|
|
||||||
monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')
|
|
||||||
|
|
||||||
def _import_dummy(module, *args, **kwargs):
|
|
||||||
return dummy_tokenizer
|
|
||||||
|
|
||||||
monkeypatch.setattr(importlib, "import_module", _import_dummy)
|
|
||||||
|
|
||||||
|
|
||||||
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
|
def test_setup_dummy_tokenizer(temp_db_conn, test_config,
|
||||||
tokenizer_import, property_table):
|
tokenizer_mock, property_table):
|
||||||
tokenizer = factory.create_tokenizer(test_config)
|
tokenizer = factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
|
assert isinstance(tokenizer, DummyTokenizer)
|
||||||
assert tokenizer.init_state == "new"
|
assert tokenizer.init_state == "new"
|
||||||
assert (test_config.project_dir / 'tokenizer').is_dir()
|
assert (test_config.project_dir / 'tokenizer').is_dir()
|
||||||
|
|
||||||
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
|
assert properties.get_property(temp_db_conn, 'tokenizer') == 'dummy'
|
||||||
|
|
||||||
|
|
||||||
def test_setup_tokenizer_dir_exists(test_config, tokenizer_import, property_table):
|
def test_setup_tokenizer_dir_exists(test_config, tokenizer_mock, property_table):
|
||||||
(test_config.project_dir / 'tokenizer').mkdir()
|
(test_config.project_dir / 'tokenizer').mkdir()
|
||||||
|
|
||||||
tokenizer = factory.create_tokenizer(test_config)
|
tokenizer = factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
|
assert isinstance(tokenizer, DummyTokenizer)
|
||||||
assert tokenizer.init_state == "new"
|
assert tokenizer.init_state == "new"
|
||||||
|
|
||||||
|
|
||||||
def test_setup_tokenizer_dir_failure(test_config, tokenizer_import, property_table):
|
def test_setup_tokenizer_dir_failure(test_config, tokenizer_mock, property_table):
|
||||||
(test_config.project_dir / 'tokenizer').write_text("foo")
|
(test_config.project_dir / 'tokenizer').write_text("foo")
|
||||||
|
|
||||||
with pytest.raises(UsageError):
|
with pytest.raises(UsageError):
|
||||||
@@ -59,16 +49,16 @@ def test_setup_bad_tokenizer_name(test_config, monkeypatch):
|
|||||||
factory.create_tokenizer(test_config)
|
factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
def test_load_tokenizer(temp_db_conn, test_config,
|
def test_load_tokenizer(temp_db_conn, test_config,
|
||||||
tokenizer_import, property_table):
|
tokenizer_mock, property_table):
|
||||||
factory.create_tokenizer(test_config)
|
factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
tokenizer = factory.get_tokenizer_for_db(test_config)
|
tokenizer = factory.get_tokenizer_for_db(test_config)
|
||||||
|
|
||||||
assert isinstance(tokenizer, dummy_tokenizer.DummyTokenizer)
|
assert isinstance(tokenizer, DummyTokenizer)
|
||||||
assert tokenizer.init_state == "loaded"
|
assert tokenizer.init_state == "loaded"
|
||||||
|
|
||||||
|
|
||||||
def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
|
def test_load_no_tokenizer_dir(test_config, tokenizer_mock, property_table):
|
||||||
factory.create_tokenizer(test_config)
|
factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
test_config.project_dir = test_config.project_dir / 'foo'
|
test_config.project_dir = test_config.project_dir / 'foo'
|
||||||
@@ -77,7 +67,7 @@ def test_load_no_tokenizer_dir(test_config, tokenizer_import, property_table):
|
|||||||
factory.get_tokenizer_for_db(test_config)
|
factory.get_tokenizer_for_db(test_config)
|
||||||
|
|
||||||
|
|
||||||
def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_import, property_table):
|
def test_load_missing_propoerty(temp_db_cursor, test_config, tokenizer_mock, property_table):
|
||||||
factory.create_tokenizer(test_config)
|
factory.create_tokenizer(test_config)
|
||||||
|
|
||||||
temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
|
temp_db_cursor.execute("TRUNCATE TABLE nominatim_properties")
|
||||||
|
|||||||
Reference in New Issue
Block a user