mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 19:07:58 +00:00
add factory for query analyzer
This commit is contained in:
@@ -21,10 +21,7 @@ from nominatim.typing import SaRow
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.search import query as qmod
|
||||
|
||||
# XXX: TODO
|
||||
class AbstractQueryAnalyzer:
    """ Empty placeholder for the query analyzer base class.
    """
    # No behaviour is defined here; the class only provides the name.
    pass
|
||||
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||
|
||||
|
||||
DB_TO_TOKEN_TYPE = {
|
||||
|
||||
43
nominatim/api/search/query_analyzer_factory.py
Normal file
43
nominatim/api/search/query_analyzer_factory.py
Normal file
@@ -0,0 +1,43 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Factory for creating a query analyzer for the configured tokenizer.
|
||||
"""
|
||||
from typing import List, cast
|
||||
from abc import ABC, abstractmethod
|
||||
from pathlib import Path
|
||||
import importlib
|
||||
|
||||
from nominatim.api.logging import log
|
||||
from nominatim.api.connection import SearchConnection
|
||||
from nominatim.api.search.query import Phrase, QueryStruct
|
||||
|
||||
class AbstractQueryAnalyzer(ABC):
    """ Interface for classes that analyse incoming queries.

        A query analyzer is bound to the tokenizer that was used
        when the data was imported.
    """

    @abstractmethod
    async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct:
        """ Tokenize the given list of phrases and return the result
            as a query structure.
        """
|
||||
|
||||
|
||||
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Build the query analyzer that belongs to the database's tokenizer.

        The tokenizer name is read from the 'tokenizer' property of the
        database. The analyzer is then created by the
        `create_query_analyzer()` function of the module
        'nominatim.api.search.<name>_tokenizer'.

        Raises a RuntimeError when no source file for that module
        exists in the directory of this file.
    """
    name = await conn.get_property('tokenizer')

    # Look for the source file before importing, so that an unknown
    # tokenizer name is logged and reported as a RuntimeError.
    tokenizer_source = Path(__file__).parent / f'{name}_tokenizer.py'

    if tokenizer_source.is_file():
        tokenizer_module = importlib.import_module(f'nominatim.api.search.{name}_tokenizer')
        analyzer = await tokenizer_module.create_query_analyzer(conn)

        return cast(AbstractQueryAnalyzer, analyzer)

    log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
    raise RuntimeError('Tokenizer not found')
|
||||
57
test/python/api/search/test_query_analyzer_factory.py
Normal file
57
test/python/api/search/test_query_analyzer_factory.py
Normal file
@@ -0,0 +1,57 @@
|
||||
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||
#
|
||||
# This file is part of Nominatim. (https://nominatim.org)
|
||||
#
|
||||
# Copyright (C) 2023 by the Nominatim developer community.
|
||||
# For a full list of authors see the git log.
|
||||
"""
|
||||
Tests for query analyzer creation.
|
||||
"""
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from nominatim.api import NominatimAPIAsync
|
||||
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
|
||||
from nominatim.api.search.icu_tokenizer import ICUQueryAnalyzer
|
||||
|
||||
@pytest.mark.asyncio
async def test_import_icu_tokenizer(table_factory):
    """ The factory returns an ICUQueryAnalyzer when the 'tokenizer'
        property of the database is set to 'icu'.
    """
    table_factory('nominatim_properties',
                  definition='property TEXT, value TEXT',
                  content=(('tokenizer', 'icu'),
                           ('tokenizer_import_normalisation', ':: lower();'),
                           ('tokenizer_import_transliteration', "'1' > '/1/'; 'ä' > 'ä '")))

    api = NominatimAPIAsync(Path('/invalid'), {})
    # Close the API in a finally clause, so that the call is also made
    # when the factory raises or the assertion fails.
    try:
        async with api.begin() as conn:
            ana = await make_query_analyzer(conn)

            assert isinstance(ana, ICUQueryAnalyzer)
    finally:
        await api.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_import_missing_property(table_factory):
    """ The factory fails with a ValueError when the properties table
        has no 'tokenizer' entry.
    """
    api = NominatimAPIAsync(Path('/invalid'), {})
    # Close the API in a finally clause, so that the call is also made
    # when the expected exception is not raised.
    try:
        table_factory('nominatim_properties',
                      definition='property TEXT, value TEXT')

        async with api.begin() as conn:
            with pytest.raises(ValueError, match='Property.*not found'):
                await make_query_analyzer(conn)
    finally:
        await api.close()
|
||||
|
||||
|
||||
@pytest.mark.asyncio
async def test_import_missing_module(table_factory):
    """ The factory fails with a RuntimeError when the 'tokenizer'
        property names a tokenizer for which no module exists.
    """
    api = NominatimAPIAsync(Path('/invalid'), {})
    # Close the API in a finally clause, so that the call is also made
    # when the expected exception is not raised.
    try:
        table_factory('nominatim_properties',
                      definition='property TEXT, value TEXT',
                      content=(('tokenizer', 'missing'),))

        async with api.begin() as conn:
            with pytest.raises(RuntimeError, match='Tokenizer not found'):
                await make_query_analyzer(conn)
    finally:
        await api.close()
|
||||
|
||||
Reference in New Issue
Block a user