mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
add factory for query analyzer
This commit is contained in:
@@ -21,10 +21,7 @@ from nominatim.typing import SaRow
|
|||||||
from nominatim.api.connection import SearchConnection
|
from nominatim.api.connection import SearchConnection
|
||||||
from nominatim.api.logging import log
|
from nominatim.api.logging import log
|
||||||
from nominatim.api.search import query as qmod
|
from nominatim.api.search import query as qmod
|
||||||
|
from nominatim.api.search.query_analyzer_factory import AbstractQueryAnalyzer
|
||||||
# XXX: TODO
|
|
||||||
class AbstractQueryAnalyzer:
    """ Placeholder class for query analyzers. It defines no members.
    """
|
|
||||||
|
|
||||||
|
|
||||||
DB_TO_TOKEN_TYPE = {
|
DB_TO_TOKEN_TYPE = {
|
||||||
|
|||||||
43
nominatim/api/search/query_analyzer_factory.py
Normal file
43
nominatim/api/search/query_analyzer_factory.py
Normal file
@@ -0,0 +1,43 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
#
|
||||||
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2023 by the Nominatim developer community.
|
||||||
|
# For a full list of authors see the git log.
|
||||||
|
"""
|
||||||
|
Factory for creating a query analyzer for the configured tokenizer.
|
||||||
|
"""
|
||||||
|
from typing import List, cast
|
||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from pathlib import Path
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
from nominatim.api.logging import log
|
||||||
|
from nominatim.api.connection import SearchConnection
|
||||||
|
from nominatim.api.search.query import Phrase, QueryStruct
|
||||||
|
|
||||||
|
class AbstractQueryAnalyzer(ABC):
    """ Base class for analysing incoming queries.

        Query analyzers are tied to the tokenizer used on import.
        Subclasses have to implement `analyze_query()`.
    """

    @abstractmethod
    async def analyze_query(self, phrases: List[Phrase]) -> QueryStruct:
        """ Tokenize the given list of phrases and return the result
            as a query structure.
        """
|
||||||
|
|
||||||
|
|
||||||
|
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
    """ Build the query analyzer that matches the tokenizer of the database.

        The tokenizer name is read from the 'tokenizer' property of the
        connection. A source file '<name>_tokenizer.py' must exist in the
        directory of this file. The module of the same name is then imported
        from 'nominatim.api.search' and its `create_query_analyzer()`
        coroutine is awaited with the connection to obtain the analyzer.

        Raises a RuntimeError when the source file does not exist.
    """
    name = await conn.get_property('tokenizer')

    module_name = f'{name}_tokenizer'
    if not (Path(__file__).parent / f'{module_name}.py').is_file():
        log().comment(f"No tokenizer named '{name}' available. Database not set up properly.")
        raise RuntimeError('Tokenizer not found')

    module = importlib.import_module(f'nominatim.api.search.{module_name}')
    analyzer = await module.create_query_analyzer(conn)

    # The cast only informs the type checker; nothing is verified at runtime.
    return cast(AbstractQueryAnalyzer, analyzer)
|
||||||
57
test/python/api/search/test_query_analyzer_factory.py
Normal file
57
test/python/api/search/test_query_analyzer_factory.py
Normal file
@@ -0,0 +1,57 @@
|
|||||||
|
# SPDX-License-Identifier: GPL-3.0-or-later
|
||||||
|
#
|
||||||
|
# This file is part of Nominatim. (https://nominatim.org)
|
||||||
|
#
|
||||||
|
# Copyright (C) 2023 by the Nominatim developer community.
|
||||||
|
# For a full list of authors see the git log.
|
||||||
|
"""
|
||||||
|
Tests for query analyzer creation.
|
||||||
|
"""
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from nominatim.api import NominatimAPIAsync
|
||||||
|
from nominatim.api.search.query_analyzer_factory import make_query_analyzer
|
||||||
|
from nominatim.api.search.icu_tokenizer import ICUQueryAnalyzer
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_import_icu_tokenizer(table_factory):
    """ When the 'tokenizer' property is set to 'icu', the factory must
        return an ICUQueryAnalyzer.
    """
    table_factory('nominatim_properties',
                  definition='property TEXT, value TEXT',
                  content=(('tokenizer', 'icu'),
                           ('tokenizer_import_normalisation', ':: lower();'),
                           ('tokenizer_import_transliteration', "'1' > '/1/'; 'ä' > 'ä '")))

    api = NominatimAPIAsync(Path('/invalid'), {})
    try:
        async with api.begin() as conn:
            ana = await make_query_analyzer(conn)

            assert isinstance(ana, ICUQueryAnalyzer)
    finally:
        # Always close the API object, also when the test body fails.
        await api.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_import_missing_property(table_factory):
    """ When the property table has no 'tokenizer' entry, creating the
        analyzer must fail with a ValueError.
    """
    api = NominatimAPIAsync(Path('/invalid'), {})
    table_factory('nominatim_properties',
                  definition='property TEXT, value TEXT')

    try:
        async with api.begin() as conn:
            with pytest.raises(ValueError, match='Property.*not found'):
                await make_query_analyzer(conn)
    finally:
        # Always close the API object, also when the expectation fails.
        await api.close()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.asyncio
async def test_import_missing_module(table_factory):
    """ When the 'tokenizer' property is set to 'missing', creating the
        analyzer must fail with a RuntimeError.
    """
    api = NominatimAPIAsync(Path('/invalid'), {})
    table_factory('nominatim_properties',
                  definition='property TEXT, value TEXT',
                  content=(('tokenizer', 'missing'),))

    try:
        async with api.begin() as conn:
            with pytest.raises(RuntimeError, match='Tokenizer not found'):
                await make_query_analyzer(conn)
    finally:
        # Always close the API object, also when the expectation fails.
        await api.close()
|
||||||
|
|
||||||
Reference in New Issue
Block a user