Merge pull request #3155 from lonvia/caching-of-transliterators

Cache ICU transliterators between calls
This commit is contained in:
Sarah Hoffmann
2023-08-16 22:55:54 +02:00
committed by GitHub
2 changed files with 44 additions and 10 deletions

View File

@@ -7,7 +7,8 @@
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
Awaitable, Callable, TypeVar
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
@@ -17,6 +18,8 @@ from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log
T = TypeVar('T')
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
then table definitions. The underlying asynchronous SQLAlchemy
@@ -61,11 +64,10 @@ class SearchConnection:
Raises a ValueError if the property does not exist.
"""
if name.startswith('DB:'):
raise ValueError(f"Illegal property value '{name}'.")
lookup_name = f'DBPROP:{name}'
if cached and name in self._property_cache:
return cast(str, self._property_cache[name])
if cached and lookup_name in self._property_cache:
return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
@@ -74,7 +76,7 @@ class SearchConnection:
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
self._property_cache[name] = cast(str, value)
self._property_cache[lookup_name] = cast(str, value)
return cast(str, value)
@@ -92,6 +94,29 @@ class SearchConnection:
return self._property_cache['DB:server_version']
async def get_cached_value(self, group: str, name: str,
factory: Callable[[], Awaitable[T]]) -> T:
""" Access the cache for this Nominatim instance.
Each cache value needs to belong to a group and have a name.
This function is for internal API use only.
`factory` is an async callback function that produces
the value if it is not already cached.
Returns the cached value or the result of factory (also caching
the result).
"""
full_name = f'{group}:{name}'
if full_name in self._property_cache:
return cast(T, self._property_cache[full_name])
value = await factory()
self._property_cache[full_name] = value
return value
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists.

View File

@@ -133,10 +133,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
rules = await self.conn.get_property('tokenizer_import_normalisation')
self.normalizer = Transliterator.createFromRules("normalization", rules)
rules = await self.conn.get_property('tokenizer_import_transliteration')
self.transliterator = Transliterator.createFromRules("transliteration", rules)
async def _make_normalizer() -> Any:
rules = await self.conn.get_property('tokenizer_import_normalisation')
return Transliterator.createFromRules("normalization", rules)
self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
_make_normalizer)
async def _make_transliterator() -> Any:
rules = await self.conn.get_property('tokenizer_import_transliteration')
return Transliterator.createFromRules("transliteration", rules)
self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
_make_transliterator)
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,