Merge pull request #3155 from lonvia/caching-of-transliterators

Cache ICU transliterators between calls
This commit is contained in:
Sarah Hoffmann
2023-08-16 22:55:54 +02:00
committed by GitHub
2 changed files with 44 additions and 10 deletions

View File

@@ -7,7 +7,8 @@
""" """
Extended SQLAlchemy connection class that also includes access to the schema. Extended SQLAlchemy connection class that also includes access to the schema.
""" """
from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
Awaitable, Callable, TypeVar
import sqlalchemy as sa import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection from sqlalchemy.ext.asyncio import AsyncConnection
@@ -17,6 +18,8 @@ from nominatim.db.sqlalchemy_schema import SearchTables
from nominatim.db.sqlalchemy_types import Geometry from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log from nominatim.api.logging import log
T = TypeVar('T')
class SearchConnection: class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains """ An extended SQLAlchemy connection class, that also contains
then table definitions. The underlying asynchronous SQLAlchemy then table definitions. The underlying asynchronous SQLAlchemy
@@ -61,11 +64,10 @@ class SearchConnection:
Raises a ValueError if the property does not exist. Raises a ValueError if the property does not exist.
""" """
if name.startswith('DB:'): lookup_name = f'DBPROP:{name}'
raise ValueError(f"Illegal property value '{name}'.")
if cached and name in self._property_cache: if cached and lookup_name in self._property_cache:
return cast(str, self._property_cache[name]) return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\ sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name) .where(self.t.properties.c.property == name)
@@ -74,7 +76,7 @@ class SearchConnection:
if value is None: if value is None:
raise ValueError(f"Property '{name}' not found in database.") raise ValueError(f"Property '{name}' not found in database.")
self._property_cache[name] = cast(str, value) self._property_cache[lookup_name] = cast(str, value)
return cast(str, value) return cast(str, value)
@@ -92,6 +94,29 @@ class SearchConnection:
return self._property_cache['DB:server_version'] return self._property_cache['DB:server_version']
async def get_cached_value(self, group: str, name: str,
                           factory: Callable[[], Awaitable[T]]) -> T:
    """ Return a value from the instance-wide cache, creating it on demand.

        Cache entries are addressed by a `group` and a `name` within
        that group; the two are joined with a colon to form the key
        under which the value is stored. Intended for internal API
        use only.

        `factory` is an async callable without arguments. It is awaited
        only when no entry exists yet for the given group and name. Its
        result is stored in the cache and returned to the caller.

        Returns the previously cached value when there is one, otherwise
        the freshly produced result of `factory`.
    """
    cache_key = f'{group}:{name}'

    if cache_key not in self._property_cache:
        # Cache miss: build the value, remember it and hand it out.
        produced = await factory()
        self._property_cache[cache_key] = produced
        return produced

    # The cache is untyped storage, so restore the caller's type.
    return cast(T, self._property_cache[cache_key])
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]: async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category """ Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists. and return a SQLAlchemy table for it, if it exists.

View File

@@ -133,10 +133,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
async def setup(self) -> None: async def setup(self) -> None:
""" Set up static data structures needed for the analysis. """ Set up static data structures needed for the analysis.
""" """
rules = await self.conn.get_property('tokenizer_import_normalisation') async def _make_normalizer() -> Any:
self.normalizer = Transliterator.createFromRules("normalization", rules) rules = await self.conn.get_property('tokenizer_import_normalisation')
rules = await self.conn.get_property('tokenizer_import_transliteration') return Transliterator.createFromRules("normalization", rules)
self.transliterator = Transliterator.createFromRules("transliteration", rules)
self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
_make_normalizer)
async def _make_transliterator() -> Any:
rules = await self.conn.get_property('tokenizer_import_transliteration')
return Transliterator.createFromRules("transliteration", rules)
self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
_make_transliterator)
if 'word' not in self.conn.t.meta.tables: if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta, sa.Table('word', self.conn.t.meta,