Merge pull request #3257 from lonvia/slow-queries

Performance tweaks for search
This commit is contained in:
Sarah Hoffmann
2023-11-23 12:05:50 +01:00
committed by GitHub
5 changed files with 31 additions and 9 deletions

View File

@@ -235,6 +235,10 @@ class TextLogger(BaseLogger):
self.buffer = io.StringIO()
def _timestamp(self) -> None:
    """ Emit a line containing the current time in square brackets.

        The time is taken from `datetime.now()` without a timezone and
        handed to `self._write()` followed by a newline.
    """
    now = dt.datetime.now()
    self._write(f'[{now}]\n')
def get_buffer(self) -> str:
    """ Return the complete text currently held in `self.buffer`.
    """
    collected = self.buffer
    return collected.getvalue()
@@ -247,6 +251,7 @@ class TextLogger(BaseLogger):
def section(self, heading: str) -> None:
self._timestamp()
self._write(f"\n# {heading}\n\n")
@@ -283,6 +288,7 @@ class TextLogger(BaseLogger):
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
@@ -298,6 +304,7 @@ class TextLogger(BaseLogger):
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
self._timestamp()
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")

View File

@@ -208,7 +208,7 @@ class SearchBuilder:
and all(t.is_indexed for t in addr_partials)
exp_count = min(t.count for t in name_partials) / (2**(len(name_partials) - 1))
if (len(name_partials) > 3 or exp_count < 3000) and partials_indexed:
if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
return

View File

@@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
def no_index(expr: SaColumn) -> SaColumn:
    """ Return the expression wrapped as `coalesce(NULL, expr)`.

        The wrapped form yields the same value as the plain expression
        but is meant to keep the query planner from using the expression
        for an index lookup.
    """
    leading_null = sa.null()
    return sa.func.coalesce(leading_null, expr) # pylint: disable=not-callable
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
@@ -107,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
orexpr.append(table.c.rank_address.between(1, 30))
orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
orexpr.append(table.c.rank_address.between(1, 29))
orexpr.append(sa.and_(table.c.rank_address == 30,
orexpr.append(no_index(table.c.rank_address).between(1, 29))
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
sa.or_(table.c.housenumber != None,
table.c.address.has_key('addr:housename'))))
elif layers & DataLayer.POI:
orexpr.append(sa.and_(table.c.rank_address == 30,
orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
if layers & DataLayer.MANMADE:
@@ -124,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
@@ -132,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
table.c.rank_address == 0))
no_index(table.c.rank_address) == 0))
if len(orexpr) == 1:
return orexpr[0]
@@ -295,7 +302,7 @@ class NearSearch(AbstractSearch):
else_ = tgeom.c.centroid.ST_Expand(0.05))))\
.order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:

View File

@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
penalty = 0.0
if row.type == 'w':
penalty = 0.3
elif row.type == 'W':
if len(row.word_token) == 1 and row.word_token == row.word:
penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
elif row.type == 'C':
if len(row.word_token) == 1:
penalty = 0.3
if row.info is None:
lookup_word = row.word

View File

@@ -538,7 +538,9 @@ class SearchDetails(LookupDetails):
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
or self.layers is not None and not self.layers)
or (self.layers is not None and not self.layers)
or (self.max_rank <= 4 and
self.layers is not None and not self.layers & DataLayer.ADDRESS))
def layer_enabled(self, layer: DataLayer) -> bool: