don't restrict to viewbox for frequent terms

All searched places may be outside the viewbox, in which case the
restriction means that there are no results at all. Instead, add the
penalty for being outside the viewbox earlier and then cut the list.
This commit is contained in:
Sarah Hoffmann
2025-08-06 17:27:52 +02:00
parent b325413486
commit 0045203092
2 changed files with 14 additions and 16 deletions

View File

@@ -63,8 +63,7 @@ class PlaceSearch(base.AbstractSearch):
t.c.name_vector, t.c.nameaddress_vector, t.c.name_vector, t.c.nameaddress_vector,
sa.case((t.c.importance > 0, t.c.importance), sa.case((t.c.importance > 0, t.c.importance),
else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75)) else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
.label('importance'), .label('importance'))
penalty.label('penalty'))
for lookup in self.lookups: for lookup in self.lookups:
sql = sql.where(lookup.sql_condition(t)) sql = sql.where(lookup.sql_condition(t))
@@ -88,10 +87,10 @@ class PlaceSearch(base.AbstractSearch):
sql = sql.where(t.c.centroid sql = sql.where(t.c.centroid
.intersects(VIEWBOX_PARAM, .intersects(VIEWBOX_PARAM,
use_index=details.viewbox.area < 0.2)) use_index=details.viewbox.area < 0.2))
elif not self.postcodes and self.expected_count >= 10000: else:
sql = sql.where(t.c.centroid penalty += sa.case((t.c.centroid.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
.intersects(VIEWBOX2_PARAM, (t.c.centroid.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
use_index=details.viewbox.area < 0.5)) else_=1.0)
if details.near is not None and details.near_radius is not None: if details.near is not None and details.near_radius is not None:
if details.near_radius < 0.1: if details.near_radius < 0.1:
@@ -110,6 +109,8 @@ class PlaceSearch(base.AbstractSearch):
sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM, sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
t.c.search_rank <= MAX_RANK_PARAM)) t.c.search_rank <= MAX_RANK_PARAM))
sql = sql.add_columns(penalty.label('penalty'))
inner = sql.limit(5000 if self.qualifiers else 1000)\ inner = sql.limit(5000 if self.qualifiers else 1000)\
.order_by(sa.desc(sa.text('importance')))\ .order_by(sa.desc(sa.text('importance')))\
.subquery() .subquery()
@@ -121,8 +122,8 @@ class PlaceSearch(base.AbstractSearch):
# If the query is not an address search or has a geographic preference, # If the query is not an address search or has a geographic preference,
# preselect most important items to restrict the number of places # preselect most important items to restrict the number of places
# that need to be looked up in placex. # that need to be looked up in placex.
if (details.viewbox is None or details.bounded_viewbox)\ if (details.viewbox is None or not details.bounded_viewbox)\
and (details.near is None or details.near_radius is not None)\ and (details.near is None or details.near_radius is None)\
and not self.qualifiers: and not self.qualifiers:
sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance) sql = sql.add_columns(sa.func.first_value(inner.c.penalty - inner.c.importance)
.over(order_by=inner.c.penalty - inner.c.importance) .over(order_by=inner.c.penalty - inner.c.importance)
@@ -166,11 +167,6 @@ class PlaceSearch(base.AbstractSearch):
# The postcode search needs to get priority here. # The postcode search needs to get priority here.
penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0) penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0)
if details.viewbox is not None and not details.bounded_viewbox:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM, use_index=False), 0.0),
(t.c.geometry.intersects(VIEWBOX2_PARAM, use_index=False), 0.5),
else_=1.0)
if details.near is not None: if details.near is not None:
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM)) sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance')) .label('importance'))

View File

@@ -156,9 +156,11 @@ class TestNameOnlySearches:
assert result.place_id == 333 assert result.place_id == 333
assert len(geom['coordinates']) == npoints assert len(geom['coordinates']) == npoints
@pytest.mark.parametrize('viewbox', ['5.0,4.0,6.0,5.0', '5.7,4.0,6.0,5.0']) @pytest.mark.parametrize('viewbox,rids', [('5.0,4.0,6.0,5.0', [100]),
@pytest.mark.parametrize('wcount,rids', [(2, [100, 101]), (20000, [100])]) ('5.7,4.0,6.0,5.0', [100, 101]),
def test_prefer_viewbox(self, apiobj, frontend, viewbox, wcount, rids): ('10.0,10.0,11.0,11.0', [101, 100])])
@pytest.mark.parametrize('wcount', [2, 50000])
def test_prefer_viewbox(self, apiobj, frontend, viewbox, rids, wcount):
lookup = FieldLookup('name_vector', [1, 2], LookupAll) lookup = FieldLookup('name_vector', [1, 2], LookupAll)
ranking = FieldRanking('name_vector', 0.2, [RankedTokens(0.0, [21])]) ranking = FieldRanking('name_vector', 0.2, [RankedTokens(0.0, [21])])