Merge pull request #3899 from lonvia/improve-reverse-performance

Streamline reverse lookup slightly
This commit is contained in:
Sarah Hoffmann
2025-12-07 09:39:10 +01:00
committed by GitHub
2 changed files with 96 additions and 66 deletions

View File

@@ -157,16 +157,19 @@ class ReverseGeocoder:
include.extend(('natural', 'water', 'waterway')) include.extend(('natural', 'water', 'waterway'))
return table.c.class_.in_(tuple(include)) return table.c.class_.in_(tuple(include))
async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]: async def _find_closest_street_or_pois(self, distance: float,
""" Look up the closest rank 26+ place in the database, which fuzziness: float) -> list[SaRow]:
is closer than the given distance. """ Look up the closest rank 26+ place in the database.
The function finds the object that is closest to the reverse
search point as well as all objects within 'fuzziness' distance
to that best result.
""" """
t = self.conn.t.placex t = self.conn.t.placex
# PostgreSQL must not get the distance as a parameter because # PostgreSQL must not get the distance as a parameter because
# there is a danger it won't be able to properly estimate index use # there is a danger it won't be able to properly estimate index use
# when used with prepared statements # when used with prepared statements
diststr = sa.text(f"{distance}") diststr = sa.text(f"{distance + fuzziness}")
sql: SaLambdaSelect = sa.lambda_stmt( sql: SaLambdaSelect = sa.lambda_stmt(
lambda: _select_from_placex(t) lambda: _select_from_placex(t)
@@ -174,9 +177,7 @@ class ReverseGeocoder:
.where(t.c.indexed_status == 0) .where(t.c.indexed_status == 0)
.where(t.c.linked_place_id == None) .where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()), .where(sa.or_(sa.not_(t.c.geometry.is_area()),
t.c.centroid.ST_Distance(WKT_PARAM) < diststr)) t.c.centroid.ST_Distance(WKT_PARAM) < diststr)))
.order_by('distance')
.limit(2))
if self.has_geometries(): if self.has_geometries():
sql = self._add_geometry_columns(sql, t.c.geometry) sql = self._add_geometry_columns(sql, t.c.geometry)
@@ -198,24 +199,39 @@ class ReverseGeocoder:
self._filter_by_layer(t))) self._filter_by_layer(t)))
if not restrict: if not restrict:
return None return []
sql = sql.where(sa.or_(*restrict)) inner = sql.where(sa.or_(*restrict)) \
.add_columns(t.c.geometry.label('_geometry')) \
.subquery()
# If the closest object is inside an area, then check if there is a # Use a window function to get the closest results to the best result.
# POI node nearby and return that. windowed = sa.select(inner,
prev_row = None sa.func.first_value(inner.c.distance)
for row in await self.conn.execute(sql, self.bind_params): .over(order_by=inner.c.distance)
if prev_row is None: .label('_min_distance'),
if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0: sa.func.first_value(inner.c._geometry.ST_ClosestPoint(WKT_PARAM))
return row .over(order_by=inner.c.distance)
prev_row = row .label('_closest_point'),
else: sa.func.first_value(sa.case((sa.or_(inner.c.rank_search <= 27,
if row.rank_search > 27 and row.osm_type == 'N'\ inner.c.osm_type == 'N'), None),
and row.distance < 0.0001: else_=inner.c._geometry))
return row .over(order_by=inner.c.distance)
.label('_best_geometry')) \
.subquery()
return prev_row outer = sa.select(*(c for c in windowed.c if not c.key.startswith('_')),
windowed.c.centroid.ST_Distance(windowed.c._closest_point)
.label('best_distance'),
sa.case((sa.or_(windowed.c._best_geometry == None,
windowed.c.rank_search <= 27,
windowed.c.osm_type != 'N'), False),
else_=windowed.c.centroid.ST_CoveredBy(windowed.c._best_geometry))
.label('best_inside')) \
.where(windowed.c.distance < windowed.c._min_distance + fuzziness) \
.order_by(windowed.c.distance)
return list(await self.conn.execute(outer, self.bind_params))
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]: async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex t = self.conn.t.placex
@@ -301,55 +317,69 @@ class ReverseGeocoder:
""" Find a street or POI/address for the given WKT point. """ Find a street or POI/address for the given WKT point.
""" """
log().section('Reverse lookup on street/address level') log().section('Reverse lookup on street/address level')
distance = 0.006
parent_place_id = None
row = await self._find_closest_street_or_poi(distance)
row_func: RowFunc = nres.create_from_placex_row row_func: RowFunc = nres.create_from_placex_row
log().var_dump('Result (street/building)', row) distance = 0.006
# If the closest result was a street, but an address was requested, result = None
# check for a housenumber nearby which is part of the street. hnr_distance = None
if row is not None: parent_street = None
if self.max_rank > 27 \ for row in await self._find_closest_street_or_pois(distance, 0.001):
and self.layer_enabled(DataLayer.ADDRESS) \ if result is None:
and row.rank_address <= 27: log().var_dump('Closest result', row)
distance = 0.001 result = row
parent_place_id = row.place_id if self.max_rank > 27 \
log().comment('Find housenumber for street') and self.layer_enabled(DataLayer.ADDRESS) \
addr_row = await self._find_housenumber_for_street(parent_place_id) and result.rank_address <= 27:
log().var_dump('Result (street housenumber)', addr_row) parent_street = result.place_id
distance = 0.001
if addr_row is not None: else:
row = addr_row distance = row.distance
row_func = nres.create_from_placex_row # If the closest result was a street but an address was requested,
distance = addr_row.distance # see if we can refine the result with a housenumber closeby.
elif row.country_code == 'us' and parent_place_id is not None: elif parent_street is not None \
log().comment('Find TIGER housenumber for street') and row.rank_address > 27 \
addr_row = await self._find_tiger_number_for_street(parent_place_id) and row.best_distance < 0.001 \
log().var_dump('Result (street Tiger housenumber)', addr_row) and (hnr_distance is None or hnr_distance > row.best_distance) \
and row.parent_place_id == parent_street:
if addr_row is not None: log().var_dump('Housenumber to closest result', row)
row_func = cast(RowFunc, result = row
functools.partial(nres.create_from_tiger_row, hnr_distance = row.best_distance
osm_type=row.osm_type,
osm_id=row.osm_id))
row = addr_row
else:
distance = row.distance distance = row.distance
# If the closest object is inside an area, then check if there is
# a POI nearby and return that with preference.
elif result.osm_type != 'N' and result.rank_search > 27 \
and result.distance == 0 \
and row.best_inside:
log().var_dump('POI near closest result area', row)
result = row
break # it can't get better than that, everything else is farther away
# For the US also check the TIGER data, when no housenumber/POI was found.
if result is not None and parent_street is not None and hnr_distance is None \
and result.country_code == 'us':
log().comment('Find TIGER housenumber for street')
addr_row = await self._find_tiger_number_for_street(parent_street)
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
row_func = cast(RowFunc,
functools.partial(nres.create_from_tiger_row,
osm_type=row.osm_type,
osm_id=row.osm_id))
result = addr_row
# Check for an interpolation that is either closer than our result # Check for an interpolation that is either closer than our result
# or belongs to a close street found. # or belongs to a close street found.
if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS): # No point in doing this when the result is already inside a building,
# i.e. when the distance is already 0.
if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS) and distance > 0:
log().comment('Find interpolation for street') log().comment('Find interpolation for street')
addr_row = await self._find_interpolation_for_street(parent_place_id, addr_row = await self._find_interpolation_for_street(parent_street, distance)
distance)
log().var_dump('Result (street interpolation)', addr_row) log().var_dump('Result (street interpolation)', addr_row)
if addr_row is not None: if addr_row is not None:
row = addr_row return addr_row, nres.create_from_osmline_row
row_func = nres.create_from_osmline_row
return row, row_func return result, row_func
async def _lookup_area_address(self) -> Optional[SaRow]: async def _lookup_area_address(self) -> Optional[SaRow]:
""" Lookup large addressable areas for the given WKT point. """ Lookup large addressable areas for the given WKT point.

View File

@@ -163,12 +163,12 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
parent_place_id=990, parent_place_id=990,
rank_search=30, rank_address=30, rank_search=30, rank_address=30,
housenumber='23', housenumber='23',
centroid=(10.0, 10.00002)) centroid=(10.0, 10.0002))
apiobj.add_osmline(place_id=992, apiobj.add_osmline(place_id=992,
parent_place_id=990, parent_place_id=990,
startnumber=1, endnumber=3, step=1, startnumber=1, endnumber=3, step=1,
centroid=(10.0, 10.00001), centroid=(10.0, 10.0001),
geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)') geometry='LINESTRING(9.995 10.0001, 10.005 10.0001)')
apiobj.add_placex(place_id=1990, class_='highway', type='service', apiobj.add_placex(place_id=1990, class_='highway', type='service',
rank_search=27, rank_address=27, rank_search=27, rank_address=27,
name={'name': 'Other Street'}, name={'name': 'Other Street'},
@@ -177,8 +177,8 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
apiobj.add_osmline(place_id=1992, apiobj.add_osmline(place_id=1992,
parent_place_id=1990, parent_place_id=1990,
startnumber=1, endnumber=3, step=1, startnumber=1, endnumber=3, step=1,
centroid=(10.0, 20.00001), centroid=(10.0, 20.0001),
geometry='LINESTRING(9.995 20.00001, 10.005 20.00001)') geometry='LINESTRING(9.995 20.0001, 10.005 20.0001)')
params = {'geometry_output': napi.GeometryFormat.TEXT} if with_geom else {} params = {'geometry_output': napi.GeometryFormat.TEXT} if with_geom else {}