add server fronting for search endpoint

This also implements some of the quirks of free-text search of the V1 API, in particular, search for categories and coordinates.
2026-03-12 13:54:07 +00:00 · 2023-05-26 11:40:45 +02:00
parent c7db69a30c
commit 371a780ef4
6 changed files with 535 additions and 14 deletions
--- a/nominatim/api/v1/helpers.py
+++ b/nominatim/api/v1/helpers.py
@@ -8,8 +8,12 @@
 Helper function for parsing parameters and and outputting data
 specifically for the v1 version of the API.
 """
+from typing import Tuple, Optional, Any, Dict, Iterable
+from itertools import chain
+import re

 from nominatim.api.results import SearchResult, SearchResults, SourceTable
+from nominatim.api.types import SearchDetails, GeometryFormat

 REVERSE_MAX_RANKS = [2, 2, 2,   # 0-2   Continent/Sea
                     4, 4,      # 3-4   Country
@@ -33,6 +37,58 @@ def zoom_to_rank(zoom: int) -> int:
    return REVERSE_MAX_RANKS[max(0, min(18, zoom))]


+FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
+    'country': (4, 4),  # values are (minimum rank, maximum rank)
+    'state': (8, 8),
+    'city': (14, 16),
+    'settlement': (8, 20)
+}
+
+
+def feature_type_to_rank(feature_type: Optional[str]) -> Tuple[int, int]:
+    """ Convert a feature type parameter to a tuple of minimum rank and
+        maximum rank. Types without a table entry yield (0, 30).
+    """
+    return FEATURE_TYPE_TO_RANK.get(feature_type, (0, 30))
+
+
+#pylint: disable=too-many-arguments
+def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
+                       feature_type: Optional[str],
+                       namedetails: bool, extratags: bool,
+                       excluded: Iterable[str]) -> None:
+    """ Add parameters from the details dictionary and the given flags to
+        'queryparts' in place, making it usable as URL parameter dictionary.
+    """
+    parsed = SearchDetails.from_kwargs(details)
+    if parsed.geometry_output != GeometryFormat.NONE:
+        for flag in parsed.geometry_output:  # NOTE(review): Python 3.11+ if an enum.Flag - confirm
+            assert flag.name
+            queryparts[f'polygon_{flag.name.lower()}'] = '1'
+    if parsed.address_details:
+        queryparts['addressdetails'] = '1'
+    if namedetails:
+        queryparts['namedetails'] = '1'
+    if extratags:
+        queryparts['extratags'] = '1'
+    if parsed.geometry_simplification > 0.0:
+        queryparts['polygon_threshold'] = f"{parsed.geometry_simplification:.6g}"
+    if parsed.max_results != 10:
+        queryparts['limit'] = str(parsed.max_results)
+    if parsed.countries:
+        queryparts['countrycodes'] = ','.join(parsed.countries)
+    # Always written, even when empty: caller-supplied ids first, then the parsed ones.
+    queryparts['exclude_place_ids'] = ','.join(chain(excluded, map(str, parsed.excluded)))
+    if parsed.viewbox:
+        queryparts['viewbox'] = ','.join(f"{c:.7g}" for c in parsed.viewbox.coords)
+    if parsed.bounded_viewbox:
+        queryparts['bounded'] = '1'
+    if not details.get('dedupe', True):  # a missing key means the default: dedupe on
+        queryparts['dedupe'] = '0'
+    if feature_type in FEATURE_TYPE_TO_RANK:
+        queryparts['featureType'] = feature_type
+
+
 def deduplicate_results(results: SearchResults, max_results: int) -> SearchResults:
    """ Remove results that look like duplicates.

@@ -69,3 +125,69 @@ def _is_postcode_relation_for(result: SearchResult, postcode: str) -> bool:
           and result.category == ('boundary', 'postal_code') \
           and result.names is not None \
           and result.names.get('ref') == postcode
+
+
+def _deg(axis: str) -> str:  # regex for decimal degrees with optional degree sign
+    return rf"(?P<{axis}_deg>\d+\.\d+)°?"
+
+def _deg_min(axis: str) -> str:  # regex for whole degrees plus optional minutes
+    return rf"(?P<{axis}_deg>\d+)[°\s]+(?P<{axis}_min>[\d.]+)?[′']*"
+
+def _deg_min_sec(axis: str) -> str:  # regex for degrees, minutes and optional seconds
+    return rf"""(?P<{axis}_deg>\d+)[°\s]+(?P<{axis}_min>\d+)[′'\s]+(?P<{axis}_sec>[\d.]+)?["″]*"""
+
+COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
+    r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
+    _deg('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg('lon') + r"\s*(?P<ew>[EW])",
+    r"(?P<ns>[NS])\s*" + _deg_min('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min('lon'),
+    _deg_min('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min('lon') + r"\s*(?P<ew>[EW])",
+    r"(?P<ns>[NS])\s*" + _deg_min_sec('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg_min_sec('lon'),
+    _deg_min_sec('lat') + r"\s*(?P<ns>[NS])[\s,]+" + _deg_min_sec('lon') + r"\s*(?P<ew>[EW])",
+    r"\[?(?P<lat_deg>[+-]?\d+\.\d+)[\s,]+(?P<lon_deg>[+-]?\d+\.\d+)\]?"  # signed 'lat, lon' pair
+)]  # each pattern also captures optional text before ('pre') and after ('post')
+
+def extract_coords_from_query(query: str) -> Tuple[str, Optional[float], Optional[float]]:
+    """ Look for something that is formatted like a coordinate at the
+        beginning or end of the query. If found, extract the coordinate and
+        return the remaining query (or the empty string if the query
+        consisted of nothing but a coordinate) plus longitude and latitude
+        in degrees. Without a usable coordinate, return (query, None, None).
+        Only the first match will be returned.
+    """
+    for regex in COORD_REGEX:
+        match = regex.fullmatch(query)
+        if match is None:
+            continue
+        groups = match.groupdict()
+        if groups['pre'] and groups['post']:
+            continue  # coordinate sits in the middle of the query: not accepted
+        try:  # optional minute/second groups are None when they did not match
+            x, y = (float(groups[f'{axis}_deg'])
+                    + float(groups.get(f'{axis}_min') or 0.0) / 60.0
+                    + float(groups.get(f'{axis}_sec') or 0.0) / 3600.0
+                    for axis in ('lon', 'lat'))
+        except ValueError:  # malformed number such as '1.2.3' or a lone '.'
+            continue
+        return (groups['pre'] or groups['post'] or '',
+                -x if groups.get('ew') == 'W' else x,
+                -y if groups.get('ns') == 'S' else y)

+    return query, None, None
+
+
+CATEGORY_REGEX = re.compile(r'(?P<pre>.*?)\[(?P<cls>[a-zA-Z_]+)=(?P<typ>[a-zA-Z_]+)\](?P<post>.*)')

+def extract_category_from_query(query: str) -> Tuple[str, Optional[str], Optional[str]]:
+    """ Find the first hidden category specification of the form
+        '[key=value]' in the query. If one is present, return the query
+        with the specification cut out (possibly the empty string)
+        followed by the key and the value.

+        Without a specification, return the query as is and two Nones.
+    """
+    found = CATEGORY_REGEX.search(query)
+    if found is None:
+        return query, None, None

+    remainder = ' '.join((found['pre'].strip(), found['post'].strip())).strip()
+    return remainder, found['cls'], found['typ']