From 0b7bde25002713a0ba8b1f0be5dc7778f8cf860c Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 10:24:20 +0200 Subject: [PATCH 1/6] introduce parameter for saving query statistics --- src/nominatim_api/core.py | 99 +++++++++++++++++++++----------------- src/nominatim_api/types.py | 51 +++++++++++++++++++- 2 files changed, 105 insertions(+), 45 deletions(-) diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py index 769eb1d1..376b5016 100644 --- a/src/nominatim_api/core.py +++ b/src/nominatim_api/core.py @@ -217,11 +217,13 @@ class NominatimAPIAsync: """ timeout = Timeout(self.request_timeout) details = ntyp.LookupDetails.from_kwargs(params) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - if details.keywords: - await nsearch.make_query_analyzer(conn) - return await get_detailed_place(conn, place, details) + with details.query_stats as qs: + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) + if details.keywords: + await nsearch.make_query_analyzer(conn) + return await get_detailed_place(conn, place, details) async def lookup(self, places: Sequence[ntyp.PlaceRef], **params: Any) -> SearchResults: """ Get simple information about a list of places. 
@@ -230,11 +232,13 @@ class NominatimAPIAsync: """ timeout = Timeout(self.request_timeout) details = ntyp.LookupDetails.from_kwargs(params) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - if details.keywords: - await nsearch.make_query_analyzer(conn) - return await get_places(conn, places, details) + with details.query_stats as qs: + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) + if details.keywords: + await nsearch.make_query_analyzer(conn) + return await get_places(conn, places, details) async def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]: """ Find a place by its coordinates. Also known as reverse geocoding. @@ -249,28 +253,32 @@ class NominatimAPIAsync: timeout = Timeout(self.request_timeout) details = ntyp.ReverseDetails.from_kwargs(params) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - if details.keywords: - await nsearch.make_query_analyzer(conn) - geocoder = ReverseGeocoder(conn, details, - self.reverse_restrict_to_country_area) - return await geocoder.lookup(coord) + with details.query_stats as qs: + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) + if details.keywords: + await nsearch.make_query_analyzer(conn) + geocoder = ReverseGeocoder(conn, details, + self.reverse_restrict_to_country_area) + return await geocoder.lookup(coord) async def search(self, query: str, **params: Any) -> SearchResults: """ Find a place by free-text search. Also known as forward geocoding. 
""" - query = query.strip() - if not query: - raise UsageError('Nothing to search for.') - timeout = Timeout(self.request_timeout) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - geocoder = nsearch.ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params), - timeout) - phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p.strip()) for p in query.split(',')] - return await geocoder.lookup(phrases) + details = ntyp.SearchDetails.from_kwargs(params) + with details.query_stats as qs: + query = query.strip() + if not query: + raise UsageError('Nothing to search for.') + + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) + geocoder = nsearch.ForwardGeocoder(conn, details, timeout) + phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p.strip()) for p in query.split(',')] + return await geocoder.lookup(phrases) async def search_address(self, amenity: Optional[str] = None, street: Optional[str] = None, @@ -283,10 +291,8 @@ class NominatimAPIAsync: """ Find an address using structured search. """ timeout = Timeout(self.request_timeout) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - details = ntyp.SearchDetails.from_kwargs(params) - + details = ntyp.SearchDetails.from_kwargs(params) + with details.query_stats as qs: phrases: List[nsearch.Phrase] = [] if amenity: @@ -325,6 +331,9 @@ class NominatimAPIAsync: if amenity: details.layers |= ntyp.DataLayer.POI + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) geocoder = nsearch.ForwardGeocoder(conn, details, timeout) return await geocoder.lookup(phrases) @@ -335,22 +344,24 @@ class NominatimAPIAsync: The near place may either be given as an unstructured search query in itself or as coordinates. 
""" - if not categories: - return SearchResults() - timeout = Timeout(self.request_timeout) details = ntyp.SearchDetails.from_kwargs(params) - async with self.begin(abs_timeout=timeout.abs) as conn: - conn.set_query_timeout(self.query_timeout) - if near_query: - phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p) for p in near_query.split(',')] - else: - phrases = [] - if details.keywords: - await nsearch.make_query_analyzer(conn) + with details.query_stats as qs: + if not categories: + return SearchResults() - geocoder = nsearch.ForwardGeocoder(conn, details, timeout) - return await geocoder.lookup_pois(categories, phrases) + async with self.begin(abs_timeout=timeout.abs) as conn: + qs.log_time('start_query') + conn.set_query_timeout(self.query_timeout) + if near_query: + phrases = [nsearch.Phrase(nsearch.PHRASE_ANY, p) for p in near_query.split(',')] + else: + phrases = [] + if details.keywords: + await nsearch.make_query_analyzer(conn) + + geocoder = nsearch.ForwardGeocoder(conn, details, timeout) + return await geocoder.lookup_pois(categories, phrases) class NominatimAPI: diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py index 38f6ed9d..98ec571a 100644 --- a/src/nominatim_api/types.py +++ b/src/nominatim_api/types.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Complex datatypes used by the Nominatim API. @@ -11,6 +11,7 @@ from typing import Optional, Union, Tuple, NamedTuple, TypeVar, Type, Dict, \ Any, List, Sequence from collections import abc import dataclasses +import datetime as dt import enum import math from struct import unpack @@ -334,6 +335,49 @@ class DataLayer(enum.Flag): """ +class QueryStatistics(dict[str, Any]): + """ A specialised dictionary for collecting query statistics. 
+ """ + + def __enter__(self) -> 'QueryStatistics': + self.log_time('start_function') + return self + + def __exit__(self, *_: Any) -> None: + self.log_time('end_function') + self['total_time'] = (self['end_function'] - self['start_function']) \ + / dt.timedelta(microseconds=1) + if 'start_query' in self: + self['wait_time'] = (self['start_query'] - self['start_function']) \ + / dt.timedelta(microseconds=1) + else: + self['wait_time'] = 0 + self['query_time'] = self['total_time'] - self['wait_time'] + + def __missing__(self, key: str) -> str: + return '' + + def log_time(self, key: str) -> None: + self[key] = dt.datetime.now(tz=dt.timezone.utc) + + +class NoQueryStats: + """ Null object to use, when no query statistics are requested. + """ + + def __enter__(self) -> 'NoQueryStats': + return self + + def __exit__(self, *_: Any) -> None: + pass + + def __setitem__(self, key: str, value: Any) -> None: + pass + + def log_time(self, key: str) -> None: + pass + + def format_country(cc: Any) -> List[str]: """ Extract a list of country codes from the input which may be either a string or list of strings. Filters out all values that are not @@ -412,6 +456,11 @@ class LookupDetails: 0.0 means the original geometry is kept. The higher the value, the more the geometry gets simplified. """ + query_stats: Union[QueryStatistics, NoQueryStats] = \ + dataclasses.field(default_factory=NoQueryStats) + """ Optional QueryStatistics object collecting information about + runtime behaviour of the call. 
+ """ @classmethod def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam: From 177b16b89b214a74626d66174632a3bbbcf9fc65 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 11:52:06 +0200 Subject: [PATCH 2/6] use new QueryStatistics in API server --- src/nominatim_api/server/asgi_adaptor.py | 9 +++++- src/nominatim_api/server/falcon/server.py | 34 ++++++++++++-------- src/nominatim_api/server/starlette/server.py | 30 ++++++++++------- src/nominatim_api/types.py | 13 ++++---- src/nominatim_api/v1/server_glue.py | 6 +++- test/python/api/fake_adaptor.py | 3 ++ 6 files changed, 61 insertions(+), 34 deletions(-) diff --git a/src/nominatim_api/server/asgi_adaptor.py b/src/nominatim_api/server/asgi_adaptor.py index 77c50f31..a2ded018 100644 --- a/src/nominatim_api/server/asgi_adaptor.py +++ b/src/nominatim_api/server/asgi_adaptor.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Base abstraction for implementing based on different ASGI frameworks. @@ -13,6 +13,7 @@ import math from ..config import Configuration from ..core import NominatimAPIAsync +from ..types import QueryStatistics from ..result_formatting import FormatDispatcher from .content_types import CONTENT_TEXT @@ -68,6 +69,12 @@ class ASGIAdaptor(abc.ABC): """ Return the formatting object to use. """ + @abc.abstractmethod + def query_stats(self) -> Optional[QueryStatistics]: + """ Return the object for saving query statistics or None if + no statistics are required. + """ + def get_int(self, name: str, default: Optional[int] = None) -> int: """ Return an input parameter as an int. Raises an exception if the parameter is given but not in an integer format. 
diff --git a/src/nominatim_api/server/falcon/server.py b/src/nominatim_api/server/falcon/server.py index c16d085b..df2b3379 100644 --- a/src/nominatim_api/server/falcon/server.py +++ b/src/nominatim_api/server/falcon/server.py @@ -2,20 +2,21 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Server implementation using the falcon webserver framework. """ -from typing import Optional, Mapping, Any, List +from typing import Optional, Mapping, Any, List, cast from pathlib import Path -import datetime as dt import asyncio +import datetime as dt from falcon.asgi import App, Request, Response from ...config import Configuration from ...core import NominatimAPIAsync +from ...types import QueryStatistics from ... import v1 as api_impl from ...result_formatting import FormatDispatcher, load_format_dispatcher from ... import logging as loglib @@ -95,6 +96,9 @@ class ParamWrapper(ASGIAdaptor): def formatting(self) -> FormatDispatcher: return self._formatter + def query_stats(self) -> Optional[QueryStatistics]: + return cast(Optional[QueryStatistics], getattr(self.request.context, 'query_stats', None)) + class EndpointWrapper: """ Converter for server glue endpoint functions to Falcon request handlers. @@ -124,7 +128,7 @@ class FileLoggingMiddleware: async def process_request(self, req: Request, _: Response) -> None: """ Callback before the request starts timing. """ - req.context.start = dt.datetime.now(tz=dt.timezone.utc) + req.context.query_stats = QueryStatistics() async def process_response(self, req: Request, resp: Response, resource: Optional[EndpointWrapper], @@ -132,19 +136,23 @@ class FileLoggingMiddleware: """ Callback after requests writes to the logfile. It only writes logs for successful requests for search, reverse and lookup. 
""" - if not req_succeeded or resource is None or resp.status != 200\ + qs = req.context.query_stats + + if not req_succeeded or 'start' not in qs\ + or resource is None or resp.status != 200\ or resource.name not in ('reverse', 'search', 'lookup', 'details'): return - finish = dt.datetime.now(tz=dt.timezone.utc) - duration = (finish - req.context.start).total_seconds() - params = req.scope['query_string'].decode('utf8') - start = req.context.start.replace(tzinfo=None)\ - .isoformat(sep=' ', timespec='milliseconds') + qs['endpoint'] = resource.name + qs['query_string'] = req.scope['query_string'].decode('utf8') + qs['results_total'] = getattr(resp.context, 'num_results', 0) + for param in ('start', 'end', 'start_query'): + if isinstance(qs.get(param), dt.datetime): + qs[param] = qs[param].replace(tzinfo=None)\ + .isoformat(sep=' ', timespec='milliseconds') - self.fd.write(f"[{start}] " - f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} " - f'{resource.name} "{params}"\n') + self.fd.write(("[{start}] {total_time:.4f} {results_total} " + '{endpoint} "{query_string}"\n').format_map(qs)) class APIMiddleware: diff --git a/src/nominatim_api/server/starlette/server.py b/src/nominatim_api/server/starlette/server.py index afaf5732..15c5dd92 100644 --- a/src/nominatim_api/server/starlette/server.py +++ b/src/nominatim_api/server/starlette/server.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Server implementation using the starlette webserver framework. @@ -10,9 +10,9 @@ Server implementation using the starlette webserver framework. 
from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, \ Awaitable, AsyncIterator from pathlib import Path -import datetime as dt import asyncio import contextlib +import datetime as dt from starlette.applications import Starlette from starlette.routing import Route @@ -25,6 +25,7 @@ from starlette.middleware.cors import CORSMiddleware from ...config import Configuration from ...core import NominatimAPIAsync +from ...types import QueryStatistics from ... import v1 as api_impl from ...result_formatting import FormatDispatcher, load_format_dispatcher from ..asgi_adaptor import ASGIAdaptor, EndpointFunc @@ -70,6 +71,9 @@ class ParamWrapper(ASGIAdaptor): def formatting(self) -> FormatDispatcher: return cast(FormatDispatcher, self.request.app.state.formatter) + def query_stats(self) -> Optional[QueryStatistics]: + return cast(Optional[QueryStatistics], getattr(self.request.state, 'query_stats', None)) + def _wrap_endpoint(func: EndpointFunc)\ -> Callable[[Request], Coroutine[Any, Any, Response]]: @@ -89,27 +93,29 @@ class FileLoggingMiddleware(BaseHTTPMiddleware): async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response: - start = dt.datetime.now(tz=dt.timezone.utc) + qs = QueryStatistics() + request.state.query_stats = qs response = await call_next(request) - if response.status_code != 200: + if response.status_code != 200 or 'start' not in qs: return response - finish = dt.datetime.now(tz=dt.timezone.utc) - for endpoint in ('reverse', 'search', 'lookup', 'details'): if request.url.path.startswith('/' + endpoint): - qtype = endpoint + qs['endpoint'] = endpoint break else: return response - duration = (finish - start).total_seconds() - params = request.scope['query_string'].decode('utf8') + qs['query_string'] = request.scope['query_string'].decode('utf8') + qs['results_total'] = getattr(request.state, 'num_results', 0) + for param in ('start', 'end', 'start_query'): + if isinstance(qs.get(param), dt.datetime): + 
qs[param] = qs[param].replace(tzinfo=None)\ + .isoformat(sep=' ', timespec='milliseconds') - self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] " - f"{duration:.4f} {getattr(request.state, 'num_results', 0)} " - f'{qtype} "{params}"\n') + self.fd.write(("[{start}] {total_time:.4f} {results_total} " + '{endpoint} "{query_string}"\n').format_map(qs)) return response diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py index 98ec571a..f2e4c69e 100644 --- a/src/nominatim_api/types.py +++ b/src/nominatim_api/types.py @@ -340,18 +340,17 @@ class QueryStatistics(dict[str, Any]): """ def __enter__(self) -> 'QueryStatistics': - self.log_time('start_function') + self.log_time('start') return self def __exit__(self, *_: Any) -> None: - self.log_time('end_function') - self['total_time'] = (self['end_function'] - self['start_function']) \ - / dt.timedelta(microseconds=1) + self.log_time('end') + self['total_time'] = (self['end'] - self['start']).total_seconds() if 'start_query' in self: - self['wait_time'] = (self['start_query'] - self['start_function']) \ - / dt.timedelta(microseconds=1) + self['wait_time'] = (self['start_query'] - self['start']).total_seconds() else: - self['wait_time'] = 0 + self['wait_time'] = self['total_time'] + self['start_query'] = self['end'] self['query_time'] = self['total_time'] - self['wait_time'] def __missing__(self, key: str) -> str: diff --git a/src/nominatim_api/v1/server_glue.py b/src/nominatim_api/v1/server_glue.py index a3a29199..871358ef 100644 --- a/src/nominatim_api/v1/server_glue.py +++ b/src/nominatim_api/v1/server_glue.py @@ -2,7 +2,7 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2024 by the Nominatim developer community. +# Copyright (C) 2025 by the Nominatim developer community. # For a full list of authors see the git log. """ Generic part of the server implementation of the v1 API. 
@@ -165,6 +165,7 @@ async def details_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: geometry_output=(GeometryFormat.GEOJSON if params.get_bool('polygon_geojson', False) else GeometryFormat.NONE), + query_stats=params.query_stats() ) if debug: @@ -197,6 +198,7 @@ async def reverse_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: details = parse_geometry_details(params, fmt) details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18)) details['layers'] = get_layers(params) + details['query_stats'] = params.query_stats() result = await api.reverse(coord, **details) @@ -234,6 +236,7 @@ async def lookup_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: fmt = parse_format(params, SearchResults, 'xml') debug = setup_debugging(params) details = parse_geometry_details(params, fmt) + details['query_stats'] = params.query_stats() places = [] for oid in (params.get('osm_ids') or '').split(','): @@ -302,6 +305,7 @@ async def search_endpoint(api: NominatimAPIAsync, params: ASGIAdaptor) -> Any: debug = setup_debugging(params) details = parse_geometry_details(params, fmt) + details['query_stats'] = params.query_stats() details['countries'] = params.get('countrycodes', None) details['entrances'] = params.get_bool('entrances', False) details['excluded'] = params.get('exclude_place_ids', None) diff --git a/test/python/api/fake_adaptor.py b/test/python/api/fake_adaptor.py index a3a3bcf9..01050037 100644 --- a/test/python/api/fake_adaptor.py +++ b/test/python/api/fake_adaptor.py @@ -54,3 +54,6 @@ class FakeAdaptor(glue.ASGIAdaptor): def formatting(self): return formatting + + def query_stats(self): + return None From 3d0867ff1690ca570cda1648cf9447a4d3d473ee Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 20:11:46 +0200 Subject: [PATCH 3/6] make log output configurable --- docs/customize/Settings.md | 34 +++++++++++++++----- docs/extra.css | 6 ++++ mkdocs.yml | 1 + settings/env.defaults | 7 ++++ 
src/nominatim_api/server/falcon/server.py | 8 ++--- src/nominatim_api/server/starlette/server.py | 9 +++--- 6 files changed, 49 insertions(+), 16 deletions(-) diff --git a/docs/customize/Settings.md b/docs/customize/Settings.md index 5be1035c..fb8e6aaf 100644 --- a/docs/customize/Settings.md +++ b/docs/customize/Settings.md @@ -648,21 +648,39 @@ See also [NOMINATIM_DEFAULT_LANGUAGE](#nominatim_default_language). | **Description:** | Log requests into a file | | **Format:** | path | | **Default:** | _empty_ (logging disabled) | -| **After Changes:** | run `nominatim refresh --website` | Enable logging of requests into a file with this setting by setting the log file where to log to. A relative file name is assumed to be relative to -the project directory. +the project directory. The format of the log output can be set +with NOMINATIM_LOG_FORMAT. +#### NOMINATIM_LOG_FORMAT -The entries in the log file have the following format: +| Summary | | +| -------------- | --------------------------------------------------- | +| **Description:** | Format of the log line written for each request | +| **Format:** | [Python String Format](https://docs.python.org/3/library/string.html#formatstrings) string | +| **Default:** | `[{start}] {total_time:.4f} {results_total} {endpoint} "{query_string}"` | - "" +Describes the content of a log line for a single request. The format +must be readable by Python's format function. Nominatim provides a number +of metrics that can be logged. The default set of metrics is the following: -Request time is the time when the request was started. The execution time is -given in seconds and includes the entire time the query was queued and executed -in the frontend. -type contains the name of the endpoint used. +/// html | div.simple-table +| name | type | Description | +| --------------- | ------ | ------------| +| start | time | Point in time when the request arrived. | +| end | time | Point in time when the request was done. 
| +| start_query | time | Point in time when processing started. | +| total_time | float | Total time in seconds to handle the request. | +| wait_time | float | Time in seconds the request waited for a database connection to be available. | +| query_time | float | Total time in seconds to process the request once a connection was available. | +| results_total | int | Number of results found. | +| endpoint | string | API endpoint used. | +| query_string | string | Raw query string received. | +/// + +Variables of type 'time' contain a UTC timestamp string in ISO format. #### NOMINATIM_DEBUG_SQL diff --git a/docs/extra.css b/docs/extra.css index 033e9903..fe7bec48 100644 --- a/docs/extra.css +++ b/docs/extra.css @@ -39,3 +39,9 @@ th { filter: grayscale(100%); font-size: 80%; } + +.simple-table table:not([class]) th, +.simple-table table:not([class]) td { + padding: 2px 4px; + background: white; +} diff --git a/mkdocs.yml b/mkdocs.yml index 6a24e816..3fd9cd5a 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -67,6 +67,7 @@ markdown_extensions: - codehilite - admonition - pymdownx.superfences + - pymdownx.blocks.html - pymdownx.tabbed: alternate_style: true - def_list diff --git a/settings/env.defaults b/settings/env.defaults index 89dfef84..827e3f2a 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -208,6 +208,13 @@ NOMINATIM_OUTPUT_NAMES=name:XX,name,brand,official_name:XX,short_name:XX,officia # To enable logging set this setting to the file to log to. NOMINATIM_LOG_FILE= +# Set the output format of the query log. +# This is a string following the Python String Format syntax, +# see https://docs.python.org/3/library/string.html#formatstrings. +# For possible replacement values, see the full documentation at +# https://nominatim.org/release-docs/latest/customize/Settings/ +NOMINATIM_LOG_FORMAT='[{start}] {total_time:.4f} {results_total} {endpoint} "{query_string}"' + # Echo raw SQL from SQLAlchemy statements. 
# EXPERT: Works only in command line/library use. NOMINATIM_DEBUG_SQL=no diff --git a/src/nominatim_api/server/falcon/server.py b/src/nominatim_api/server/falcon/server.py index df2b3379..6eb0ecd0 100644 --- a/src/nominatim_api/server/falcon/server.py +++ b/src/nominatim_api/server/falcon/server.py @@ -122,7 +122,8 @@ class FileLoggingMiddleware: """ Middleware to log selected requests into a file. """ - def __init__(self, file_name: str): + def __init__(self, file_name: str, logstr: str): + self.logstr = logstr + '\n' self.fd = open(file_name, 'a', buffering=1, encoding='utf8') async def process_request(self, req: Request, _: Response) -> None: @@ -151,8 +152,7 @@ class FileLoggingMiddleware: qs[param] = qs[param].replace(tzinfo=None)\ .isoformat(sep=' ', timespec='milliseconds') - self.fd.write(("[{start}] {total_time:.4f} {results_total} " - '{endpoint} "{query_string}"\n').format_map(qs)) + self.fd.write(self.logstr.format_map(qs)) class APIMiddleware: @@ -201,7 +201,7 @@ def get_application(project_dir: Path, middleware: List[Any] = [apimw] log_file = apimw.config.LOG_FILE if log_file: - middleware.append(FileLoggingMiddleware(log_file)) + middleware.append(FileLoggingMiddleware(log_file, apimw.config.LOG_FORMAT)) app = App(cors_enable=apimw.config.get_bool('CORS_NOACCESSCONTROL'), middleware=middleware) diff --git a/src/nominatim_api/server/starlette/server.py b/src/nominatim_api/server/starlette/server.py index 15c5dd92..c59a9a2c 100644 --- a/src/nominatim_api/server/starlette/server.py +++ b/src/nominatim_api/server/starlette/server.py @@ -87,9 +87,10 @@ class FileLoggingMiddleware(BaseHTTPMiddleware): """ Middleware to log selected requests into a file. 
""" - def __init__(self, app: Starlette, file_name: str = ''): + def __init__(self, app: Starlette, file_name: str = '', logstr: str = ''): super().__init__(app) self.fd = open(file_name, 'a', buffering=1, encoding='utf8') + self.logstr = logstr + '\n' async def dispatch(self, request: Request, call_next: RequestResponseEndpoint) -> Response: @@ -114,8 +115,7 @@ class FileLoggingMiddleware(BaseHTTPMiddleware): qs[param] = qs[param].replace(tzinfo=None)\ .isoformat(sep=' ', timespec='milliseconds') - self.fd.write(("[{start}] {total_time:.4f} {results_total} " - '{endpoint} "{query_string}"\n').format_map(qs)) + self.fd.write(self.logstr.format_map(qs)) return response @@ -149,7 +149,8 @@ def get_application(project_dir: Path, log_file = config.LOG_FILE if log_file: - middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file)) # type: ignore + middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file, # type: ignore + logstr=config.LOG_FORMAT)) exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = { TimeoutError: timeout_error, From fd12d2e9f327be0247406b86d660ad70d06eec62 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 20:49:46 +0200 Subject: [PATCH 4/6] add additional stats for search queries --- docs/customize/Settings.md | 14 ++++++++++++++ src/nominatim_api/search/geocoder.py | 10 ++++++++++ src/nominatim_api/types.py | 6 ++++++ 3 files changed, 30 insertions(+) diff --git a/docs/customize/Settings.md b/docs/customize/Settings.md index fb8e6aaf..9bd44f59 100644 --- a/docs/customize/Settings.md +++ b/docs/customize/Settings.md @@ -682,6 +682,20 @@ of metrics than can be logged. The default set of metrics is the following: Variables of type 'time' contain a UTC timestamp string in ISO format. +Nominatim also exposes additional metrics to help with development. 
These +are subject to change between versions: + +/// html | div.simple-table +| name | type | Description | +| ------------------------- | ------ | ------------| +| search_rounds | int | Total number of searches executed for the request. | +| search_min_penalty | float | Minimal possible penalty for the request. | +| search_first_result_round | int | Number of first search to yield any result. | +| search_min_result_penalty | float | Minimal penalty by a result found. | +| search_best_penalty_round | int | Search round that yielded the best penalty result. | +/// + + #### NOMINATIM_DEBUG_SQL | Summary | | diff --git a/src/nominatim_api/search/geocoder.py b/src/nominatim_api/search/geocoder.py index b05888be..738fc270 100644 --- a/src/nominatim_api/search/geocoder.py +++ b/src/nominatim_api/search/geocoder.py @@ -77,7 +77,9 @@ class ForwardGeocoder: """ log().section('Execute database searches') results: Dict[Any, SearchResult] = {} + qs = self.params.query_stats + qs['search_min_penalty'] = round(searches[0].penalty, 2) min_ranking = searches[0].penalty + 2.0 prev_penalty = 0.0 for i, search in enumerate(searches): @@ -93,6 +95,13 @@ class ForwardGeocoder: if prevresult: prevresult.accuracy = min(prevresult.accuracy, result.accuracy) else: + if not results: + qs['search_first_result_round'] = i + spenalty = round(search.penalty, 2) + if 'search_min_result_penalty' not in qs or \ + spenalty < qs['search_min_result_penalty']: + qs['search_min_result_penalty'] = spenalty + qs['search_best_penalty_round'] = i results[rhash] = result min_ranking = min(min_ranking, result.accuracy * 1.2, 2.0) log().result_dump('Results', ((r.accuracy, r) for r in lookup_results)) @@ -100,6 +109,7 @@ class ForwardGeocoder: if self.timeout.is_elapsed(): break + qs['search_rounds'] = i return SearchResults(results.values()) def pre_filter_results(self, results: SearchResults) -> SearchResults: diff --git a/src/nominatim_api/types.py b/src/nominatim_api/types.py index f2e4c69e..92c2b6b9 
100644 --- a/src/nominatim_api/types.py +++ b/src/nominatim_api/types.py @@ -373,6 +373,12 @@ class NoQueryStats: def __setitem__(self, key: str, value: Any) -> None: pass + def __getitem__(self, key: str) -> Any: + return None + + def __contains__(self, key: str, default: Any = None) -> bool: + return False + def log_time(self, key: str) -> None: pass From 5a2bfd7a1980a80c3cfe3409480adeb461ad2276 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 21:38:09 +0200 Subject: [PATCH 5/6] add documentation for library API --- src/nominatim_api/core.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/nominatim_api/core.py b/src/nominatim_api/core.py index 376b5016..0f2bcbd2 100644 --- a/src/nominatim_api/core.py +++ b/src/nominatim_api/core.py @@ -458,6 +458,8 @@ class NominatimAPI: Only POI places can have parents. (Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. @@ -540,6 +542,8 @@ class NominatimAPI: Only POI places can have parents. (Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. @@ -620,6 +624,8 @@ class NominatimAPI: Only POI places can have parents. (Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. @@ -719,6 +725,8 @@ class NominatimAPI: Only POI places can have parents. 
(Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. @@ -835,6 +843,8 @@ class NominatimAPI: Only POI places can have parents. (Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. @@ -942,6 +952,8 @@ class NominatimAPI: Only POI places can have parents. (Default: False) keywords (bool): Add detailed information about the search terms used for this place. + query_stats (QueryStatistics): When given collects statistics + about the query execution. Returns: source_table (enum): Data source of the place. See below for possible values. From 45a44f1411be0b6912bcf71ea04dd76d4a97ace0 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 10 Sep 2025 21:40:39 +0200 Subject: [PATCH 6/6] export QueryStatistics type --- src/nominatim_api/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/nominatim_api/__init__.py b/src/nominatim_api/__init__.py index 89c57b65..bdef4258 100644 --- a/src/nominatim_api/__init__.py +++ b/src/nominatim_api/__init__.py @@ -24,7 +24,8 @@ from .types import (PlaceID as PlaceID, Point as Point, Bbox as Bbox, GeometryFormat as GeometryFormat, - DataLayer as DataLayer) + DataLayer as DataLayer, + QueryStatistics as QueryStatistics) from .results import (SourceTable as SourceTable, AddressLine as AddressLine, AddressLines as AddressLines,