Merge branch 'osm-search:master' into check-database-on-frozen-database

This commit is contained in:
mtmail
2023-06-22 12:14:55 +02:00
committed by GitHub
29 changed files with 191 additions and 160 deletions

View File

@@ -178,7 +178,7 @@ class HTMLLogger(BaseLogger):
self._write(f"rank={res.rank_address}, ")
self._write(f"osm={format_osm(res.osm_object)}, ")
self._write(f'cc={res.country_code}, ')
self._write(f'importance={res.importance or -1:.5f})</dd>')
self._write(f'importance={res.importance or float("nan"):.5f})</dd>')
total += 1
self._write(f'</dl><b>TOTAL:</b> {total}</p>')
@@ -196,7 +196,7 @@ class HTMLLogger(BaseLogger):
def _python_var(self, var: Any) -> str:
if CODE_HIGHLIGHT:
fmt = highlight(repr(var), PythonLexer(), HtmlFormatter(nowrap=True))
fmt = highlight(str(var), PythonLexer(), HtmlFormatter(nowrap=True))
return f'<div class="highlight"><code class="lang-python">{fmt}</code></div>'
return f'<code class="lang-python">{str(var)}</code>'

View File

@@ -141,12 +141,14 @@ class SearchBuilder:
yield dbs.CountrySearch(sdata)
if sdata.postcodes and (is_category or self.configured_for_postcode):
penalty = 0.0 if sdata.countries else 0.1
if address:
sdata.lookups = [dbf.FieldLookup('nameaddress_vector',
[t.token for r in address
for t in self.query.get_partials_list(r)],
'restrict')]
yield dbs.PostcodeSearch(0.4, sdata)
penalty += 0.2
yield dbs.PostcodeSearch(penalty, sdata)
def build_housenumber_search(self, sdata: dbf.SearchData, hnrs: List[Token],

View File

@@ -403,6 +403,12 @@ class CountrySearch(AbstractSearch):
details: SearchDetails) -> nres.SearchResults:
""" Look up the country in the fallback country tables.
"""
# Avoid the fallback search when this is a more search. Country results
# usually are in the first batch of results and it is not possible
# to exclude these fallbacks.
if details.excluded:
return nres.SearchResults()
t = conn.t.country_name
tgrid = conn.t.country_grid
@@ -562,6 +568,8 @@ class PlaceSearch(AbstractSearch):
sql = sql.where(tsearch.c.country_code.in_(self.countries.values))
if self.postcodes:
# if a postcode is given, don't search for state or country level objects
sql = sql.where(tsearch.c.address_rank > 9)
tpc = conn.t.postcode
if self.expected_count > 1000:
# Many results expected. Restrict by postcode.

View File

@@ -180,7 +180,7 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
return f'{c[0]}^{c[1]}'
for search in searches[start:]:
fields = ('name_lookups', 'name_ranking', 'countries', 'housenumbers',
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
'postcodes', 'qualifier')
iters = itertools.zip_longest([f"{search.penalty:.3g}"],
*(getattr(search, attr, []) for attr in fields),

View File

@@ -153,7 +153,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
"""
log().section('Analyze query (using ICU tokenizer)')
normalized = list(filter(lambda p: p.text,
(qmod.Phrase(p.ptype, self.normalizer.transliterate(p.text))
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
for p in phrases)))
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
@@ -187,6 +187,14 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
return query
def normalize_text(self, text: str) -> str:
""" Bring the given text into a normalized form. That is the
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
return cast(str, self.normalizer.transliterate(text))
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.
@@ -248,12 +256,11 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER:
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) <= 3):
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
norm = parts[i].normalized

View File

@@ -233,12 +233,11 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
elif tlist.ttype == qmod.TokenType.HOUSENUMBER:
elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER \
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) <= 3):
if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)

View File

@@ -270,7 +270,12 @@ class _TokenSequence:
if (base.postcode.start == 0 and self.direction != -1)\
or (base.postcode.end == query.num_token_slots() and self.direction != 1):
log().comment('postcode search')
yield dataclasses.replace(base, penalty=self.penalty)
# <address>,<postcode> should give preference to address search
if base.postcode.start == 0:
penalty = self.penalty
else:
penalty = self.penalty + 0.1
yield dataclasses.replace(base, penalty=penalty)
# Postcode or country-only search
if not base.address:
@@ -278,6 +283,9 @@ class _TokenSequence:
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
# <postcode>,<address> should give preference to postcode search
if base.postcode and base.postcode.start == 0:
self.penalty += 0.1
# Use entire first word as name
if self.direction != -1:
log().comment('first word = name')

View File

@@ -302,10 +302,11 @@ def format_excluded(ids: Any) -> List[int]:
else:
raise UsageError("Parameter 'excluded' needs to be a comma-separated list "
"or a Python list of numbers.")
if not all(isinstance(i, int) or (isinstance(i, str) and i.isdigit()) for i in plist):
if not all(isinstance(i, int) or
(isinstance(i, str) and (not i or i.isdigit())) for i in plist):
raise UsageError("Parameter 'excluded' only takes place IDs.")
return [int(id) for id in plist if id]
return [int(id) for id in plist if id] or [0]
def format_categories(categories: List[Tuple[str, str]]) -> List[Tuple[str, str]]:

View File

@@ -62,13 +62,13 @@ def extend_query_parts(queryparts: Dict[str, Any], details: Dict[str, Any],
"""
parsed = SearchDetails.from_kwargs(details)
if parsed.geometry_output != GeometryFormat.NONE:
if parsed.geometry_output & GeometryFormat.GEOJSON:
if GeometryFormat.GEOJSON in parsed.geometry_output:
queryparts['polygon_geojson'] = '1'
if parsed.geometry_output & GeometryFormat.KML:
if GeometryFormat.KML in parsed.geometry_output:
queryparts['polygon_kml'] = '1'
if parsed.geometry_output & GeometryFormat.SVG:
if GeometryFormat.SVG in parsed.geometry_output:
queryparts['polygon_svg'] = '1'
if parsed.geometry_output & GeometryFormat.TEXT:
if GeometryFormat.TEXT in parsed.geometry_output:
queryparts['polygon_text'] = '1'
if parsed.address_details:
queryparts['addressdetails'] = '1'

View File

@@ -185,7 +185,7 @@ class ASGIAdaptor(abc.ABC):
""" Return the accepted languages.
"""
return self.get('accept-language')\
or self.get_header('http_accept_language')\
or self.get_header('accept-language')\
or self.config().DEFAULT_LANGUAGE

View File

@@ -215,7 +215,7 @@ class AdminServe:
group.add_argument('--server', default='127.0.0.1:8088',
help='The address the server will listen to.')
group.add_argument('--engine', default='php',
choices=('php', 'sanic', 'falcon', 'starlette'),
choices=('php', 'falcon', 'starlette'),
help='Webserver framework to run. (default: php)')
@@ -223,6 +223,7 @@ class AdminServe:
if args.engine == 'php':
run_php_server(args.server, args.project_dir / 'website')
else:
import uvicorn # pylint: disable=import-outside-toplevel
server_info = args.server.split(':', 1)
host = server_info[0]
if len(server_info) > 1:
@@ -232,21 +233,10 @@ class AdminServe:
else:
port = 8088
if args.engine == 'sanic':
server_module = importlib.import_module('nominatim.server.sanic.server')
server_module = importlib.import_module(f'nominatim.server.{args.engine}.server')
app = server_module.get_application(args.project_dir)
app.run(host=host, port=port, debug=True, single_process=True)
else:
import uvicorn # pylint: disable=import-outside-toplevel
if args.engine == 'falcon':
server_module = importlib.import_module('nominatim.server.falcon.server')
elif args.engine == 'starlette':
server_module = importlib.import_module('nominatim.server.starlette.server')
app = server_module.get_application(args.project_dir)
uvicorn.run(app, host=host, port=port)
app = server_module.get_application(args.project_dir)
uvicorn.run(app, host=host, port=port)
return 0

View File

@@ -1,78 +0,0 @@
# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Server implementation using the sanic webserver framework.
"""
from typing import Any, Optional, Mapping, Callable, cast, Coroutine
from pathlib import Path
from sanic import Request, HTTPResponse, Sanic
from sanic.exceptions import SanicException
from sanic.response import text as TextResponse
from nominatim.api import NominatimAPIAsync
import nominatim.api.v1 as api_impl
from nominatim.config import Configuration
class ParamWrapper(api_impl.ASGIAdaptor):
    """ Glue between the v1 API adaptor interface and a Sanic request.

        Every accessor reads from the single Sanic request the wrapper
        was created for.
    """

    def __init__(self, request: Request) -> None:
        self.request = request


    def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
        """ Look up `name` in the request's query arguments and fall
            back to `default` when it is not present.
        """
        query_args = self.request.args
        return cast(Optional[str], query_args.get(name, default))


    def get_header(self, name: str, default: Optional[str] = None) -> Optional[str]:
        """ Look up `name` in the request's headers and fall back to
            `default` when it is not present.
        """
        headers = self.request.headers
        return cast(Optional[str], headers.get(name, default))


    def error(self, msg: str, status: int = 400) -> SanicException:
        """ Build a SanicException carrying the message and the HTTP
            status code. The exception is returned, not raised.
        """
        return SanicException(msg, status_code=status)


    def create_response(self, status: int, output: str) -> HTTPResponse:
        """ Wrap `output` into a Sanic text response with the given status.
        """
        # NOTE(review): content_type is not set in this class; presumably
        # it is provided by the ASGIAdaptor base class - confirm.
        return TextResponse(output, status=status,
                            content_type=self.content_type)


    def config(self) -> Configuration:
        """ Return the configuration of the API object stored in the
            application context.
        """
        api = self.request.app.ctx.api
        return cast(Configuration, api.config)
def _wrap_endpoint(func: api_impl.EndpointFunc)\
        -> Callable[[Request], Coroutine[Any, Any, HTTPResponse]]:
    """ Turn an API endpoint function into a Sanic request handler.

        The returned coroutine function calls `func` with the API object
        from the application context and a ParamWrapper around the request.
    """
    async def _callback(request: Request) -> HTTPResponse:
        api = request.app.ctx.api
        adaptor = ParamWrapper(request)
        response = await func(api, adaptor)
        return cast(HTTPResponse, response)

    return _callback
def get_application(project_dir: Path,
                    environ: Optional[Mapping[str, str]] = None) -> Sanic:
    """ Create a Nominatim sanic ASGI application.

        The API object is created from `project_dir` and `environ` and
        stored in the application context. Each entry of the v1 ROUTES
        table is registered under `/<name>` and, when SERVE_LEGACY_URLS
        is enabled, additionally under `/<name>.php`.
    """
    app = Sanic("NominatimInstance")

    api = NominatimAPIAsync(project_dir, environ)
    app.ctx.api = api
    config = api.config

    if config.get_bool('CORS_NOACCESSCONTROL'):
        # Imported here so sanic_cors is only needed when the option is on.
        from sanic_cors import CORS # pylint: disable=import-outside-toplevel
        CORS(app)

    with_legacy_urls = config.get_bool('SERVE_LEGACY_URLS')

    for route_name, route_func in api_impl.ROUTES:
        handler = _wrap_endpoint(route_func)
        app.add_route(handler, f"/{route_name}", name=f"v1_{route_name}_simple")
        if not with_legacy_urls:
            continue
        app.add_route(handler, f"/{route_name}.php", name=f"v1_{route_name}_legacy")

    return app