mirror of https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00

Compare commits: docs-5.2.x...58e56ec53d
41 commits in this range:

58e56ec53d fe170c9286 0c5af2e3e4 681daeea29 49454048c4 4919240377 56cb183c4e
35060164ab 4cfc1792fb 3bb5d00848 b366b9df6f 6b12501c7a ffd5c32f17 6c8869439f
8188946394 19134cc15c d0b9aac400 48d13c593b 96d04e3a2e 23db1ab981 cd1b1736a9
9447c90b09 81c6cb72e6 f2a122c5c0 57ef0e1f98 922667b650 fba803167c 782df52ea0
c36da68a48 716de13bc9 1df56d7548 9cfef7a31a 139678f367 e578c60ff4 7b4a3c8500
7751f9a6b6 303ac42b47 6a2d2daad5 a51c771107 55547723bf 362088775f
.github/actions/build-nominatim/action.yml (vendored, 2 lines changed)
@@ -22,7 +22,7 @@ runs:
     - name: Install prerequisites from apt
       run: |
-        sudo apt-get install -y -qq python3-icu python3-datrie python3-jinja2 python3-psutil python3-dotenv python3-yaml python3-sqlalchemy python3-psycopg python3-asyncpg
+        sudo apt-get install -y -qq python3-icu python3-datrie python3-jinja2 python3-psutil python3-dotenv python3-yaml python3-sqlalchemy python3-psycopg python3-asyncpg python3-mwparserfromhell
       shell: bash
       if: inputs.dependencies == 'apt'

@@ -37,6 +37,7 @@ Furthermore the following Python libraries are required:
 * [Jinja2](https://palletsprojects.com/p/jinja/)
 * [PyICU](https://pypi.org/project/PyICU/)
 * [PyYaml](https://pyyaml.org/) (5.1+)
+* [mwparserfromhell](https://github.com/earwig/mwparserfromhell/)

 These will be installed automatically when using pip installation.

@@ -73,7 +73,7 @@ virtualenv ~/nominatim-dev-venv
     types-jinja2 types-markupsafe types-psutil types-psycopg2 \
     types-pygments types-pyyaml types-requests types-ujson \
     types-urllib3 typing-extensions unicorn falcon starlette \
-    uvicorn mypy osmium aiosqlite
+    uvicorn mypy osmium aiosqlite mwparserfromhell
 ```

 Now enter the virtual environment whenever you want to develop:

@@ -52,6 +52,15 @@ To run the functional tests, do

     pytest test/bdd

+You can run a single feature file using expression matching:
+
+    pytest test/bdd -k osm2pgsql/import/entrances.feature
+
+This even works for running single tests by adding the line number of the
+scenario header like this:
+
+    pytest test/bdd -k 'osm2pgsql/import/entrances.feature and L4'
+
 The BDD tests create databases for the tests. You can set the names of the
 databases through configuration variables in your `pytest.ini`:

@@ -117,6 +117,7 @@ module.MAIN_TAGS.all_boundaries = {
     boundary = {'named',
                 place = 'delete',
                 land_area = 'delete',
+                protected_area = 'fallback',
                 postal_code = 'always'},
     landuse = 'fallback',
     place = 'always'

@@ -198,7 +199,7 @@ module.MAIN_TAGS_POIS = function (group)
                no = group},
     landuse = {cemetery = 'always'},
     leisure = {'always',
-               nature_reserve = 'fallback',
+               nature_reserve = 'named',
               swimming_pool = 'named',
               garden = 'named',
               common = 'named',

@@ -321,7 +322,6 @@ module.NAME_TAGS = {}

 module.NAME_TAGS.core = {main = {'name', 'name:*',
                                  'int_name', 'int_name:*',
                                  'nat_name', 'nat_name:*',
                                  'reg_name', 'reg_name:*',
                                  'loc_name', 'loc_name:*',
                                  'old_name', 'old_name:*',

@@ -341,6 +341,22 @@ BEGIN
       END IF;
     END IF;

+  IF bnd.extratags ? 'wikidata' THEN
+    FOR linked_placex IN
+      SELECT * FROM placex
+      WHERE placex.class = 'place' AND placex.osm_type = 'N'
+        AND placex.extratags ? 'wikidata' -- needed to select right index
+        AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
+        AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
+        AND placex.rank_search < 26
+        AND _st_covers(bnd.geometry, placex.geometry)
+      ORDER BY lower(name->'name') = bnd_name desc
+    LOOP
+      {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
+      RETURN linked_placex;
+    END LOOP;
+  END IF;
+
   -- If extratags has a place tag, look for linked nodes by their place type.
   -- Area and node still have to have the same name.
   IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode'

@@ -361,22 +377,6 @@ BEGIN
     END LOOP;
   END IF;

-  IF bnd.extratags ? 'wikidata' THEN
-    FOR linked_placex IN
-      SELECT * FROM placex
-      WHERE placex.class = 'place' AND placex.osm_type = 'N'
-        AND placex.extratags ? 'wikidata' -- needed to select right index
-        AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
-        AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
-        AND placex.rank_search < 26
-        AND _st_covers(bnd.geometry, placex.geometry)
-      ORDER BY lower(name->'name') = bnd_name desc
-    LOOP
-      {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
-      RETURN linked_placex;
-    END LOOP;
-  END IF;
-
   -- Name searches can be done for ways as well as relations
   IF bnd_name is not null THEN
     {% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}

@@ -874,7 +874,7 @@ BEGIN
   -- Remove linkage, if we have computed a different new linkee.
   UPDATE placex SET linked_place_id = null, indexed_status = 2
     WHERE linked_place_id = NEW.place_id
-      and (linked_place is null or linked_place_id != linked_place);
+      and (linked_place is null or place_id != linked_place);
   -- update not necessary for osmline, cause linked_place_id does not exist

   -- Postcodes are just here to compute the centroids. They are not searchable

@@ -15,12 +15,13 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "psycopg",
+    "psycopg != 3.3.0",
     "python-dotenv",
     "jinja2",
     "pyYAML>=5.1",
     "psutil",
-    "PyICU"
+    "PyICU",
+    "mwparserfromhell"
 ]
 dynamic = ["version"]

@@ -10,7 +10,7 @@ Helper classes and functions for formatting results into API responses.
 from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping, Optional, cast
 from collections import defaultdict
 from pathlib import Path
-import importlib
+import importlib.util

 from .server.content_types import CONTENT_JSON

@@ -157,16 +157,19 @@ class ReverseGeocoder:
             include.extend(('natural', 'water', 'waterway'))
         return table.c.class_.in_(tuple(include))

-    async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
-        """ Look up the closest rank 26+ place in the database, which
-            is closer than the given distance.
+    async def _find_closest_street_or_pois(self, distance: float,
+                                           fuzziness: float) -> list[SaRow]:
+        """ Look up the closest rank 26+ place in the database.
+            The function finds the object that is closest to the reverse
+            search point as well as all objects within 'fuzziness' distance
+            to that best result.
         """
         t = self.conn.t.placex

         # PostgreSQL must not get the distance as a parameter because
         # there is a danger it won't be able to properly estimate index use
         # when used with prepared statements
-        diststr = sa.text(f"{distance}")
+        diststr = sa.text(f"{distance + fuzziness}")

         sql: SaLambdaSelect = sa.lambda_stmt(
             lambda: _select_from_placex(t)

@@ -174,9 +177,7 @@ class ReverseGeocoder:
             .where(t.c.indexed_status == 0)
             .where(t.c.linked_place_id == None)
             .where(sa.or_(sa.not_(t.c.geometry.is_area()),
-                          t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
-            .order_by('distance')
-            .limit(2))
+                          t.c.centroid.ST_Distance(WKT_PARAM) < diststr)))

         if self.has_geometries():
             sql = self._add_geometry_columns(sql, t.c.geometry)

@@ -198,24 +199,44 @@ class ReverseGeocoder:
                                            self._filter_by_layer(t)))

         if not restrict:
-            return None
+            return []

-        sql = sql.where(sa.or_(*restrict))
+        inner = sql.where(sa.or_(*restrict)) \
+                   .add_columns(t.c.geometry.label('_geometry')) \
+                   .subquery()

-        # If the closest object is inside an area, then check if there is a
-        # POI node nearby and return that.
-        prev_row = None
-        for row in await self.conn.execute(sql, self.bind_params):
-            if prev_row is None:
-                if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
-                    return row
-                prev_row = row
-            else:
-                if row.rank_search > 27 and row.osm_type == 'N'\
-                   and row.distance < 0.0001:
-                    return row
+        # Use a window function to get the closest results to the best result.
+        windowed = sa.select(inner,
+                             sa.func.first_value(inner.c.distance)
+                               .over(order_by=inner.c.distance)
+                               .label('_min_distance'),
+                             sa.func.first_value(
+                                 sa.case((inner.c.rank_search <= 27,
+                                          inner.c._geometry.ST_ClosestPoint(WKT_PARAM)),
+                                         else_=None))
+                               .over(order_by=inner.c.distance)
+                               .label('_closest_point'),
+                             sa.func.first_value(sa.case((sa.or_(inner.c.rank_search <= 27,
+                                                                 inner.c.osm_type == 'N'), None),
+                                                         else_=inner.c._geometry))
+                               .over(order_by=inner.c.distance)
+                               .label('_best_geometry')) \
+            .subquery()

-        return prev_row
+        outer = sa.select(*(c for c in windowed.c if not c.key.startswith('_')),
+                          sa.case((sa.or_(windowed.c._closest_point == None,
+                                          windowed.c.housenumber == None), None),
+                                  else_=windowed.c.centroid.ST_Distance(windowed.c._closest_point))
+                            .label('distance_from_best'),
+                          sa.case((sa.or_(windowed.c._best_geometry == None,
+                                          windowed.c.rank_search <= 27,
+                                          windowed.c.osm_type != 'N'), False),
+                                  else_=windowed.c.centroid.ST_CoveredBy(windowed.c._best_geometry))
+                            .label('best_inside')) \
+            .where(windowed.c.distance < windowed.c._min_distance + fuzziness) \
+            .order_by(windowed.c.distance)
+
+        return list(await self.conn.execute(outer, self.bind_params))

     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
         t = self.conn.t.placex

@@ -301,55 +322,69 @@ class ReverseGeocoder:
         """ Find a street or POI/address for the given WKT point.
         """
         log().section('Reverse lookup on street/address level')
-        distance = 0.006
-        parent_place_id = None
-
-        row = await self._find_closest_street_or_poi(distance)
         row_func: RowFunc = nres.create_from_placex_row
-        log().var_dump('Result (street/building)', row)
+        distance = 0.006

-        # If the closest result was a street, but an address was requested,
-        # check for a housenumber nearby which is part of the street.
-        if row is not None:
-            if self.max_rank > 27 \
-               and self.layer_enabled(DataLayer.ADDRESS) \
-               and row.rank_address <= 27:
-                distance = 0.001
-                parent_place_id = row.place_id
-                log().comment('Find housenumber for street')
-                addr_row = await self._find_housenumber_for_street(parent_place_id)
-                log().var_dump('Result (street housenumber)', addr_row)
-
-                if addr_row is not None:
-                    row = addr_row
-                    row_func = nres.create_from_placex_row
-                    distance = addr_row.distance
-                elif row.country_code == 'us' and parent_place_id is not None:
-                    log().comment('Find TIGER housenumber for street')
-                    addr_row = await self._find_tiger_number_for_street(parent_place_id)
-                    log().var_dump('Result (street Tiger housenumber)', addr_row)
-
-                    if addr_row is not None:
-                        row_func = cast(RowFunc,
-                                        functools.partial(nres.create_from_tiger_row,
-                                                          osm_type=row.osm_type,
-                                                          osm_id=row.osm_id))
-                        row = addr_row
-            else:
-                distance = row.distance
+        result = None
+        hnr_distance = None
+        parent_street = None
+        for row in await self._find_closest_street_or_pois(distance, 0.001):
+            if result is None:
+                log().var_dump('Closest result', row)
+                result = row
+                if self.max_rank > 27 \
+                   and self.layer_enabled(DataLayer.ADDRESS) \
+                   and result.rank_address <= 27:
+                    parent_street = result.place_id
+                    distance = 0.001
+                else:
+                    distance = row.distance
+            # If the closest result was a street but an address was requested,
+            # see if we can refine the result with a housenumber closeby.
+            elif parent_street is not None \
+                    and row.distance_from_best is not None \
+                    and row.distance_from_best < 0.001 \
+                    and (hnr_distance is None or hnr_distance > row.distance_from_best) \
+                    and row.parent_place_id == parent_street:
+                log().var_dump('Housenumber to closest result', row)
+                result = row
+                hnr_distance = row.distance_from_best
+                distance = row.distance
+            # If the closest object is inside an area, then check if there is
+            # a POI nearby and return that with preference.
+            elif result.osm_type != 'N' and result.rank_search > 27 \
+                    and result.distance == 0 \
+                    and row.best_inside:
+                log().var_dump('POI near closest result area', row)
+                result = row
+                break  # it can't get better than that, everything else is farther away
+
+        # For the US also check the TIGER data, when no housenumber/POI was found.
+        if result is not None and parent_street is not None and hnr_distance is None \
+           and result.country_code == 'us':
+            log().comment('Find TIGER housenumber for street')
+            addr_row = await self._find_tiger_number_for_street(parent_street)
+            log().var_dump('Result (street Tiger housenumber)', addr_row)
+
+            if addr_row is not None:
+                row_func = cast(RowFunc,
+                                functools.partial(nres.create_from_tiger_row,
+                                                  osm_type=row.osm_type,
+                                                  osm_id=row.osm_id))
+                result = addr_row

         # Check for an interpolation that is either closer than our result
         # or belongs to a close street found.
-        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
+        # No point in doing this when the result is already inside a building,
+        # i.e. when the distance is already 0.
+        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS) and distance > 0:
             log().comment('Find interpolation for street')
-            addr_row = await self._find_interpolation_for_street(parent_place_id,
-                                                                 distance)
+            addr_row = await self._find_interpolation_for_street(parent_street, distance)
             log().var_dump('Result (street interpolation)', addr_row)
             if addr_row is not None:
-                row = addr_row
-                row_func = nres.create_from_osmline_row
+                return addr_row, nres.create_from_osmline_row

-        return row, row_func
+        return result, row_func

     async def _lookup_area_address(self) -> Optional[SaRow]:
         """ Lookup large addressable areas for the given WKT point.

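The windowed query above tags every candidate row with the best result's minimum distance and keeps only rows within 'fuzziness' of it; the Python loop then walks that short, distance-ordered list. A minimal standalone sketch of the same selection idea, with a hypothetical Row stand-in for the SQLAlchemy result rows:

from dataclasses import dataclass
from typing import List

@dataclass
class Row:                     # hypothetical stand-in for a placex result row
    distance: float
    rank_search: int
    osm_type: str

def closest_with_fuzziness(rows: List[Row], fuzziness: float) -> List[Row]:
    """ Keep the closest row plus every row within 'fuzziness' of it,
        mirroring the _min_distance window filter in the SQL above. """
    if not rows:
        return []
    rows = sorted(rows, key=lambda r: r.distance)
    min_distance = rows[0].distance   # what first_value(distance) yields per row
    return [r for r in rows if r.distance < min_distance + fuzziness]

# A POI node 0.0004 beyond the closest area still makes the cut; the far
# street does not.
rows = [Row(0.0, 28, 'W'), Row(0.0004, 30, 'N'), Row(0.005, 26, 'W')]
print(closest_with_fuzziness(rows, 0.001))   # first two rows survive
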
@@ -374,7 +374,7 @@ class SearchBuilder:
             tokens = self.get_country_tokens(assignment.country)
             if not tokens:
                 return None
-            sdata.set_strings('countries', tokens)
+            sdata.set_countries(tokens)
             sdata.penalty += self.query.get_in_word_penalty(assignment.country)
         elif self.details.countries:
             sdata.countries = dbf.WeightedStrings(self.details.countries,

@@ -413,7 +413,7 @@ class SearchBuilder:
         """
         tokens = self.query.get_tokens(trange, qmod.TOKEN_COUNTRY)
         if self.details.countries:
-            tokens = [t for t in tokens if t.lookup_word in self.details.countries]
+            tokens = [t for t in tokens if t.get_country() in self.details.countries]

         return tokens

@@ -244,6 +244,21 @@ class SearchData:

         setattr(self, field, wstrs)

+    def set_countries(self, tokens: List[Token]) -> None:
+        """ Set the WeightedStrings properties for countries. Multiple
+            entries for the same country are deduplicated and the minimum
+            penalty is used. Adapts the global penalty, so that the
+            minimum penalty is 0.
+        """
+        if tokens:
+            min_penalty = min(t.penalty for t in tokens)
+            self.penalty += min_penalty
+            countries: dict[str, float] = {}
+            for t in tokens:
+                cc = t.get_country()
+                countries[cc] = min(t.penalty - min_penalty, countries.get(cc, 10000))
+            self.countries = WeightedStrings(list(countries.keys()), list(countries.values()))
+
     def set_qualifiers(self, tokens: List[Token]) -> None:
         """ Set the qualifier field from the given tokens.
         """

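The dedup-and-rebase arithmetic in set_countries() can be illustrated in isolation with plain (country_code, penalty) tuples instead of Token objects — a sketch, not the library API:

def dedup_country_penalties(tokens):
    """ tokens: list of (country_code, penalty) pairs.
        Returns (base_penalty, {cc: extra_penalty}) where the smallest
        extra penalty is rebased to 0, as in SearchData.set_countries(). """
    if not tokens:
        return 0.0, {}
    min_penalty = min(p for _, p in tokens)   # moved into the global penalty
    countries = {}
    for cc, p in tokens:
        countries[cc] = min(p - min_penalty, countries.get(cc, 10000))
    return min_penalty, countries

# 'de' appears twice; only the cheaper variant survives and is rebased to 0.
print(dedup_country_penalties([('de', 0.3), ('de', 0.5), ('gb', 0.4)]))
# -> (0.3, {'de': 0.0, 'gb': 0.10000000000000003})
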
@@ -59,12 +59,16 @@ class ICUToken(qmod.Token):
         assert self.info
         return self.info.get('class', ''), self.info.get('type', '')

-    def rematch(self, norm: str) -> None:
+    def get_country(self) -> str:
+        assert self.info
+        return cast(str, self.info.get('cc', ''))
+
+    def match_penalty(self, norm: str) -> float:
         """ Check how well the token matches the given normalized string
             and add a penalty, if necessary.
         """
         if not self.lookup_word:
-            return
+            return 0.0

         seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
         distance = 0

@@ -75,7 +79,7 @@ class ICUToken(qmod.Token):
                 distance += max((ato-afrom), (bto-bfrom))
             elif tag != 'equal':
                 distance += abs((ato-afrom) - (bto-bfrom))
-        self.penalty += (distance/len(self.lookup_word))
+        return (distance/len(self.lookup_word))

     @staticmethod
     def from_db_row(row: SaRow) -> 'ICUToken':

@@ -330,9 +334,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
                            for n in query.nodes[start + 1:end + 1]).strip()
             for ttype, tokens in tlist.items():
-                if ttype != qmod.TOKEN_COUNTRY:
-                    for token in tokens:
-                        cast(ICUToken, token).rematch(norm)
+                for token in tokens:
+                    itok = cast(ICUToken, token)
+                    itok.penalty += itok.match_penalty(norm) * \
+                        (1 if ttype in (qmod.TOKEN_WORD, qmod.TOKEN_PARTIAL) else 2)

     def compute_break_penalties(self, query: qmod.QueryStruct) -> None:
         """ Set the break penalties for the nodes in the query.

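match_penalty() now returns the normalized opcode distance instead of mutating the token, and the analyzer weights it double for everything except word/partial tokens. A self-contained sketch of the distance computation; the branch condition for the first case sits above the hunk shown here, so treating deletions and insertions uniformly below is an assumption:

import difflib

def match_penalty(lookup_word: str, norm: str) -> float:
    # Sketch of ICUToken.match_penalty(); the exact guard on the first
    # branch is not visible in the hunk above and is assumed here.
    if not lookup_word:
        return 0.0
    seq = difflib.SequenceMatcher(a=lookup_word, b=norm)
    distance = 0
    for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
        if tag in ('delete', 'insert'):
            distance += max(ato - afrom, bto - bfrom)
        elif tag != 'equal':
            distance += abs((ato - afrom) - (bto - bfrom))
    return distance / len(lookup_word)

print(match_penalty('hauptstrasse', 'hauptstr'))  # 4 unmatched chars -> ~0.33
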
@@ -127,6 +127,12 @@ class Token(ABC):
             category objects.
         """

+    @abstractmethod
+    def get_country(self) -> str:
+        """ Return the country code this token is associated with
+            (currently for country tokens only).
+        """
+

 @dataclasses.dataclass
 class TokenRange:

@@ -7,6 +7,8 @@
 """
 Server implementation using the falcon webserver framework.
 """
+from __future__ import annotations
+
 from typing import Optional, Mapping, Any, List, cast
 from pathlib import Path
 import asyncio

@@ -161,7 +163,7 @@ class APIMiddleware:

     def __init__(self, project_dir: Path, environ: Optional[Mapping[str, str]]) -> None:
         self.api = NominatimAPIAsync(project_dir, environ)
-        self.app: Optional[App] = None
+        self.app: Optional[App[Request, Response]] = None

     @property
     def config(self) -> Configuration:

@@ -169,7 +171,7 @@ class APIMiddleware:
         """
         return self.api.config

-    def set_app(self, app: App) -> None:
+    def set_app(self, app: App[Request, Response]) -> None:
         """ Set the Falcon application this middleware is connected to.
         """
         self.app = app

@@ -193,7 +195,7 @@ class APIMiddleware:


 def get_application(project_dir: Path,
-                    environ: Optional[Mapping[str, str]] = None) -> App:
+                    environ: Optional[Mapping[str, str]] = None) -> App[Request, Response]:
     """ Create a Nominatim Falcon ASGI application.
     """
     apimw = APIMiddleware(project_dir, environ)

@@ -215,7 +217,7 @@ def get_application(project_dir: Path,
     return app


-def run_wsgi() -> App:
+def run_wsgi() -> App[Request, Response]:
     """ Entry point for uvicorn.

         Make sure uvicorn is run from the project directory.

@@ -23,6 +23,7 @@ from ..tokenizer.base import AbstractTokenizer
 from ..version import NOMINATIM_VERSION
 from .args import NominatimArgs

+import time

 LOG = logging.getLogger()

@@ -86,6 +87,8 @@ class SetupAll:
         from ..tools import database_import, postcodes, freeze
         from ..indexer.indexer import Indexer

+        start_time = time.time()
+
         num_threads = args.threads or psutil.cpu_count() or 1
         country_info.setup_country_config(args.config)

@@ -138,6 +141,10 @@ class SetupAll:
             LOG.warning('Recompute word counts')
             tokenizer.update_statistics(args.config, threads=num_threads)

+        end_time = time.time()
+        elapsed = end_time - start_time
+        LOG.warning(f'Import completed successfully in {elapsed:.2f} seconds.')
+
         self._finalize_database(args.config.get_libpq_dsn(), args.offline)

         return 0

@@ -197,7 +197,7 @@ class Configuration:
         if dsn.startswith('pgsql:'):
             return dict((p.split('=', 1) for p in dsn[6:].split(';')))

-        return conninfo_to_dict(dsn)
+        return conninfo_to_dict(dsn)  # type: ignore

     def get_import_style_file(self) -> Path:
         """ Return the import style file as a path object. Translates the

@@ -29,6 +29,9 @@ class CountryPostcodeMatcher:
         self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
         self.pattern = re.compile(pc_pattern)

+        # We want to exclude 0000, 00-000, 000 00 etc.
+        self.zero_pattern = re.compile(r'^[0\- ]+$')
+
         self.output = config.get('output', r'\g<0>')

     def match(self, postcode: str) -> Optional[Match[str]]:

@@ -40,7 +43,10 @@ class CountryPostcodeMatcher:
         normalized = self.norm_pattern.fullmatch(postcode.upper())

         if normalized:
-            return self.pattern.fullmatch(normalized.group(1))
+            match = self.pattern.fullmatch(normalized.group(1))
+            if match and self.zero_pattern.match(match.string):
+                return None
+            return match

         return None

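The new zero_pattern rejects postcodes that consist only of zeros, dashes, and blanks once the country pattern has matched. A quick illustration of the regex on its own:

import re

zero_pattern = re.compile(r'^[0\- ]+$')   # same pattern as in the matcher

for candidate in ('00000', '00-000', '000 00', '00100'):
    print(candidate, bool(zero_pattern.match(candidate)))
# Only '00100' prints False. Note the matcher applies this check to the
# already-normalized postcode (normalized.group(1)), so any country
# prefix like 'PL-' has been stripped off before the test runs.
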
@@ -475,20 +475,23 @@ class ICUNameAnalyzer(AbstractAnalyzer):
         assert self.conn is not None
         word_tokens = set()
         for name in names:
-            norm_name = self._search_normalized(name.name)
-            if norm_name:
-                word_tokens.add(norm_name)
+            norm_name = self._normalized(name.name)
+            token_name = self._search_normalized(name.name)
+            if norm_name and token_name:
+                word_tokens.add((token_name, norm_name))

         with self.conn.cursor() as cur:
             # Get existing names
-            cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
+            cur.execute("""SELECT word_token,
+                                  word as lookup,
+                                  coalesce(info ? 'internal', false) as is_internal
                              FROM word
-                            WHERE type = 'C' and word = %s""",
+                            WHERE type = 'C' and info->>'cc' = %s""",
                         (country_code, ))
             # internal/external names
-            existing_tokens: Dict[bool, Set[str]] = {True: set(), False: set()}
+            existing_tokens: Dict[bool, Set[Tuple[str, str]]] = {True: set(), False: set()}
             for word in cur:
-                existing_tokens[word[1]].add(word[0])
+                existing_tokens[word[2]].add((word[0], word[1]))

         # Delete names that no longer exist.
         gone_tokens = existing_tokens[internal] - word_tokens

@@ -496,10 +499,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
             gone_tokens.update(existing_tokens[False] & word_tokens)
         if gone_tokens:
             cur.execute("""DELETE FROM word
-                                 USING unnest(%s::text[]) as token
-                           WHERE type = 'C' and word = %s
-                                 and word_token = token""",
-                        (list(gone_tokens), country_code))
+                                 USING jsonb_array_elements(%s) as data
+                           WHERE type = 'C' and info->>'cc' = %s
+                                 and word_token = data->>0 and word = data->>1""",
+                        (Jsonb(list(gone_tokens)), country_code))

         # Only add those names that are not yet in the list.
         new_tokens = word_tokens - existing_tokens[True]

@@ -508,15 +511,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
         if new_tokens:
             if internal:
                 sql = """INSERT INTO word (word_token, type, word, info)
-                           (SELECT token, 'C', %s, '{"internal": "yes"}'
-                              FROM unnest(%s::text[]) as token)
+                           (SELECT data->>0, 'C', data->>1,
+                                   jsonb_build_object('internal', 'yes', 'cc', %s::text)
+                              FROM jsonb_array_elements(%s) as data)
                          """
             else:
-                sql = """INSERT INTO word (word_token, type, word)
-                           (SELECT token, 'C', %s
-                              FROM unnest(%s::text[]) as token)
+                sql = """INSERT INTO word (word_token, type, word, info)
+                           (SELECT data->>0, 'C', data->>1,
+                                   jsonb_build_object('cc', %s::text)
+                              FROM jsonb_array_elements(%s) as data)
                          """
-            cur.execute(sql, (country_code, list(new_tokens)))
+            cur.execute(sql, (country_code, Jsonb(list(new_tokens))))

     def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
         """ Determine tokenizer information about the given place.

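Country names now travel to PostgreSQL as a JSON array of (word_token, word) pairs unpacked with jsonb_array_elements, instead of unnest over a plain text array. A minimal sketch of that round trip with psycopg; the DSN is a placeholder:

import psycopg
from psycopg.types.json import Jsonb

pairs = [('SCHWEIZ', 'Schweiz'), ('SUISSE', 'Suisse')]   # (word_token, word)

# 'dbname=nominatim' is a hypothetical connection string; any psycopg
# connection will do for the demonstration.
with psycopg.connect('dbname=nominatim') as conn, conn.cursor() as cur:
    # Each array element is itself a two-element array; data->>0 and
    # data->>1 extract token and lookup name back out as text.
    cur.execute("""SELECT data->>0, data->>1
                     FROM jsonb_array_elements(%s) as data""",
                (Jsonb([list(p) for p in pairs]),))
    print(cur.fetchall())   # [('SCHWEIZ', 'Schweiz'), ('SUISSE', 'Suisse')]
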
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2024 by the Nominatim developer community.
+# Copyright (C) 2025 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Functions for database migration to newer software versions.

@@ -18,6 +18,7 @@ from ..db.connection import connect, Connection, \
 from ..db.sql_preprocessor import SQLPreprocessor
 from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from ..tokenizer import factory as tokenizer_factory
+from ..data.country_info import create_country_names, setup_country_config
 from . import refresh

 LOG = logging.getLogger()

@@ -156,3 +157,25 @@ def create_place_entrance_table(conn: Connection, config: Configuration, **_: An
     CREATE UNIQUE INDEX place_entrance_osm_id_idx ON place_entrance
         USING BTREE (osm_id);
     """)
+
+
+@_migration(5, 2, 99, 1)
+def convert_country_tokens(conn: Connection, config: Configuration, **_: Any) -> None:
+    """ Convert country word tokens.
+
+        Country tokens now save the country in the info field instead of the
+        word. This migration removes all country tokens from the word table
+        and reimports the default country names. This means that custom names
+        are lost. If you need them back, invalidate the OSM objects containing
+        the names by setting indexed_status to 2 and then reindex the database.
+    """
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
+    # There is only one tokenizer at the time of migration, so we make
+    # some assumptions here about the structure of the database. This will
+    # fail if somebody has written a custom tokenizer.
+    with conn.cursor() as cur:
+        cur.execute("DELETE FROM word WHERE type = 'C'")
+    conn.commit()
+
+    setup_country_config(config)
+    create_country_names(conn, tokenizer, config.get_str_list('LANGUAGES'))

@@ -11,6 +11,8 @@ from typing import Iterable
 import re
 import logging

+import mwparserfromhell
+
 from ...config import Configuration
 from ...utils.url_utils import get_url
 from .special_phrase import SpecialPhrase

@@ -36,10 +38,6 @@ class SPWikiLoader:
     """
     def __init__(self, config: Configuration) -> None:
         self.config = config
-        # Compile the regex here to increase performances.
-        self.occurence_pattern = re.compile(
-            r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
-        )
         # Hack around a bug where building=yes was imported with quotes into the wiki
         self.type_fix_pattern = re.compile(r'\"|&quot;')

@@ -58,11 +56,21 @@ class SPWikiLoader:
             LOG.warning('Importing phrases for lang: %s...', lang)
             loaded_xml = _get_wiki_content(lang)

-            # One match will be of format [label, class, type, operator, plural]
-            matches = self.occurence_pattern.findall(loaded_xml)
+            wikicode = mwparserfromhell.parse(loaded_xml)

-            for match in matches:
-                yield SpecialPhrase(match[0],
-                                    match[1],
-                                    self.type_fix_pattern.sub('', match[2]),
-                                    match[3])
+            for table in wikicode.filter_tags(matches=lambda t: t.tag == 'table'):
+                for row in table.contents.filter_tags(matches=lambda t: t.tag == 'tr'):
+                    cells = list(row.contents.filter_tags(matches=lambda t: t.tag == 'td'))
+
+                    if len(cells) < 5:
+                        continue
+
+                    label = cells[0].contents.strip_code().strip()
+                    cls = cells[1].contents.strip_code().strip()
+                    typ = cells[2].contents.strip_code().strip()
+                    operator = cells[3].contents.strip_code().strip()
+
+                    yield SpecialPhrase(label,
+                                        cls,
+                                        self.type_fix_pattern.sub('', typ),
+                                        operator)

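The loader now walks the wiki markup with mwparserfromhell instead of a hand-rolled regex, which is why rows that put each cell on its own line also parse. A standalone sketch of the same traversal on a tiny inline table:

import mwparserfromhell

WIKITEXT = """
{| class="wikitable sortable"
|-
| Zip Line || aerialway || zip_line || - || N
|-
| Coworkings near
| amenity
| coworking_space
| near
| Y
|}
"""

wikicode = mwparserfromhell.parse(WIKITEXT)
for table in wikicode.filter_tags(matches=lambda t: t.tag == 'table'):
    for row in table.contents.filter_tags(matches=lambda t: t.tag == 'tr'):
        cells = [c.contents.strip_code().strip()
                 for c in row.contents.filter_tags(matches=lambda t: t.tag == 'td')]
        if len(cells) >= 5:
            print(cells[:4])
# ['Zip Line', 'aerialway', 'zip_line', '-']
# ['Coworkings near', 'amenity', 'coworking_space', 'near']
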
@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
     return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])


-NOMINATIM_VERSION = parse_version('5.2.0-0')
+NOMINATIM_VERSION = parse_version('5.2.99-0')

 POSTGRESQL_REQUIRED_VERSION = (12, 0)
 POSTGIS_REQUIRED_VERSION = (3, 0)

@@ -9,6 +9,7 @@ Fixtures for BDD test steps
 """
 import sys
 import json
+import re
 from pathlib import Path

 import psycopg

@@ -20,7 +21,8 @@ sys.path.insert(0, str(SRC_DIR / 'src'))

 import pytest
 from pytest_bdd.parsers import re as step_parse
-from pytest_bdd import given, when, then
+from pytest_bdd import given, when, then, scenario
+from pytest_bdd.feature import get_features

 pytest.register_assert_rewrite('utils')

@@ -373,3 +375,57 @@ def check_place_missing_lines(db_conn, table, osm_type, osm_id, osm_class):

     with db_conn.cursor() as cur:
         assert cur.execute(sql, params).fetchone()[0] == 0
+
+
+if pytest.version_tuple >= (8, 0, 0):
+    def pytest_pycollect_makemodule(module_path, parent):
+        return BddTestCollector.from_parent(parent, path=module_path)
+
+
+class BddTestCollector(pytest.Module):
+
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+    def collect(self):
+        for item in super().collect():
+            yield item
+
+        if hasattr(self.obj, 'PYTEST_BDD_SCENARIOS'):
+            for path in self.obj.PYTEST_BDD_SCENARIOS:
+                for feature in get_features([str(Path(self.path.parent, path).resolve())]):
+                    yield FeatureFile.from_parent(self,
+                                                  name=str(Path(path, feature.rel_filename)),
+                                                  path=Path(feature.filename),
+                                                  feature=feature)
+
+
+# borrowed from pytest-bdd: src/pytest_bdd/scenario.py
+def make_python_name(string: str) -> str:
+    """Make python attribute name out of a given string."""
+    string = re.sub(r"\W", "", string.replace(" ", "_"))
+    return re.sub(r"^\d+_*", "", string).lower()
+
+
+class FeatureFile(pytest.File):
+    class obj:
+        pass
+
+    def __init__(self, feature, **kwargs):
+        self.feature = feature
+        super().__init__(**kwargs)
+
+    def collect(self):
+        for sname, sobject in self.feature.scenarios.items():
+            class_name = f"L{sobject.line_number}"
+            test_name = "test_" + make_python_name(sname)
+
+            @scenario(self.feature.filename, sname)
+            def _test():
+                pass
+
+            tclass = type(class_name, (),
+                          {test_name: staticmethod(_test)})
+            setattr(self.obj, class_name, tclass)
+
+            yield pytest.Class.from_parent(self, name=class_name, obj=tclass)

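This collector is what makes the line-number selection from the documentation change above work on pytest 8: every scenario is wrapped in a generated class named L<line> after the line of its scenario header, so an expression such as

    pytest test/bdd -k 'osm2pgsql/import/entrances.feature and L4'

matches exactly one scenario. The step-definition modules further below opt in through the PYTEST_BDD_SCENARIOS hook on pytest 8 and fall back to the classic scenarios() call on older versions.
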
@@ -9,13 +9,32 @@ Feature: Reverse searches
         And the places
             | osm | class   | type       | geometry    |
             | W1  | aeroway | terminal   | (1,2,3,4,1) |
-            | N1  | amenity | restaurant | 9           |
+            | N9  | amenity | restaurant | 9           |
         When importing
         And reverse geocoding 1.0001,1.0001
         Then the result contains
             | object |
-            | N1     |
+            | N9     |
+        When reverse geocoding 1.0003,1.0001
+        Then the result contains
+            | object |
+            | W1     |
+
+
+    Scenario: Find closest housenumber for street matches
+        Given the 0.0001 grid with origin 1,1
+            |    | 1 |   |    |
+            |    |   | 2 |    |
+            | 10 |   |   | 11 |
+        And the places
+            | osm | class   | type     | name        | geometry |
+            | W1  | highway | service  | Goose Drive | 10,11    |
+            | N2  | tourism | art_work | Beauty      | 2        |
+        And the places
+            | osm | class | type  | housenr | geometry |
+            | N1  | place | house | 23      | 1        |
+        When importing
+        When reverse geocoding 1.0002,1.0002
+        Then the result contains
+            | object |
+            | N1     |

@@ -15,7 +15,7 @@ import xml.etree.ElementTree as ET

 import pytest
 from pytest_bdd.parsers import re as step_parse
-from pytest_bdd import scenarios, when, given, then
+from pytest_bdd import when, given, then

 from nominatim_db import cli
 from nominatim_db.config import Configuration

@@ -150,4 +150,8 @@ def parse_api_json_response(api_response, fmt, num):
     return result


-scenarios('features/api')
+if pytest.version_tuple >= (8, 0, 0):
+    PYTEST_BDD_SCENARIOS = ['features/api']
+else:
+    from pytest_bdd import scenarios
+    scenarios('features/api')

@@ -15,7 +15,7 @@ import re
 import psycopg

 import pytest
-from pytest_bdd import scenarios, when, then, given
+from pytest_bdd import when, then, given
 from pytest_bdd.parsers import re as step_parse

 from utils.place_inserter import PlaceColumn

@@ -276,4 +276,8 @@ def then_check_interpolation_table_negative(db_conn, oid):
         assert cur.fetchone()[0] == 0


-scenarios('features/db')
+if pytest.version_tuple >= (8, 0, 0):
+    PYTEST_BDD_SCENARIOS = ['features/db']
+else:
+    from pytest_bdd import scenarios
+    scenarios('features/db')

@@ -11,7 +11,7 @@ import asyncio
 import random

 import pytest
-from pytest_bdd import scenarios, when, then, given
+from pytest_bdd import when, then, given
 from pytest_bdd.parsers import re as step_parse

 from nominatim_db import cli

@@ -106,4 +106,8 @@ def check_place_content(db_conn, datatable, node_grid, table, exact):
     check_table_content(db_conn, table, datatable, grid=node_grid, exact=bool(exact))


-scenarios('features/osm2pgsql')
+if pytest.version_tuple >= (8, 0, 0):
+    PYTEST_BDD_SCENARIOS = ['features/osm2pgsql']
+else:
+    from pytest_bdd import scenarios
+    scenarios('features/osm2pgsql')

@@ -17,6 +17,9 @@ class MyToken(query.Token):
     def get_category(self):
         return 'this', 'that'

+    def get_country(self):
+        return 'cc'
+

 def mktoken(tid: int):
     return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,

@@ -2,12 +2,14 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2023 by the Nominatim developer community.
+# Copyright (C) 2025 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for creating abstract searches from token assignments.
 """
+from typing import Optional
 import pytest
+import dataclasses

 from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase
 import nominatim_api.search.query as qmod

@@ -17,10 +19,16 @@ from nominatim_api.types import SearchDetails
 import nominatim_api.search.db_searches as dbs


+@dataclasses.dataclass
 class MyToken(Token):
+    cc: Optional[str] = None

     def get_category(self):
         return 'this', 'that'

+    def get_country(self):
+        return self.cc
+

 def make_query(*args):
     q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])

@@ -30,18 +38,24 @@ def make_query(*args):
     q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)

     for start, tlist in enumerate(args):
-        for end, ttype, tinfo in tlist:
-            for tid, word in tinfo:
-                q.add_token(TokenRange(start, end), ttype,
-                            MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0,
-                                    token=tid, count=1, addr_count=1,
-                                    lookup_word=word))
+        for end, ttype, tinfos in tlist:
+            for tinfo in tinfos:
+                if isinstance(tinfo, tuple):
+                    q.add_token(TokenRange(start, end), ttype,
+                                MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0,
+                                        token=tinfo[0], count=1, addr_count=1,
+                                        lookup_word=tinfo[1]))
+                else:
+                    q.add_token(TokenRange(start, end), ttype, tinfo)

     return q


 def test_country_search():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
+        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
+    ])])
     builder = SearchBuilder(q, SearchDetails())

     searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

@@ -55,7 +69,10 @@ def test_country_search():


 def test_country_search_with_country_restriction():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
+        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
+    ])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'}))

     searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

@@ -69,7 +86,10 @@ def test_country_search_with_country_restriction():


 def test_country_search_with_conflicting_country_restriction():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
+        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
+    ])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))

     searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))

@@ -94,8 +114,11 @@ def test_postcode_search_simple():


 def test_postcode_with_country():
-    q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
-                   [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])])
+    q = make_query(
+        [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
+        [(2, qmod.TOKEN_COUNTRY, [
+            MyToken(penalty=0.0, token=1, count=1, addr_count=1, lookup_word='none', cc='xx'),
+        ])])
     builder = SearchBuilder(q, SearchDetails())

     searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),

@@ -116,9 +116,11 @@ async def test_penalty_postcodes_and_housenumbers(conn, term, order):

     assert query.num_token_slots() == 1

-    torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting]
+    torder = [(min(t.penalty for t in tl.tokens), tl.ttype) for tl in query.nodes[0].starting]
     torder.sort()

+    print(torder)
+
     assert [t[1] for t in torder] == order

@@ -20,6 +20,9 @@ class MyToken(Token):
     def get_category(self):
         return 'this', 'that'

+    def get_country(self):
+        return 'cc'
+

 def make_query(*args):
     q = QueryStruct([Phrase(args[0][1], '')])

@@ -163,12 +163,12 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
                       parent_place_id=990,
                       rank_search=30, rank_address=30,
                       housenumber='23',
-                      centroid=(10.0, 10.00002))
+                      centroid=(10.0, 10.0002))
     apiobj.add_osmline(place_id=992,
                        parent_place_id=990,
                        startnumber=1, endnumber=3, step=1,
-                       centroid=(10.0, 10.00001),
-                       geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
+                       centroid=(10.0, 10.0001),
+                       geometry='LINESTRING(9.995 10.0001, 10.005 10.0001)')
     apiobj.add_placex(place_id=1990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name={'name': 'Other Street'},

@@ -177,8 +177,8 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
     apiobj.add_osmline(place_id=1992,
                        parent_place_id=1990,
                        startnumber=1, endnumber=3, step=1,
-                       centroid=(10.0, 20.00001),
-                       geometry='LINESTRING(9.995 20.00001, 10.005 20.00001)')
+                       centroid=(10.0, 20.0001),
+                       geometry='LINESTRING(9.995 20.0001, 10.005 20.0001)')

     params = {'geometry_output': napi.GeometryFormat.TEXT} if with_geom else {}

@@ -99,7 +99,7 @@ def test_address_simple_places(apiobj, frontend, atype, address, search):


 def test_address_country(apiobj, frontend):
-    apiobj.add_word_table([(None, 'ro', 'C', 'ro', None)])
+    apiobj.add_word_table([(None, 'ro', 'C', 'ro', {'cc': 'ro'})])
     apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
     apiobj.add_country_name('ro', {'name': 'România'})

@@ -10,6 +10,8 @@ of the table.
 """
 from nominatim_db.db.connection import execute_scalar

+from psycopg.types.json import Jsonb
+

 class MockIcuWordTable:
     """ A word table for testing using legacy word table structure.

@@ -42,11 +44,11 @@ class MockIcuWordTable:
                     """, (word_token, word, cls, typ, oper))
         self.conn.commit()

-    def add_country(self, country_code, word_token):
+    def add_country(self, country_code, word_token, lookup):
         with self.conn.cursor() as cur:
-            cur.execute("""INSERT INTO word (word_token, type, word)
-                           VALUES(%s, 'C', %s)""",
-                        (word_token, country_code))
+            cur.execute("""INSERT INTO word (word_token, type, word, info)
+                           VALUES(%s, 'C', %s, %s)""",
+                        (word_token, lookup, Jsonb({'cc': country_code})))
         self.conn.commit()

     def add_postcode(self, word_token, postcode):

@@ -93,7 +95,7 @@ class MockIcuWordTable:

     def get_country(self):
         with self.conn.cursor() as cur:
-            cur.execute("SELECT word, word_token FROM word WHERE type = 'C'")
+            cur.execute("SELECT info->>'cc', word_token, word FROM word WHERE type = 'C'")
             result = set((tuple(row) for row in cur))
             assert len(result) == cur.rowcount, "Word table has duplicates."
         return result

@@ -237,3 +237,9 @@ def test_postcode_default_pattern_pass(sanitize, postcode):
 @pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
 def test_postcode_default_pattern_fail(sanitize, postcode):
     assert sanitize(country='an', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('00000', '00-000', 'PL-00000', 'PL 00-000'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_zeros(sanitize, postcode):
+    assert sanitize(country='pl', postcode=postcode) == []

@@ -343,16 +343,18 @@ def test_add_country_names_new(analyzer, word_table):
     with analyzer() as anl:
         anl.add_country_names('es', {'name': 'Espagña', 'name:en': 'Spain'})

-    assert word_table.get_country() == {('es', 'ESPAGÑA'), ('es', 'SPAIN')}
+    assert word_table.get_country() == {('es', 'ESPAGÑA', 'Espagña'),
+                                        ('es', 'SPAIN', 'Spain')}


 def test_add_country_names_extend(analyzer, word_table):
-    word_table.add_country('ch', 'SCHWEIZ')
+    word_table.add_country('ch', 'SCHWEIZ', 'Schweiz')

     with analyzer() as anl:
         anl.add_country_names('ch', {'name': 'Schweiz', 'name:fr': 'Suisse'})

-    assert word_table.get_country() == {('ch', 'SCHWEIZ'), ('ch', 'SUISSE')}
+    assert word_table.get_country() == {('ch', 'SCHWEIZ', 'Schweiz'),
+                                        ('ch', 'SUISSE', 'Suisse')}


 class TestPlaceNames:

@@ -403,7 +405,7 @@ class TestPlaceNames:
         info = self.analyzer.process_place(place)

         self.expect_name_terms(info, '#norge', 'norge')
-        assert word_table.get_country() == {('no', 'NORGE')}
+        assert word_table.get_country() == {('no', 'NORGE', 'Norge')}


 class TestPlaceAddress:

@@ -203,7 +203,7 @@ def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
     placex_table.add(cls='amenity', typ='animal_shelter')  # in db for special phrase filtering
     sp_importer.import_phrases(tokenizer, should_replace)

-    assert len(tokenizer.analyser_cache['special_phrases']) == 18
+    assert len(tokenizer.analyser_cache['special_phrases']) == 19

     assert check_table_exist(temp_db_cursor, class_test, type_test)
     assert check_placeid_and_centroid_indexes(temp_db_cursor, class_test, type_test)

@@ -54,4 +54,6 @@ def test_generate_phrases(sp_wiki_loader):
              ('Water near', 'amenity', 'drinking_water', 'near'),
              ('Embassy', 'amenity', 'embassy', '-'),
              ('Embassys', 'amenity', 'embassy', '-'),
-             ('Embassies', 'amenity', 'embassy', '-')}
+             ('Embassies', 'amenity', 'embassy', '-'),
+             # test for one-cell-per-line format
+             ('Coworkings near', 'amenity', 'coworking_space', 'near')}

test/testdata/special_phrases_test_content.txt (vendored, 196 lines changed)
@@ -1,78 +1,120 @@
-<mediawiki xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
-  <siteinfo>
-    <sitename>OpenStreetMap Wiki</sitename>
-    <dbname>wiki</dbname>
-    <base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
-    <generator>MediaWiki 1.35.2</generator>
-    <case>first-letter</case>
-    <namespaces>
-      <namespace key="-2" case="first-letter">Media</namespace>
-      <namespace key="-1" case="first-letter">Special</namespace>
-      <namespace key="0" case="first-letter"/>
-      <namespace key="1" case="first-letter">Talk</namespace>
-      <namespace key="2" case="first-letter">User</namespace>
-      <namespace key="3" case="first-letter">User talk</namespace>
-      <namespace key="4" case="first-letter">Wiki</namespace>
-      <namespace key="5" case="first-letter">Wiki talk</namespace>
-      <namespace key="6" case="first-letter">File</namespace>
-      <namespace key="7" case="first-letter">File talk</namespace>
-      <namespace key="8" case="first-letter">MediaWiki</namespace>
-      <namespace key="9" case="first-letter">MediaWiki talk</namespace>
-      <namespace key="10" case="first-letter">Template</namespace>
-      <namespace key="11" case="first-letter">Template talk</namespace>
-      <namespace key="12" case="first-letter">Help</namespace>
-      <namespace key="13" case="first-letter">Help talk</namespace>
-      <namespace key="14" case="first-letter">Category</namespace>
-      <namespace key="15" case="first-letter">Category talk</namespace>
-      <namespace key="120" case="first-letter">Item</namespace>
-      <namespace key="121" case="first-letter">Item talk</namespace>
-      <namespace key="122" case="first-letter">Property</namespace>
-      <namespace key="123" case="first-letter">Property talk</namespace>
-      <namespace key="200" case="first-letter">DE</namespace>
-      <namespace key="201" case="first-letter">DE talk</namespace>
-      <namespace key="202" case="first-letter">FR</namespace>
-      <namespace key="203" case="first-letter">FR talk</namespace>
-      <namespace key="204" case="first-letter">ES</namespace>
-      <namespace key="205" case="first-letter">ES talk</namespace>
-      <namespace key="206" case="first-letter">IT</namespace>
-      <namespace key="207" case="first-letter">IT talk</namespace>
-      <namespace key="208" case="first-letter">NL</namespace>
-      <namespace key="209" case="first-letter">NL talk</namespace>
-      <namespace key="210" case="first-letter">RU</namespace>
-      <namespace key="211" case="first-letter">RU talk</namespace>
-      <namespace key="212" case="first-letter">JA</namespace>
-      <namespace key="213" case="first-letter">JA talk</namespace>
-      <namespace key="710" case="first-letter">TimedText</namespace>
-      <namespace key="711" case="first-letter">TimedText talk</namespace>
-      <namespace key="828" case="first-letter">Module</namespace>
-      <namespace key="829" case="first-letter">Module talk</namespace>
-      <namespace key="2300" case="first-letter">Gadget</namespace>
-      <namespace key="2301" case="first-letter">Gadget talk</namespace>
-      <namespace key="2302" case="case-sensitive">Gadget definition</namespace>
-      <namespace key="2303" case="case-sensitive">Gadget definition talk</namespace>
-    </namespaces>
-  </siteinfo>
-  <page>
-    <title>Nominatim/Special Phrases/EN</title>
-    <ns>0</ns>
-    <id>67365</id>
-    <revision>
-      <id>2100424</id>
-      <parentid>2100422</parentid>
-      <timestamp>2021-01-27T20:29:53Z</timestamp>
-      <contributor>
-        <username>Violaine Do</username>
-        <id>88152</id>
-      </contributor>
-      <minor/>
-      <comment>/* en */ add coworking amenity</comment>
-      <origin>2100424</origin>
-      <model>wikitext</model>
-      <format>text/x-wiki</format>
-      <text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
-== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
-      </text>
-      <sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
-    </revision>
-  </page>
+<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.11/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
+  <siteinfo>
+    <sitename>OpenStreetMap Wiki</sitename>
+    <dbname>wiki</dbname>
+    <base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
+    <generator>MediaWiki 1.43.5</generator>
+    <case>first-letter</case>
+    <namespaces>
+      <namespace key="-2" case="first-letter">Media</namespace>
+      <namespace key="-1" case="first-letter">Special</namespace>
+      <namespace key="0" case="first-letter"/>
+      <namespace key="1" case="first-letter">Talk</namespace>
+      <namespace key="2" case="first-letter">User</namespace>
+      <namespace key="3" case="first-letter">User talk</namespace>
+      <namespace key="4" case="first-letter">Wiki</namespace>
+      <namespace key="5" case="first-letter">Wiki talk</namespace>
+      <namespace key="6" case="first-letter">File</namespace>
+      <namespace key="7" case="first-letter">File talk</namespace>
+      <namespace key="8" case="first-letter">MediaWiki</namespace>
+      <namespace key="9" case="first-letter">MediaWiki talk</namespace>
+      <namespace key="10" case="first-letter">Template</namespace>
+      <namespace key="11" case="first-letter">Template talk</namespace>
+      <namespace key="12" case="first-letter">Help</namespace>
+      <namespace key="13" case="first-letter">Help talk</namespace>
+      <namespace key="14" case="first-letter">Category</namespace>
+      <namespace key="15" case="first-letter">Category talk</namespace>
+      <namespace key="120" case="first-letter">Item</namespace>
+      <namespace key="121" case="first-letter">Item talk</namespace>
+      <namespace key="122" case="first-letter">Property</namespace>
+      <namespace key="123" case="first-letter">Property talk</namespace>
+      <namespace key="200" case="first-letter">DE</namespace>
+      <namespace key="201" case="first-letter">DE talk</namespace>
+      <namespace key="202" case="first-letter">FR</namespace>
+      <namespace key="203" case="first-letter">FR talk</namespace>
+      <namespace key="204" case="first-letter">ES</namespace>
+      <namespace key="205" case="first-letter">ES talk</namespace>
+      <namespace key="206" case="first-letter">IT</namespace>
+      <namespace key="207" case="first-letter">IT talk</namespace>
+      <namespace key="208" case="first-letter">NL</namespace>
+      <namespace key="209" case="first-letter">NL talk</namespace>
+      <namespace key="210" case="first-letter">RU</namespace>
+      <namespace key="211" case="first-letter">RU talk</namespace>
+      <namespace key="212" case="first-letter">JA</namespace>
+      <namespace key="213" case="first-letter">JA talk</namespace>
+      <namespace key="710" case="first-letter">TimedText</namespace>
+      <namespace key="711" case="first-letter">TimedText talk</namespace>
+      <namespace key="828" case="first-letter">Module</namespace>
+      <namespace key="829" case="first-letter">Module talk</namespace>
+      <namespace key="3000" case="first-letter">Proposal</namespace>
+      <namespace key="3001" case="first-letter">Proposal talk</namespace>
+    </namespaces>
+  </siteinfo>
+  <page>
+    <title>Nominatim/Special Phrases/EN</title>
+    <ns>0</ns>
+    <id>67365</id>
+    <revision>
+      <id>2861977</id>
+      <parentid>2634159</parentid>
+      <timestamp>2025-06-02T14:00:52Z</timestamp>
+      <contributor>
+        <username>Lonvia</username>
+        <id>17191</id>
+      </contributor>
+      <comment>overgeneralized entry removed, phrases need to chosen so that all results with the given tag can be described with that phrase</comment>
+      <origin>2861977</origin>
+      <model>wikitext</model>
+      <format>text/x-wiki</format>
+      <text bytes="160765" sha1="0zlpuvnjs4io9e006rntbxm5b84kgst" xml:space="preserve">== en ==
+{| class="wikitable sortable"
+|-
+! Word / Phrase !! Key !! Value !! Operator !! Plural
+|-
+| Zip Line || aerialway || zip_line || - || N
+|-
+| Zip Lines || aerialway || zip_line || - || Y
+|-
+| Zip Line in || aerialway || zip_line || in || N
+|-
+| Zip Lines in || aerialway || zip_line || in || Y
+|-
+| Zip Line near || aerialway || zip_line || near || N
+|-
+| Animal shelter || amenity || animal_shelter || - || N
+|-
+| Animal shelters || amenity || animal_shelter || - || Y
+|-
+| Animal shelter in || amenity || animal_shelter || in || N
+|-
+| Animal shelters in || amenity || animal_shelter || in || Y
+|-
+| Animal shelter near || amenity || animal_shelter || near|| N
+|-
+| Animal shelters near || amenity || animal_shelter || NEAR|| Y
+|-
+| Drinking Water near || amenity || drinking_water || near || N
+|-
+| Water || amenity || drinking_water || - || N
+|-
+| Water in || amenity || drinking_water || In || N
+|-
+| Water near || amenity || drinking_water || near || N
+|-
+| Embassy || amenity || embassy || - || N
+|-
+| Embassys || amenity || "embassy" || - || Y
+|-
+| Embassies || amenity || embassy || - || Y
+|-
+| Coworkings near
+| amenity
+| coworking_space
+| near
+| Y
+|}
+[[Category:Word list]]</text>
+      <sha1>0zlpuvnjs4io9e006rntbxm5b84kgst</sha1>
+    </revision>
+  </page>
 </mediawiki>