adapt docs for release

2026-02-26 11:08:13 +00:00 · 2025-10-29 11:08:42 +01:00
42 changed files with 260 additions and 594 deletions
--- a/.github/actions/build-nominatim/action.yml
+++ b/.github/actions/build-nominatim/action.yml
@@ -22,7 +22,7 @@ runs:
        - name: Install prerequisites from apt
          run: |
-            sudo apt-get install -y -qq python3-icu python3-datrie python3-jinja2 python3-psutil python3-dotenv python3-yaml python3-sqlalchemy python3-psycopg python3-asyncpg python3-mwparserfromhell
+            sudo apt-get install -y -qq python3-icu python3-datrie python3-jinja2 python3-psutil python3-dotenv python3-yaml python3-sqlalchemy python3-psycopg python3-asyncpg
          shell: bash
          if: inputs.dependencies == 'apt'
--- a/docs/admin/Installation.md
+++ b/docs/admin/Installation.md
@@ -37,7 +37,6 @@ Furthermore the following Python libraries are required:
  * [Jinja2](https://palletsprojects.com/p/jinja/)
  * [PyICU](https://pypi.org/project/PyICU/)
  * [PyYaml](https://pyyaml.org/) (5.1+)
  * [mwparserfromhell](https://github.com/earwig/mwparserfromhell/)
 These will be installed automatically when using pip installation.
@@ -111,14 +110,17 @@ Then you can install Nominatim with:
    pip install nominatim-db nominatim-api
-## Downloading and building Nominatim
+## Downloading and building Nominatim from source
 The following instructions are only relevant if you want to build and
 install Nominatim **from source**.
 ### Downloading the latest release
 You can download the [latest release from nominatim.org](https://nominatim.org/downloads/).
 The release contains all necessary files. Just unpack it.
-### Downloading the latest development version
+### Downloading the source for the latest development version
 If you want to install the latest development version from GitHub:
@@ -132,7 +134,7 @@ The development version does not include the country grid. Download it separatel
 wget -O Nominatim/data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
 ```
-### Building Nominatim
+### Building Nominatim from source
 Nominatim is easiest to run from its own virtual environment. To create one, run:
--- a/docs/develop/Development-Environment.md
+++ b/docs/develop/Development-Environment.md
@@ -73,7 +73,7 @@ virtualenv ~/nominatim-dev-venv
    types-jinja2 types-markupsafe types-psutil types-psycopg2 \
    types-pygments types-pyyaml types-requests types-ujson \
    types-urllib3 typing-extensions unicorn falcon starlette \
-    uvicorn mypy osmium aiosqlite mwparserfromhell
+    uvicorn mypy osmium aiosqlite
 ```
 Now enter the virtual environment whenever you want to develop:
--- a/docs/develop/Testing.md
+++ b/docs/develop/Testing.md
@@ -52,15 +52,6 @@ To run the functional tests, do
    pytest test/bdd
 You can run a single feature file using expression matching:
    pytest test/bdd -k osm2pgsql/import/entrances.feature
 This even works for running single tests by adding the line number of the
 scenario header like this:
    pytest test/bdd -k 'osm2pgsql/import/entrances.feature and L4'
 The BDD tests create databases for the tests. You can set the names of the
 databases through configuration variables in your `pytest.ini`:
--- a/lib-lua/themes/nominatim/presets.lua
+++ b/lib-lua/themes/nominatim/presets.lua
@@ -117,7 +117,6 @@ module.MAIN_TAGS.all_boundaries = {
    boundary = {'named',
                place = 'delete',
                land_area = 'delete',
                protected_area = 'fallback',
                postal_code = 'always'},
    landuse = 'fallback',
    place = 'always'
@@ -199,7 +198,7 @@ module.MAIN_TAGS_POIS = function (group)
                no = group},
    landuse = {cemetery = 'always'},
    leisure = {'always',
-               nature_reserve = 'named',
+               nature_reserve = 'fallback',
               swimming_pool = 'named',
               garden = 'named',
               common = 'named',
@@ -322,6 +321,7 @@ module.NAME_TAGS = {}
 module.NAME_TAGS.core = {main = {'name', 'name:*',
                                 'int_name', 'int_name:*',
                                 'nat_name', 'nat_name:*',
                                 'reg_name', 'reg_name:*',
                                 'loc_name', 'loc_name:*',
                                 'old_name', 'old_name:*',
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -341,22 +341,6 @@ BEGIN
    END IF;
  END IF;
  IF bnd.extratags ? 'wikidata' THEN
    FOR linked_placex IN
      SELECT * FROM placex
      WHERE placex.class = 'place' AND placex.osm_type = 'N'
        AND placex.extratags ? 'wikidata' -- needed to select right index
        AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
        AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
        AND placex.rank_search < 26
        AND _st_covers(bnd.geometry, placex.geometry)
      ORDER BY lower(name->'name') = bnd_name desc
    LOOP
      {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
      RETURN linked_placex;
    END LOOP;
  END IF;
  -- If extratags has a place tag, look for linked nodes by their place type.
  -- Area and node still have to have the same name.
  IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode'
@@ -377,6 +361,22 @@ BEGIN
    END LOOP;
  END IF;
  IF bnd.extratags ? 'wikidata' THEN
    FOR linked_placex IN
      SELECT * FROM placex
      WHERE placex.class = 'place' AND placex.osm_type = 'N'
        AND placex.extratags ? 'wikidata' -- needed to select right index
        AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
        AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
        AND placex.rank_search < 26
        AND _st_covers(bnd.geometry, placex.geometry)
      ORDER BY lower(name->'name') = bnd_name desc
    LOOP
      {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
      RETURN linked_placex;
    END LOOP;
  END IF;
  -- Name searches can be done for ways as well as relations
  IF bnd_name is not null THEN
    {% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}
@@ -874,7 +874,7 @@ BEGIN
  -- Remove linkage, if we have computed a different new linkee.
  UPDATE placex SET linked_place_id = null, indexed_status = 2
    WHERE linked_place_id = NEW.place_id
-          and (linked_place is null or place_id != linked_place);
+          and (linked_place is null or linked_place_id != linked_place);
  -- update not necessary for osmline, because linked_place_id does not exist
  -- Postcodes are just here to compute the centroids. They are not searchable
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -1,4 +1,4 @@
-site_name: Nominatim Manual
+site_name: Nominatim 5.2.0 Manual
 theme:
  font: false
  name: material
--- a/packaging/nominatim-db/pyproject.toml
+++ b/packaging/nominatim-db/pyproject.toml
@@ -15,13 +15,12 @@ classifiers = [
    "Operating System :: OS Independent",
 ]
 dependencies = [
-    "psycopg != 3.3.0",
+    "psycopg",
    "python-dotenv",
    "jinja2",
    "pyYAML>=5.1",
    "psutil",
-    "PyICU",
+    "PyICU"
    "mwparserfromhell"
 ]
 dynamic = ["version"]
--- a/settings/country-names/ps.yaml
+++ b/settings/country-names/ps.yaml
@@ -1,4 +1,3 @@
 name: 
-    default: الأراضي الفلسطينية
+    default: Palestinian Territory
    en: Palestinian Territories
    "no": Det palestinske området
--- a/src/nominatim_api/result_formatting.py
+++ b/src/nominatim_api/result_formatting.py
@@ -10,7 +10,7 @@ Helper classes and functions for formatting results into API responses.
 from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping, Optional, cast
 from collections import defaultdict
 from pathlib import Path
-import importlib.util
+import importlib
 from .server.content_types import CONTENT_JSON
--- a/src/nominatim_api/reverse.py
+++ b/src/nominatim_api/reverse.py
@@ -157,19 +157,16 @@ class ReverseGeocoder:
            include.extend(('natural', 'water', 'waterway'))
        return table.c.class_.in_(tuple(include))
-    async def _find_closest_street_or_pois(self, distance: float,
+    async def _find_closest_street_or_poi(self, distance: float) -> Optional[SaRow]:
-                                           fuzziness: float) -> list[SaRow]:
+        """ Look up the closest rank 26+ place in the database, which
-        """ Look up the closest rank 26+ place in the database.
+            is closer than the given distance.
            The function finds the object that is closest to the reverse
            search point as well as all objects within 'fuzziness' distance
            to that best result.
        """
        t = self.conn.t.placex
        # PostgreSQL must not get the distance as a parameter because
        # there is a danger it won't be able to properly estimate index use
        # when used with prepared statements
-        diststr = sa.text(f"{distance + fuzziness}")
+        diststr = sa.text(f"{distance}")
        sql: SaLambdaSelect = sa.lambda_stmt(
            lambda: _select_from_placex(t)
@@ -177,7 +174,9 @@ class ReverseGeocoder:
            .where(t.c.indexed_status == 0)
            .where(t.c.linked_place_id == None)
            .where(sa.or_(sa.not_(t.c.geometry.is_area()),
-                          t.c.centroid.ST_Distance(WKT_PARAM) < diststr)))
+                          t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
            .order_by('distance')
            .limit(2))
        if self.has_geometries():
            sql = self._add_geometry_columns(sql, t.c.geometry)
@@ -199,44 +198,24 @@ class ReverseGeocoder:
                                    self._filter_by_layer(t)))
        if not restrict:
-            return []
+            return None
-        inner = sql.where(sa.or_(*restrict)) \
+        sql = sql.where(sa.or_(*restrict))
                   .add_columns(t.c.geometry.label('_geometry')) \
                   .subquery()
-        # Use a window function to get the closest results to the best result.
+        # If the closest object is inside an area, then check if there is a
-        windowed = sa.select(inner,
+        # POI node nearby and return that.
-                             sa.func.first_value(inner.c.distance)
+        prev_row = None
-                                    .over(order_by=inner.c.distance)
+        for row in await self.conn.execute(sql, self.bind_params):
-                                    .label('_min_distance'),
+            if prev_row is None:
-                             sa.func.first_value(
+                if row.rank_search <= 27 or row.osm_type == 'N' or row.distance > 0:
-                                        sa.case((inner.c.rank_search <= 27,
+                    return row
-                                                 inner.c._geometry.ST_ClosestPoint(WKT_PARAM)),
+                prev_row = row
-                                                else_=None))
+            else:
-                                    .over(order_by=inner.c.distance)
+                if row.rank_search > 27 and row.osm_type == 'N'\
-                                    .label('_closest_point'),
+                   and row.distance < 0.0001:
-                             sa.func.first_value(sa.case((sa.or_(inner.c.rank_search <= 27,
+                    return row
                                                                 inner.c.osm_type == 'N'), None),
                                                         else_=inner.c._geometry))
                                    .over(order_by=inner.c.distance)
                                    .label('_best_geometry')) \
                     .subquery()
-        outer = sa.select(*(c for c in windowed.c if not c.key.startswith('_')),
+        return prev_row
                          sa.case((sa.or_(windowed.c._closest_point == None,
                                          windowed.c.housenumber == None), None),
                                  else_=windowed.c.centroid.ST_Distance(windowed.c._closest_point))
                            .label('distance_from_best'),
                          sa.case((sa.or_(windowed.c._best_geometry == None,
                                          windowed.c.rank_search <= 27,
                                          windowed.c.osm_type != 'N'), False),
                                  else_=windowed.c.centroid.ST_CoveredBy(windowed.c._best_geometry))
                            .label('best_inside')) \
                  .where(windowed.c.distance < windowed.c._min_distance + fuzziness) \
                  .order_by(windowed.c.distance)
        return list(await self.conn.execute(outer, self.bind_params))
    async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
        t = self.conn.t.placex
@@ -322,69 +301,55 @@ class ReverseGeocoder:
        """ Find a street or POI/address for the given WKT point.
        """
        log().section('Reverse lookup on street/address level')
        row_func: RowFunc = nres.create_from_placex_row
        distance = 0.006
        parent_place_id = None
-        result = None
+        row = await self._find_closest_street_or_poi(distance)
-        hnr_distance = None
+        row_func: RowFunc = nres.create_from_placex_row
-        parent_street = None
+        log().var_dump('Result (street/building)', row)
-        for row in await self._find_closest_street_or_pois(distance, 0.001):
+
-            if result is None:
+        # If the closest result was a street, but an address was requested,
-                log().var_dump('Closest result', row)
+        # check for a housenumber nearby which is part of the street.
-                result = row
+        if row is not None:
-                if self.max_rank > 27 \
+            if self.max_rank > 27 \
-                        and self.layer_enabled(DataLayer.ADDRESS) \
+               and self.layer_enabled(DataLayer.ADDRESS) \
-                        and result.rank_address <= 27:
+               and row.rank_address <= 27:
-                    parent_street = result.place_id
+                distance = 0.001
-                    distance = 0.001
+                parent_place_id = row.place_id
-                else:
+                log().comment('Find housenumber for street')
-                    distance = row.distance
+                addr_row = await self._find_housenumber_for_street(parent_place_id)
-            # If the closest result was a street but an address was requested,
+                log().var_dump('Result (street housenumber)', addr_row)
-            # see if we can refine the result with a housenumber closeby.
+
-            elif parent_street is not None \
+                if addr_row is not None:
-                    and row.distance_from_best is not None \
+                    row = addr_row
-                    and row.distance_from_best < 0.001 \
+                    row_func = nres.create_from_placex_row
-                    and (hnr_distance is None or hnr_distance > row.distance_from_best) \
+                    distance = addr_row.distance
-                    and row.parent_place_id == parent_street:
+                elif row.country_code == 'us' and parent_place_id is not None:
-                log().var_dump('Housenumber to closest result', row)
+                    log().comment('Find TIGER housenumber for street')
-                result = row
+                    addr_row = await self._find_tiger_number_for_street(parent_place_id)
-                hnr_distance = row.distance_from_best
+                    log().var_dump('Result (street Tiger housenumber)', addr_row)
                    if addr_row is not None:
                        row_func = cast(RowFunc,
                                        functools.partial(nres.create_from_tiger_row,
                                                          osm_type=row.osm_type,
                                                          osm_id=row.osm_id))
                        row = addr_row
            else:
                distance = row.distance
            # If the closest object is inside an area, then check if there is
            # a POI nearby and return that with preference.
            elif result.osm_type != 'N' and result.rank_search > 27 \
                    and result.distance == 0 \
                    and row.best_inside:
                log().var_dump('POI near closest result area', row)
                result = row
                break  # it can't get better than that, everything else is farther away
        # For the US also check the TIGER data, when no housenumber/POI was found.
        if result is not None and parent_street is not None and hnr_distance is None \
                and result.country_code == 'us':
            log().comment('Find TIGER housenumber for street')
            addr_row = await self._find_tiger_number_for_street(parent_street)
            log().var_dump('Result (street Tiger housenumber)', addr_row)
            if addr_row is not None:
                row_func = cast(RowFunc,
                                functools.partial(nres.create_from_tiger_row,
                                                  osm_type=row.osm_type,
                                                  osm_id=row.osm_id))
                result = addr_row
        # Check for an interpolation that is either closer than our result
        # or belongs to a close street found.
-        # No point in doing this when the result is already inside a building,
+        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS):
        # i.e. when the distance is already 0.
        if self.max_rank > 27 and self.layer_enabled(DataLayer.ADDRESS) and distance > 0:
            log().comment('Find interpolation for street')
-            addr_row = await self._find_interpolation_for_street(parent_street, distance)
+            addr_row = await self._find_interpolation_for_street(parent_place_id,
                                                                 distance)
            log().var_dump('Result (street interpolation)', addr_row)
            if addr_row is not None:
-                return addr_row, nres.create_from_osmline_row
+                row = addr_row
                row_func = nres.create_from_osmline_row
-        return result, row_func
+        return row, row_func
    async def _lookup_area_address(self) -> Optional[SaRow]:
        """ Lookup large addressable areas for the given WKT point.
--- a/src/nominatim_api/search/db_search_builder.py
+++ b/src/nominatim_api/search/db_search_builder.py
@@ -374,7 +374,7 @@ class SearchBuilder:
            tokens = self.get_country_tokens(assignment.country)
            if not tokens:
                return None
-            sdata.set_countries(tokens)
+            sdata.set_strings('countries', tokens)
            sdata.penalty += self.query.get_in_word_penalty(assignment.country)
        elif self.details.countries:
            sdata.countries = dbf.WeightedStrings(self.details.countries,
@@ -413,7 +413,7 @@ class SearchBuilder:
        """
        tokens = self.query.get_tokens(trange, qmod.TOKEN_COUNTRY)
        if self.details.countries:
-            tokens = [t for t in tokens if t.get_country() in self.details.countries]
+            tokens = [t for t in tokens if t.lookup_word in self.details.countries]
        return tokens
--- a/src/nominatim_api/search/db_search_fields.py
+++ b/src/nominatim_api/search/db_search_fields.py
@@ -244,21 +244,6 @@ class SearchData:
            setattr(self, field, wstrs)
    def set_countries(self, tokens: List[Token]) -> None:
        """ Set the WeightedStrings properties for countries. Multiple
            entries for the same country are deduplicated and the minimum
            penalty is used. Adapts the global penalty, so that the
            minimum penalty is 0.

            An empty token list leaves the search data untouched.
        """
        if tokens:
            # Move the smallest token penalty into the search-wide penalty,
            # so that the per-country penalties below are relative to it.
            min_penalty = min(t.penalty for t in tokens)
            self.penalty += min_penalty
            countries: dict[str, float] = {}
            for t in tokens:
                cc = t.get_country()
                # Keep only the lowest relative penalty per country code.
                # 10000 is the fallback for a country not seen before.
                countries[cc] = min(t.penalty - min_penalty, countries.get(cc, 10000))
            self.countries = WeightedStrings(list(countries.keys()), list(countries.values()))
    def set_qualifiers(self, tokens: List[Token]) -> None:
        """ Set the qualifier field from the given tokens.
        """
--- a/src/nominatim_api/search/icu_tokenizer.py
+++ b/src/nominatim_api/search/icu_tokenizer.py
@@ -59,16 +59,12 @@ class ICUToken(qmod.Token):
        assert self.info
        return self.info.get('class', ''), self.info.get('type', '')
-    def get_country(self) -> str:
+    def rematch(self, norm: str) -> None:
        assert self.info
        return cast(str, self.info.get('cc', ''))
    def match_penalty(self, norm: str) -> float:
        """ Check how well the token matches the given normalized string
            and add a penalty, if necessary.
        """
        if not self.lookup_word:
-            return 0.0
+            return
        seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
        distance = 0
@@ -79,7 +75,7 @@ class ICUToken(qmod.Token):
                distance += max((ato-afrom), (bto-bfrom))
            elif tag != 'equal':
                distance += abs((ato-afrom) - (bto-bfrom))
-        return (distance/len(self.lookup_word))
+        self.penalty += (distance/len(self.lookup_word))
    @staticmethod
    def from_db_row(row: SaRow) -> 'ICUToken':
@@ -334,10 +330,9 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
            norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
                           for n in query.nodes[start + 1:end + 1]).strip()
            for ttype, tokens in tlist.items():
-                for token in tokens:
+                if ttype != qmod.TOKEN_COUNTRY:
-                    itok = cast(ICUToken, token)
+                    for token in tokens:
-                    itok.penalty += itok.match_penalty(norm) * \
+                        cast(ICUToken, token).rematch(norm)
                        (1 if ttype in (qmod.TOKEN_WORD, qmod.TOKEN_PARTIAL) else 2)
    def compute_break_penalties(self, query: qmod.QueryStruct) -> None:
        """ Set the break penalties for the nodes in the query.
--- a/src/nominatim_api/search/query.py
+++ b/src/nominatim_api/search/query.py
@@ -127,12 +127,6 @@ class Token(ABC):
            category objects.
        """
    @abstractmethod
    def get_country(self) -> str:
        """ Return the country code this token is associated with
            (currently for country tokens only).
        """
@dataclasses.dataclass
 class TokenRange:
--- a/src/nominatim_api/server/falcon/server.py
+++ b/src/nominatim_api/server/falcon/server.py
@@ -7,8 +7,6 @@
 """
 Server implementation using the falcon webserver framework.
 """
 from __future__ import annotations
 from typing import Optional, Mapping, Any, List, cast
 from pathlib import Path
 import asyncio
@@ -163,7 +161,7 @@ class APIMiddleware:
    def __init__(self, project_dir: Path, environ: Optional[Mapping[str, str]]) -> None:
        self.api = NominatimAPIAsync(project_dir, environ)
-        self.app: Optional[App[Request, Response]] = None
+        self.app: Optional[App] = None
    @property
    def config(self) -> Configuration:
@@ -171,7 +169,7 @@ class APIMiddleware:
        """
        return self.api.config
-    def set_app(self, app: App[Request, Response]) -> None:
+    def set_app(self, app: App) -> None:
        """ Set the Falcon application this middleware is connected to.
        """
        self.app = app
@@ -195,7 +193,7 @@ class APIMiddleware:
 def get_application(project_dir: Path,
-                    environ: Optional[Mapping[str, str]] = None) -> App[Request, Response]:
+                    environ: Optional[Mapping[str, str]] = None) -> App:
    """ Create a Nominatim Falcon ASGI application.
    """
    apimw = APIMiddleware(project_dir, environ)
@@ -217,7 +215,7 @@ def get_application(project_dir: Path,
    return app
-def run_wsgi() -> App[Request, Response]:
+def run_wsgi() -> App:
    """ Entry point for uvicorn.
        Make sure uvicorn is run from the project directory.
--- a/src/nominatim_db/clicmd/setup.py
+++ b/src/nominatim_db/clicmd/setup.py
@@ -23,7 +23,6 @@ from ..tokenizer.base import AbstractTokenizer
 from ..version import NOMINATIM_VERSION
 from .args import NominatimArgs
 import time
 LOG = logging.getLogger()
@@ -87,8 +86,6 @@ class SetupAll:
        from ..tools import database_import, postcodes, freeze
        from ..indexer.indexer import Indexer
        start_time = time.time()
        num_threads = args.threads or psutil.cpu_count() or 1
        country_info.setup_country_config(args.config)
@@ -141,10 +138,6 @@ class SetupAll:
        LOG.warning('Recompute word counts')
        tokenizer.update_statistics(args.config, threads=num_threads)
        end_time = time.time()
        elapsed = end_time - start_time
        LOG.warning(f'Import completed successfully in {elapsed:.2f} seconds.')
        self._finalize_database(args.config.get_libpq_dsn(), args.offline)
        return 0
--- a/src/nominatim_db/config.py
+++ b/src/nominatim_db/config.py
@@ -197,7 +197,7 @@ class Configuration:
        if dsn.startswith('pgsql:'):
            return dict((p.split('=', 1) for p in dsn[6:].split(';')))
-        return conninfo_to_dict(dsn)  # type: ignore
+        return conninfo_to_dict(dsn)
    def get_import_style_file(self) -> Path:
        """ Return the import style file as a path object. Translates the
--- a/src/nominatim_db/data/postcode_format.py
+++ b/src/nominatim_db/data/postcode_format.py
@@ -29,9 +29,6 @@ class CountryPostcodeMatcher:
        self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
        self.pattern = re.compile(pc_pattern)
        # We want to exclude 0000, 00-000, 000 00 etc
        self.zero_pattern = re.compile(r'^[0\- ]+$')
        self.output = config.get('output', r'\g<0>')
    def match(self, postcode: str) -> Optional[Match[str]]:
@@ -43,10 +40,7 @@ class CountryPostcodeMatcher:
        normalized = self.norm_pattern.fullmatch(postcode.upper())
        if normalized:
-            match = self.pattern.fullmatch(normalized.group(1))
+            return self.pattern.fullmatch(normalized.group(1))
            if match and self.zero_pattern.match(match.string):
                return None
            return match
        return None
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -475,23 +475,20 @@ class ICUNameAnalyzer(AbstractAnalyzer):
        assert self.conn is not None
        word_tokens = set()
        for name in names:
-            norm_name = self._normalized(name.name)
+            norm_name = self._search_normalized(name.name)
-            token_name = self._search_normalized(name.name)
+            if norm_name:
-            if norm_name and token_name:
+                word_tokens.add(norm_name)
                word_tokens.add((token_name, norm_name))
        with self.conn.cursor() as cur:
            # Get existing names
-            cur.execute("""SELECT word_token,
+            cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
                                  word as lookup,
                                  coalesce(info ? 'internal', false) as is_internal
                             FROM word
-                             WHERE type = 'C' and info->>'cc' = %s""",
+                             WHERE type = 'C' and word = %s""",
                        (country_code, ))
            # internal/external names
-            existing_tokens: Dict[bool, Set[Tuple[str, str]]] = {True: set(), False: set()}
+            existing_tokens: Dict[bool, Set[str]] = {True: set(), False: set()}
            for word in cur:
-                existing_tokens[word[2]].add((word[0], word[1]))
+                existing_tokens[word[1]].add(word[0])
            # Delete names that no longer exist.
            gone_tokens = existing_tokens[internal] - word_tokens
@@ -499,10 +496,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
                gone_tokens.update(existing_tokens[False] & word_tokens)
            if gone_tokens:
                cur.execute("""DELETE FROM word
-                               USING jsonb_array_elements(%s) as data
+                               USING unnest(%s::text[]) as token
-                               WHERE type = 'C' and info->>'cc' = %s
+                               WHERE type = 'C' and word = %s
-                                     and word_token = data->>0 and word = data->>1""",
+                                     and word_token = token""",
-                            (Jsonb(list(gone_tokens)), country_code))
+                            (list(gone_tokens), country_code))
            # Only add those names that are not yet in the list.
            new_tokens = word_tokens - existing_tokens[True]
@@ -511,17 +508,15 @@ class ICUNameAnalyzer(AbstractAnalyzer):
            if new_tokens:
                if internal:
                    sql = """INSERT INTO word (word_token, type, word, info)
-                               (SELECT data->>0, 'C', data->>1,
+                               (SELECT token, 'C', %s, '{"internal": "yes"}'
-                                       jsonb_build_object('internal', 'yes', 'cc', %s::text)
+                                  FROM unnest(%s::text[]) as token)
                                  FROM jsonb_array_elements(%s) as data)
                           """
                else:
-                    sql = """INSERT INTO word (word_token, type, word, info)
+                    sql = """INSERT INTO word (word_token, type, word)
-                                   (SELECT data->>0, 'C', data->>1,
+                                   (SELECT token, 'C', %s
-                                           jsonb_build_object('cc', %s::text)
+                                    FROM unnest(%s::text[]) as token)
                                    FROM  jsonb_array_elements(%s) as data)
                          """
-                cur.execute(sql, (country_code, Jsonb(list(new_tokens))))
+                cur.execute(sql, (country_code, list(new_tokens)))
    def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
        """ Determine tokenizer information about the given place.
--- a/src/nominatim_db/tools/migration.py
+++ b/src/nominatim_db/tools/migration.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2024 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Functions for database migration to newer software versions.
@@ -18,7 +18,6 @@ from ..db.connection import connect, Connection, \
 from ..db.sql_preprocessor import SQLPreprocessor
 from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from ..tokenizer import factory as tokenizer_factory
 from ..data.country_info import create_country_names, setup_country_config
 from . import refresh
 LOG = logging.getLogger()
@@ -157,25 +156,3 @@ def create_place_entrance_table(conn: Connection, config: Configuration, **_: An
            CREATE UNIQUE INDEX place_entrance_osm_id_idx ON place_entrance
              USING BTREE (osm_id);
              """)
@_migration(5, 2, 99, 1)
 def convert_country_tokens(conn: Connection, config: Configuration, **_: Any) -> None:
    """ Convert country word tokens
        Country tokens now save the country in the info field instead of the
        word. This migration removes all country tokens from the word table
        and reimports the default country name. This means that custom names
        are lost. If you need them back, invalidate the OSM objects containing
        the names by setting indexed_status to 2 and then reindex the database.
    """
    tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
    # There is only one tokenizer at the time of migration, so we make
    # some assumptions here about the structure of the database. This will
    # fail if somebody has written a custom tokenizer.
    with conn.cursor() as cur:
        cur.execute("DELETE FROM word WHERE type = 'C'")
    conn.commit()
    setup_country_config(config)
    create_country_names(conn, tokenizer, config.get_str_list('LANGUAGES'))
--- a/src/nominatim_db/tools/special_phrases/sp_wiki_loader.py
+++ b/src/nominatim_db/tools/special_phrases/sp_wiki_loader.py
@@ -11,8 +11,6 @@ from typing import Iterable
 import re
 import logging
 import mwparserfromhell
 from ...config import Configuration
 from ...utils.url_utils import get_url
 from .special_phrase import SpecialPhrase
@@ -38,6 +36,10 @@ class SPWikiLoader:
    """
    def __init__(self, config: Configuration) -> None:
        self.config = config
        # Compile the regex here to increase performances.
        self.occurence_pattern = re.compile(
            r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
        )
        # Hack around a bug where building=yes was imported with quotes into the wiki
        self.type_fix_pattern = re.compile(r'\"|&quot;')
@@ -56,21 +58,11 @@ class SPWikiLoader:
            LOG.warning('Importing phrases for lang: %s...', lang)
            loaded_xml = _get_wiki_content(lang)
-            wikicode = mwparserfromhell.parse(loaded_xml)
+            # One match will be of format [label, class, type, operator, plural]
            matches = self.occurence_pattern.findall(loaded_xml)
-            for table in wikicode.filter_tags(matches=lambda t: t.tag == 'table'):
+            for match in matches:
-                for row in table.contents.filter_tags(matches=lambda t: t.tag == 'tr'):
+                yield SpecialPhrase(match[0],
-                    cells = list(row.contents.filter_tags(matches=lambda t: t.tag == 'td'))
+                                    match[1],
-
+                                    self.type_fix_pattern.sub('', match[2]),
-                    if len(cells) < 5:
+                                    match[3])
                        continue
                    label = cells[0].contents.strip_code().strip()
                    cls = cells[1].contents.strip_code().strip()
                    typ = cells[2].contents.strip_code().strip()
                    operator = cells[3].contents.strip_code().strip()
                    yield SpecialPhrase(label,
                                        cls,
                                        self.type_fix_pattern.sub('', typ),
                                        operator)
--- a/src/nominatim_db/version.py
+++ b/src/nominatim_db/version.py
@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
    return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
-NOMINATIM_VERSION = parse_version('5.2.99-0')
+NOMINATIM_VERSION = parse_version('5.2.0-0')
 POSTGRESQL_REQUIRED_VERSION = (12, 0)
 POSTGIS_REQUIRED_VERSION = (3, 0)
--- a/test/bdd/conftest.py
+++ b/test/bdd/conftest.py
@@ -9,7 +9,6 @@ Fixtures for BDD test steps
 """
 import sys
 import json
 import re
 from pathlib import Path
 import psycopg
@@ -21,8 +20,7 @@ sys.path.insert(0, str(SRC_DIR / 'src'))
 import pytest
 from pytest_bdd.parsers import re as step_parse
-from pytest_bdd import given, when, then, scenario
+from pytest_bdd import given, when, then
 from pytest_bdd.feature import get_features
 pytest.register_assert_rewrite('utils')
@@ -375,57 +373,3 @@ def check_place_missing_lines(db_conn, table, osm_type, osm_id, osm_class):
    with db_conn.cursor() as cur:
        assert cur.execute(sql, params).fetchone()[0] == 0
 # The custom module collector is only installed on pytest 8.0.0 or newer;
 # on older versions this hook is not defined at all.
 if pytest.version_tuple >= (8, 0, 0):
    def pytest_pycollect_makemodule(module_path, parent):
        # Create the collector for the module at `module_path` through
        # BddTestCollector.
        return BddTestCollector.from_parent(parent, path=module_path)
 class BddTestCollector(pytest.Module):
    """ Module collector that additionally collects pytest-bdd features.

        After the regular items of the module, one FeatureFile is
        produced for every feature found below the paths listed in the
        module's optional PYTEST_BDD_SCENARIOS attribute.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def collect(self):
        # The regular items of the module come first.
        yield from super().collect()

        # Modules that do not declare any scenario paths are done here.
        if not hasattr(self.obj, 'PYTEST_BDD_SCENARIOS'):
            return

        for scenario_dir in self.obj.PYTEST_BDD_SCENARIOS:
            # Scenario paths are resolved against the module's directory.
            feature_root = str(Path(self.path.parent, scenario_dir).resolve())
            for feature in get_features([feature_root]):
                yield FeatureFile.from_parent(
                    self,
                    name=str(Path(scenario_dir, feature.rel_filename)),
                    path=Path(feature.filename),
                    feature=feature)
 # borrowed from pytest-bdd: src/pytest_bdd/scenario.py
 def make_python_name(string: str) -> str:
    """Derive a lowercase Python attribute name from the given string.

    Spaces are turned into underscores, all remaining non-word
    characters are dropped, and a leading run of digits together with
    the underscores directly following it is removed before the result
    is lowercased.
    """
    underscored = string.replace(" ", "_")
    word_chars_only = re.sub(r"\W", "", underscored)
    without_numeric_prefix = re.sub(r"^\d+_*", "", word_chars_only)
    return without_numeric_prefix.lower()
 class FeatureFile(pytest.File):
    """ Collector for one feature file, yielding one generated test class
        per scenario of the feature.
    """
    # Placeholder namespace on which the generated scenario classes are
    # stored. It is defined on the class, so it is shared by all instances.
    class obj:
        pass
    def __init__(self, feature, **kwargs):
        # Keep the feature object; collect() reads its scenarios and filename.
        self.feature = feature
        super().__init__(**kwargs)
    def collect(self):
        """ Yield a pytest.Class for every scenario in the feature. """
        for sname, sobject in self.feature.scenarios.items():
            # The class is named after the scenario's line number, the test
            # method after the sanitised scenario name.
            class_name = f"L{sobject.line_number}"
            test_name = "test_" + make_python_name(sname)
            # The empty function only serves as the target of the
            # pytest-bdd scenario decorator.
            @scenario(self.feature.filename, sname)
            def _test():
                pass
            # Build a class holding the decorated function as a static method.
            tclass = type(class_name, (),
                          {test_name: staticmethod(_test)})
            # NOTE(review): presumably pytest resolves the class by name on
            # the parent's `obj`, which is why it is registered here - confirm.
            setattr(self.obj, class_name, tclass)
            yield pytest.Class.from_parent(self, name=class_name, obj=tclass)
--- a/test/bdd/features/db/query/reverse.feature
+++ b/test/bdd/features/db/query/reverse.feature
@@ -9,32 +9,13 @@ Feature: Reverse searches
        And the places
          | osm | class   | type       | geometry    |
          | W1  | aeroway | terminal   | (1,2,3,4,1) |
-          | N9  | amenity | restaurant | 9           |
+          | N1  | amenity | restaurant | 9           |
        When importing
        And reverse geocoding 1.0001,1.0001
        Then the result contains
         | object |
-         | N9  |
+         | N1  |
        When reverse geocoding 1.0003,1.0001
        Then the result contains
         | object |
         | W1  |
    Scenario: Find closest housenumber for street matches
        Given the 0.0001 grid with origin 1,1
          |    | 1 |   |    |
          |    |   | 2 |    |
          | 10 |   |   | 11 |
        And the places
          | osm | class   | type     | name        | geometry |
          | W1  | highway | service  | Goose Drive | 10,11    |
          | N2  | tourism | art_work | Beauty      | 2        |
        And the places
          | osm | class | type  | housenr | geometry |
          | N1  | place | house | 23      | 1        |
        When importing
        When reverse geocoding 1.0002,1.0002
        Then the result contains
          | object |
          | N1 |
--- a/test/bdd/test_api.py
+++ b/test/bdd/test_api.py
@@ -15,7 +15,7 @@ import xml.etree.ElementTree as ET
 import pytest
 from pytest_bdd.parsers import re as step_parse
-from pytest_bdd import when, given, then
+from pytest_bdd import scenarios, when, given, then
 from nominatim_db import cli
 from nominatim_db.config import Configuration
@@ -150,8 +150,4 @@ def parse_api_json_response(api_response, fmt, num):
    return result
-if pytest.version_tuple >= (8, 0, 0):
+scenarios('features/api')
    PYTEST_BDD_SCENARIOS = ['features/api']
 else:
    from pytest_bdd import scenarios
    scenarios('features/api')
--- a/test/bdd/test_db.py
+++ b/test/bdd/test_db.py
@@ -15,7 +15,7 @@ import re
 import psycopg
 import pytest
-from pytest_bdd import when, then, given
+from pytest_bdd import scenarios, when, then, given
 from pytest_bdd.parsers import re as step_parse
 from utils.place_inserter import PlaceColumn
@@ -276,8 +276,4 @@ def then_check_interpolation_table_negative(db_conn, oid):
        assert cur.fetchone()[0] == 0
-if pytest.version_tuple >= (8, 0, 0):
+scenarios('features/db')
    PYTEST_BDD_SCENARIOS = ['features/db']
 else:
    from pytest_bdd import scenarios
    scenarios('features/db')
--- a/test/bdd/test_osm2pgsql.py
+++ b/test/bdd/test_osm2pgsql.py
@@ -11,7 +11,7 @@ import asyncio
 import random
 import pytest
-from pytest_bdd import when, then, given
+from pytest_bdd import scenarios, when, then, given
 from pytest_bdd.parsers import re as step_parse
 from nominatim_db import cli
@@ -106,8 +106,4 @@ def check_place_content(db_conn, datatable, node_grid, table, exact):
    check_table_content(db_conn, table, datatable, grid=node_grid, exact=bool(exact))
-if pytest.version_tuple >= (8, 0, 0):
+scenarios('features/osm2pgsql')
    PYTEST_BDD_SCENARIOS = ['features/osm2pgsql']
 else:
    from pytest_bdd import scenarios
    scenarios('features/osm2pgsql')
--- a/test/python/api/search/test_api_search_query.py
+++ b/test/python/api/search/test_api_search_query.py
@@ -17,9 +17,6 @@ class MyToken(query.Token):
    def get_category(self):
        return 'this', 'that'
    def get_country(self):
        return 'cc'
 def mktoken(tid: int):
    return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,
--- a/test/python/api/search/test_db_search_builder.py
+++ b/test/python/api/search/test_db_search_builder.py
@@ -2,14 +2,12 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Tests for creating abstract searches from token assignments.
 """
 from typing import Optional
 import pytest
 import dataclasses
 from nominatim_api.search.query import Token, TokenRange, QueryStruct, Phrase
 import nominatim_api.search.query as qmod
@@ -19,16 +17,10 @@ from nominatim_api.types import SearchDetails
 import nominatim_api.search.db_searches as dbs
@dataclasses.dataclass
 class MyToken(Token):
    """ Token stub for the tests with a fixed category and a configurable
        country code.
    """
    # Value handed out by get_country(); None unless set explicitly.
    cc: Optional[str] = None
    def get_category(self):
        # Every test token reports the same fixed category tuple.
        return 'this', 'that'
    def get_country(self):
        # Return whatever was stored in the `cc` field.
        return self.cc
 def make_query(*args):
    q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])
@@ -38,24 +30,18 @@ def make_query(*args):
    q.add_node(qmod.BREAK_END, qmod.PHRASE_ANY)
    for start, tlist in enumerate(args):
-        for end, ttype, tinfos in tlist:
+        for end, ttype, tinfo in tlist:
-            for tinfo in tinfos:
+            for tid, word in tinfo:
-                if isinstance(tinfo, tuple):
+                q.add_token(TokenRange(start, end), ttype,
-                    q.add_token(TokenRange(start, end), ttype,
+                            MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0,
-                                MyToken(penalty=0.5 if ttype == qmod.TOKEN_PARTIAL else 0.0,
+                                    token=tid, count=1, addr_count=1,
-                                        token=tinfo[0], count=1, addr_count=1,
+                                    lookup_word=word))
                                        lookup_word=tinfo[1]))
                else:
                    q.add_token(TokenRange(start, end), ttype, tinfo)
    return q
 def test_country_search():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
        ])])
    builder = SearchBuilder(q, SearchDetails())
    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -69,10 +55,7 @@ def test_country_search():
 def test_country_search_with_country_restriction():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
    ])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'en,fr'}))
    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -86,10 +69,7 @@ def test_country_search_with_country_restriction():
 def test_country_search_with_conflicting_country_restriction():
-    q = make_query([(1, qmod.TOKEN_COUNTRY, [
+    q = make_query([(1, qmod.TOKEN_COUNTRY, [(2, 'de'), (3, 'en')])])
        MyToken(penalty=0.0, token=2, count=1, addr_count=1, lookup_word='Germany', cc='de'),
        MyToken(penalty=0.0, token=3, count=1, addr_count=1, lookup_word='UK', cc='en'),
    ])])
    builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
    searches = list(builder.build(TokenAssignment(country=TokenRange(0, 1))))
@@ -114,11 +94,8 @@ def test_postcode_search_simple():
 def test_postcode_with_country():
-    q = make_query(
+    q = make_query([(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
-        [(1, qmod.TOKEN_POSTCODE, [(34, '2367')])],
+                   [(2, qmod.TOKEN_COUNTRY, [(1, 'xx')])])
        [(2, qmod.TOKEN_COUNTRY, [
            MyToken(penalty=0.0, token=1, count=1, addr_count=1, lookup_word='none', cc='xx'),
            ])])
    builder = SearchBuilder(q, SearchDetails())
    searches = list(builder.build(TokenAssignment(postcode=TokenRange(0, 1),
--- a/test/python/api/search/test_icu_query_analyzer.py
+++ b/test/python/api/search/test_icu_query_analyzer.py
@@ -116,11 +116,9 @@ async def test_penalty_postcodes_and_housenumbers(conn, term, order):
    assert query.num_token_slots() == 1
-    torder = [(min(t.penalty for t in tl.tokens), tl.ttype) for tl in query.nodes[0].starting]
+    torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting]
    torder.sort()
    print(torder)
    assert [t[1] for t in torder] == order
--- a/test/python/api/search/test_token_assignment.py
+++ b/test/python/api/search/test_token_assignment.py
@@ -20,9 +20,6 @@ class MyToken(Token):
    def get_category(self):
        return 'this', 'that'
    def get_country(self):
        return 'cc'
 def make_query(*args):
    q = QueryStruct([Phrase(args[0][1], '')])
--- a/test/python/api/test_api_reverse.py
+++ b/test/python/api/test_api_reverse.py
@@ -163,12 +163,12 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
                      parent_place_id=990,
                      rank_search=30, rank_address=30,
                      housenumber='23',
-                      centroid=(10.0, 10.0002))
+                      centroid=(10.0, 10.00002))
    apiobj.add_osmline(place_id=992,
                       parent_place_id=990,
                       startnumber=1, endnumber=3, step=1,
-                       centroid=(10.0, 10.0001),
+                       centroid=(10.0, 10.00001),
-                       geometry='LINESTRING(9.995 10.0001, 10.005 10.0001)')
+                       geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
    apiobj.add_placex(place_id=1990, class_='highway', type='service',
                      rank_search=27, rank_address=27,
                      name={'name': 'Other Street'},
@@ -177,8 +177,8 @@ def test_reverse_housenumber_interpolation(apiobj, frontend, with_geom):
    apiobj.add_osmline(place_id=1992,
                       parent_place_id=1990,
                       startnumber=1, endnumber=3, step=1,
-                       centroid=(10.0, 20.0001),
+                       centroid=(10.0, 20.00001),
-                       geometry='LINESTRING(9.995 20.0001, 10.005 20.0001)')
+                       geometry='LINESTRING(9.995 20.00001, 10.005 20.00001)')
    params = {'geometry_output': napi.GeometryFormat.TEXT} if with_geom else {}
--- a/test/python/api/test_api_search.py
+++ b/test/python/api/test_api_search.py
@@ -99,7 +99,7 @@ def test_address_simple_places(apiobj, frontend, atype, address, search):
 def test_address_country(apiobj, frontend):
-    apiobj.add_word_table([(None, 'ro', 'C', 'ro', {'cc': 'ro'})])
+    apiobj.add_word_table([(None, 'ro', 'C', 'ro', None)])
    apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
    apiobj.add_country_name('ro', {'name': 'România'})
--- a/test/python/mock_icu_word_table.py
+++ b/test/python/mock_icu_word_table.py
@@ -10,8 +10,6 @@ of the table.
 """
 from nominatim_db.db.connection import execute_scalar
 from psycopg.types.json import Jsonb
 class MockIcuWordTable:
    """ A word table for testing using legacy word table structure.
@@ -44,11 +42,11 @@ class MockIcuWordTable:
                        """, (word_token, word, cls, typ, oper))
        self.conn.commit()
-    def add_country(self, country_code, word_token, lookup):
+    def add_country(self, country_code, word_token):
        with self.conn.cursor() as cur:
-            cur.execute("""INSERT INTO word (word_token, type, word, info)
+            cur.execute("""INSERT INTO word (word_token, type, word)
-                           VALUES(%s, 'C', %s, %s)""",
+                           VALUES(%s, 'C', %s)""",
-                        (word_token, lookup, Jsonb({'cc': country_code})))
+                        (word_token, country_code))
        self.conn.commit()
    def add_postcode(self, word_token, postcode):
@@ -95,7 +93,7 @@ class MockIcuWordTable:
    def get_country(self):
        with self.conn.cursor() as cur:
-            cur.execute("SELECT info->>'cc', word_token, word FROM word WHERE type = 'C'")
+            cur.execute("SELECT word, word_token FROM word WHERE type = 'C'")
            result = set((tuple(row) for row in cur))
            assert len(result) == cur.rowcount, "Word table has duplicates."
            return result
--- a/test/python/tokenizer/sanitizers/test_clean_postcodes.py
+++ b/test/python/tokenizer/sanitizers/test_clean_postcodes.py
@@ -237,9 +237,3 @@ def test_postcode_default_pattern_pass(sanitize, postcode):
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
 def test_postcode_default_pattern_fail(sanitize, postcode):
    assert sanitize(country='an', postcode=postcode) == []
@pytest.mark.parametrize("postcode", ('00000', '00-000', 'PL-00000', 'PL 00-000'))
@pytest.mark.sanitizer_params(convert_to_address=False)
 def test_postcode_zeros(sanitize, postcode):
    assert sanitize(country='pl', postcode=postcode) == []
--- a/test/python/tokenizer/test_icu.py
+++ b/test/python/tokenizer/test_icu.py
@@ -343,18 +343,16 @@ def test_add_country_names_new(analyzer, word_table):
    with analyzer() as anl:
        anl.add_country_names('es', {'name': 'Espagña', 'name:en': 'Spain'})
-    assert word_table.get_country() == {('es', 'ESPAGÑA', 'Espagña'),
+    assert word_table.get_country() == {('es', 'ESPAGÑA'), ('es', 'SPAIN')}
                                        ('es', 'SPAIN', 'Spain')}
 def test_add_country_names_extend(analyzer, word_table):
-    word_table.add_country('ch', 'SCHWEIZ', 'Schweiz')
+    word_table.add_country('ch', 'SCHWEIZ')
    with analyzer() as anl:
        anl.add_country_names('ch', {'name': 'Schweiz', 'name:fr': 'Suisse'})
-    assert word_table.get_country() == {('ch', 'SCHWEIZ', 'Schweiz'),
+    assert word_table.get_country() == {('ch', 'SCHWEIZ'), ('ch', 'SUISSE')}
                                        ('ch', 'SUISSE', 'Suisse')}
 class TestPlaceNames:
@@ -405,7 +403,7 @@ class TestPlaceNames:
        info = self.analyzer.process_place(place)
        self.expect_name_terms(info, '#norge', 'norge')
-        assert word_table.get_country() == {('no', 'NORGE', 'Norge')}
+        assert word_table.get_country() == {('no', 'NORGE')}
 class TestPlaceAddress:
--- a/test/python/tools/test_import_special_phrases.py
+++ b/test/python/tools/test_import_special_phrases.py
@@ -203,7 +203,7 @@ def test_import_phrases(monkeypatch, temp_db_cursor, def_config, sp_importer,
    placex_table.add(cls='amenity', typ='animal_shelter')  # in db for special phrase filtering
    sp_importer.import_phrases(tokenizer, should_replace)
-    assert len(tokenizer.analyser_cache['special_phrases']) == 19
+    assert len(tokenizer.analyser_cache['special_phrases']) == 18
    assert check_table_exist(temp_db_cursor, class_test, type_test)
    assert check_placeid_and_centroid_indexes(temp_db_cursor, class_test, type_test)
--- a/test/python/tools/test_sp_wiki_loader.py
+++ b/test/python/tools/test_sp_wiki_loader.py
@@ -54,6 +54,4 @@ def test_generate_phrases(sp_wiki_loader):
         ('Water near', 'amenity', 'drinking_water', 'near'),
         ('Embassy', 'amenity', 'embassy', '-'),
         ('Embassys', 'amenity', 'embassy', '-'),
-         ('Embassies', 'amenity', 'embassy', '-'),
+         ('Embassies', 'amenity', 'embassy', '-')}
         # test for one-cell-per-line format
         ('Coworkings near', 'amenity', 'coworking_space', 'near')}
--- a/test/testdata/special_phrases_test_content.txt
+++ b/test/testdata/special_phrases_test_content.txt
@@ -1,120 +1,78 @@
-<mediawiki xmlns="http://www.mediawiki.org/xml/export-0.11/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
+<mediawiki xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
-  <siteinfo>
+<siteinfo>
-    <sitename>OpenStreetMap Wiki</sitename>
+<sitename>OpenStreetMap Wiki</sitename>
-    <dbname>wiki</dbname>
+<dbname>wiki</dbname>
-    <base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
+<base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
-    <generator>MediaWiki 1.43.5</generator>
+<generator>MediaWiki 1.35.2</generator>
-    <case>first-letter</case>
+<case>first-letter</case>
-    <namespaces>
+<namespaces>
-      <namespace key="-2" case="first-letter">Media</namespace>
+<namespace key="-2" case="first-letter">Media</namespace>
-      <namespace key="-1" case="first-letter">Special</namespace>
+<namespace key="-1" case="first-letter">Special</namespace>
-      <namespace key="0" case="first-letter"/>
+<namespace key="0" case="first-letter"/>
-      <namespace key="1" case="first-letter">Talk</namespace>
+<namespace key="1" case="first-letter">Talk</namespace>
-      <namespace key="2" case="first-letter">User</namespace>
+<namespace key="2" case="first-letter">User</namespace>
-      <namespace key="3" case="first-letter">User talk</namespace>
+<namespace key="3" case="first-letter">User talk</namespace>
-      <namespace key="4" case="first-letter">Wiki</namespace>
+<namespace key="4" case="first-letter">Wiki</namespace>
-      <namespace key="5" case="first-letter">Wiki talk</namespace>
+<namespace key="5" case="first-letter">Wiki talk</namespace>
-      <namespace key="6" case="first-letter">File</namespace>
+<namespace key="6" case="first-letter">File</namespace>
-      <namespace key="7" case="first-letter">File talk</namespace>
+<namespace key="7" case="first-letter">File talk</namespace>
-      <namespace key="8" case="first-letter">MediaWiki</namespace>
+<namespace key="8" case="first-letter">MediaWiki</namespace>
-      <namespace key="9" case="first-letter">MediaWiki talk</namespace>
+<namespace key="9" case="first-letter">MediaWiki talk</namespace>
-      <namespace key="10" case="first-letter">Template</namespace>
+<namespace key="10" case="first-letter">Template</namespace>
-      <namespace key="11" case="first-letter">Template talk</namespace>
+<namespace key="11" case="first-letter">Template talk</namespace>
-      <namespace key="12" case="first-letter">Help</namespace>
+<namespace key="12" case="first-letter">Help</namespace>
-      <namespace key="13" case="first-letter">Help talk</namespace>
+<namespace key="13" case="first-letter">Help talk</namespace>
-      <namespace key="14" case="first-letter">Category</namespace>
+<namespace key="14" case="first-letter">Category</namespace>
-      <namespace key="15" case="first-letter">Category talk</namespace>
+<namespace key="15" case="first-letter">Category talk</namespace>
-      <namespace key="120" case="first-letter">Item</namespace>
+<namespace key="120" case="first-letter">Item</namespace>
-      <namespace key="121" case="first-letter">Item talk</namespace>
+<namespace key="121" case="first-letter">Item talk</namespace>
-      <namespace key="122" case="first-letter">Property</namespace>
+<namespace key="122" case="first-letter">Property</namespace>
-      <namespace key="123" case="first-letter">Property talk</namespace>
+<namespace key="123" case="first-letter">Property talk</namespace>
-      <namespace key="200" case="first-letter">DE</namespace>
+<namespace key="200" case="first-letter">DE</namespace>
-      <namespace key="201" case="first-letter">DE talk</namespace>
+<namespace key="201" case="first-letter">DE talk</namespace>
-      <namespace key="202" case="first-letter">FR</namespace>
+<namespace key="202" case="first-letter">FR</namespace>
-      <namespace key="203" case="first-letter">FR talk</namespace>
+<namespace key="203" case="first-letter">FR talk</namespace>
-      <namespace key="204" case="first-letter">ES</namespace>
+<namespace key="204" case="first-letter">ES</namespace>
-      <namespace key="205" case="first-letter">ES talk</namespace>
+<namespace key="205" case="first-letter">ES talk</namespace>
-      <namespace key="206" case="first-letter">IT</namespace>
+<namespace key="206" case="first-letter">IT</namespace>
-      <namespace key="207" case="first-letter">IT talk</namespace>
+<namespace key="207" case="first-letter">IT talk</namespace>
-      <namespace key="208" case="first-letter">NL</namespace>
+<namespace key="208" case="first-letter">NL</namespace>
-      <namespace key="209" case="first-letter">NL talk</namespace>
+<namespace key="209" case="first-letter">NL talk</namespace>
-      <namespace key="210" case="first-letter">RU</namespace>
+<namespace key="210" case="first-letter">RU</namespace>
-      <namespace key="211" case="first-letter">RU talk</namespace>
+<namespace key="211" case="first-letter">RU talk</namespace>
-      <namespace key="212" case="first-letter">JA</namespace>
+<namespace key="212" case="first-letter">JA</namespace>
-      <namespace key="213" case="first-letter">JA talk</namespace>
+<namespace key="213" case="first-letter">JA talk</namespace>
-      <namespace key="710" case="first-letter">TimedText</namespace>
+<namespace key="710" case="first-letter">TimedText</namespace>
-      <namespace key="711" case="first-letter">TimedText talk</namespace>
+<namespace key="711" case="first-letter">TimedText talk</namespace>
-      <namespace key="828" case="first-letter">Module</namespace>
+<namespace key="828" case="first-letter">Module</namespace>
-      <namespace key="829" case="first-letter">Module talk</namespace>
+<namespace key="829" case="first-letter">Module talk</namespace>
-      <namespace key="3000" case="first-letter">Proposal</namespace>
+<namespace key="2300" case="first-letter">Gadget</namespace>
-      <namespace key="3001" case="first-letter">Proposal talk</namespace>
+<namespace key="2301" case="first-letter">Gadget talk</namespace>
-    </namespaces>
+<namespace key="2302" case="case-sensitive">Gadget definition</namespace>
-  </siteinfo>
+<namespace key="2303" case="case-sensitive">Gadget definition talk</namespace>
-  <page>
+</namespaces>
-    <title>Nominatim/Special Phrases/EN</title>
+</siteinfo>
-    <ns>0</ns>
+<page>
-    <id>67365</id>
+<title>Nominatim/Special Phrases/EN</title>
-    <revision>
+<ns>0</ns>
-      <id>2861977</id>
+<id>67365</id>
-      <parentid>2634159</parentid>
+<revision>
-      <timestamp>2025-06-02T14:00:52Z</timestamp>
+<id>2100424</id>
-      <contributor>
+<parentid>2100422</parentid>
-        <username>Lonvia</username>
+<timestamp>2021-01-27T20:29:53Z</timestamp>
-        <id>17191</id>
+<contributor>
-      </contributor>
+<username>Violaine Do</username>
-      <comment>overgeneralized entry removed, phrases need to chosen so that all results with the given tag can be described with that phrase</comment>
+<id>88152</id>
-      <origin>2861977</origin>
+</contributor>
-      <model>wikitext</model>
+<minor/>
-      <format>text/x-wiki</format>
+<comment>/* en */ add coworking amenity</comment>
-      <text bytes="160765" sha1="0zlpuvnjs4io9e006rntbxm5b84kgst" xml:space="preserve">== en ==
+<origin>2100424</origin>
-{| class="wikitable sortable"
+<model>wikitext</model>
-|-
+<format>text/x-wiki</format>
-! Word / Phrase !! Key !! Value !! Operator !! Plural
+<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
-|-
+== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || NEAR|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || In || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || "embassy" || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
-| Zip Line || aerialway || zip_line || - || N
+</text>
-|-
+<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
-| Zip Lines || aerialway || zip_line || - || Y
+</revision>
-|-
+</page>
 | Zip Line in || aerialway || zip_line || in || N
 |-
 | Zip Lines in || aerialway || zip_line || in || Y
 |-
 | Zip Line near || aerialway || zip_line || near || N
 |-
 | Animal shelter || amenity || animal_shelter || - || N
 |-
 | Animal shelters || amenity || animal_shelter || - || Y
 |-
 | Animal shelter in || amenity || animal_shelter || in || N
 |-
 | Animal shelters in || amenity || animal_shelter || in || Y
 |-
 | Animal shelter near || amenity || animal_shelter || near|| N
 |-
 | Animal shelters near || amenity || animal_shelter || NEAR|| Y
 |-
 | Drinking Water near || amenity || drinking_water || near || N
 |-
 | Water || amenity || drinking_water || - || N
 |-
 | Water in || amenity || drinking_water || In || N
 |-
 | Water near || amenity || drinking_water || near || N
 |-
 | Embassy || amenity || embassy || - || N
 |-
 | Embassys || amenity || "embassy" || - || Y
 |-
 | Embassies || amenity || embassy || - || Y
 |-
 | Coworkings near
 | amenity
 | coworking_space
 | near
 | Y
 |}
 [[Category:Word list]]</text>
      <sha1>0zlpuvnjs4io9e006rntbxm5b84kgst</sha1>
    </revision>
  </page>
 </mediawiki>
--- a/vagrant/Install-on-Ubuntu-22.sh
+++ b/vagrant/Install-on-Ubuntu-22.sh
@@ -25,7 +25,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
                        libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
                        nlohmann-json3-dev postgresql-14-postgis-3 \
                        postgresql-contrib-14 postgresql-14-postgis-3-scripts \
-                        libicu-dev virtualenv git
+                        libicu-dev virtualenv
 #
 # System Configuration
@@ -97,23 +97,6 @@ fi                                    #DOCS:
 # Building and Configuration
 # --------------------------
 #
 # Get the source code from Github and change into the source directory
 #
 if [ "x$1" == "xyes" ]; then  #DOCS:    :::sh
    cd $USERHOME
    git clone https://github.com/osm-search/Nominatim.git
    cd Nominatim
 else                               #DOCS:
    cd $USERHOME/Nominatim         #DOCS:
 fi                                 #DOCS:
 # When installing the latest source from github, you also need to
 # download the country grid:
 if [ ! -f data/country_osm_grid.sql.gz ]; then       #DOCS:    :::sh
    wget -O data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
 fi                                 #DOCS:
 # Nominatim needs osm2pgsql >= 1.8. The version that comes with Ubuntu is
 # too old. Download and compile your own:
@@ -124,7 +107,6 @@ fi                                 #DOCS:
    cmake ../osm2pgsql
    make
    sudo make install
    cd $USERHOME/Nominatim
 # Nominatim should be installed in a separate Python virtual environment.
 # Create the virtual environment:
@@ -137,8 +119,7 @@ fi                                 #DOCS:
 # Now install Nominatim using pip:
-    cd $USERHOME/Nominatim
+    $USERHOME/nominatim-venv/bin/pip install nominatim-db
    $USERHOME/nominatim-venv/bin/pip install packaging/nominatim-db
 # Nominatim is now ready to use. You can continue with
 # [importing a database from OSM data](../admin/Import.md). If you want to set up
@@ -154,9 +135,7 @@ fi                                 #DOCS:
 # To install all packages, run:
 #DOCS:```sh
-$USERHOME/nominatim-venv/bin/pip install falcon uvicorn gunicorn
+$USERHOME/nominatim-venv/bin/pip install falcon uvicorn gunicorn nominatim-api
 cd $USERHOME/Nominatim
 $USERHOME/nominatim-venv/bin/pip install packaging/nominatim-api
 #DOCS:```
--- a/vagrant/Install-on-Ubuntu-24.sh
+++ b/vagrant/Install-on-Ubuntu-24.sh
@@ -21,7 +21,7 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
 # Now you can install all packages needed for Nominatim:
    sudo apt-get install -y osm2pgsql postgresql-postgis postgresql-postgis-scripts \
-                            pkg-config libicu-dev virtualenv git
+                            pkg-config libicu-dev virtualenv
 #
@@ -94,23 +94,6 @@ fi                                    #DOCS:
 # Building and Configuration
 # --------------------------
 #
 # Get the source code from Github and change into the source directory
 #
 if [ "x$1" == "xyes" ]; then  #DOCS:    :::sh
    cd $USERHOME
    git clone https://github.com/osm-search/Nominatim.git
    cd Nominatim
 else                               #DOCS:
    cd $USERHOME/Nominatim         #DOCS:
 fi                                 #DOCS:
 # When installing the latest source from github, you also need to
 # download the country grid:
 if [ ! -f data/country_osm_grid.sql.gz ]; then       #DOCS:    :::sh
    wget -O data/country_osm_grid.sql.gz https://nominatim.org/data/country_grid.sql.gz
 fi                                 #DOCS:
 # Nominatim should be installed in a separate Python virtual environment.
 # Create the virtual environment:
@@ -122,8 +105,7 @@ fi                                 #DOCS:
 # Now install Nominatim using pip:
-    cd $USERHOME/Nominatim
+    $USERHOME/nominatim-venv/bin/pip install nominatim-db
    $USERHOME/nominatim-venv/bin/pip install packaging/nominatim-db
 # Nominatim is now ready to use. The nominatim binary is available at
 # `$USERHOME/nominatim-venv/bin/nominatim`. If you want to have 'nominatim' in your
@@ -147,9 +129,7 @@ fi                                 #DOCS:
 # To install all packages, run:
 #DOCS:```sh
-$USERHOME/nominatim-venv/bin/pip install falcon uvicorn gunicorn
+$USERHOME/nominatim-venv/bin/pip install falcon uvicorn gunicorn nominatim-api
 cd $USERHOME/Nominatim
 $USERHOME/nominatim-venv/bin/pip install packaging/nominatim-api
 #DOCS:```
 # Next you need to create a systemd job that runs Nominatim on gunicorn.