Merge pull request #4024 from lonvia/remove-fallback-importance

Clean up importance computation
2026-03-09 19:44:07 +00:00 · 2026-03-09 17:05:48 +01:00
parent d43e95f177 cd84386004
commit f11b21b0ba
10 changed files with 64 additions and 54 deletions
--- a/lib-sql/functions/placex_triggers.sql
+++ b/lib-sql/functions/placex_triggers.sql
@@ -731,6 +731,10 @@ BEGIN

  END IF;

+  IF NEW.importance IS NULL THEN
+    NEW.importance := 0.40001 - (NEW.rank_search::float / 75);
+  END IF;
+
  {% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}

 {% if not disable_diff_updates %}
@@ -1076,10 +1080,8 @@ BEGIN
      {% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
  END IF;

-  NEW.importance := null;
-  SELECT wikipedia, importance
-    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid)
-    INTO NEW.wikipedia,NEW.importance;
+  SELECT wikipedia, importance INTO NEW.wikipedia, NEW.importance
+    FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid);

 {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}

@@ -1136,10 +1138,10 @@ BEGIN
                                       NEW.token_info, NEW.centroid);

        IF array_length(name_vector, 1) is not NULL THEN
-          INSERT INTO search_name (place_id, search_rank, address_rank,
+          INSERT INTO search_name (place_id, address_rank,
                                   importance, country_code, name_vector,
                                   nameaddress_vector, centroid)
-                 VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address,
+                 VALUES (NEW.place_id, NEW.rank_address,
                         NEW.importance, NEW.country_code, name_vector,
                         nameaddress_vector, NEW.centroid);
          {% if debug %}RAISE WARNING 'Place added to search table';{% endif %}
@@ -1309,10 +1311,10 @@ BEGIN
    {% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}

    {% if not db.reverse_only %}
-        INSERT INTO search_name (place_id, search_rank, address_rank,
+        INSERT INTO search_name (place_id, address_rank,
                                 importance, country_code, name_vector,
                                 nameaddress_vector, centroid)
-               VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address,
+               VALUES (NEW.place_id, NEW.rank_address,
                       NEW.importance, NEW.country_code, name_vector,
                       nameaddress_vector, NEW.centroid);
    {% endif %}
--- a/lib-sql/tables/placex.sql
+++ b/lib-sql/tables/placex.sql
@@ -12,7 +12,7 @@ CREATE TABLE placex (
  place_id BIGINT NOT NULL,
  parent_place_id BIGINT,
  linked_place_id BIGINT,
-  importance FLOAT,
+  importance FLOAT NOT NULL,
  indexed_date TIMESTAMP,
  geometry_sector INTEGER NOT NULL,
  rank_address SMALLINT NOT NULL,
--- a/lib-sql/tables/search_name.sql
+++ b/lib-sql/tables/search_name.sql
@@ -12,7 +12,6 @@ DROP TABLE IF EXISTS search_name;
 CREATE TABLE search_name (
  place_id BIGINT NOT NULL,
  importance FLOAT NOT NULL,
-  search_rank SMALLINT NOT NULL,
  address_rank SMALLINT NOT NULL,
  name_vector integer[] NOT NULL,
  nameaddress_vector integer[] NOT NULL,
--- a/src/nominatim_api/search/db_searches/address_search.py
+++ b/src/nominatim_api/search/db_searches/address_search.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Implementation of search for an address (search with housenumber).
@@ -155,13 +155,7 @@ class AddressSearch(base.AbstractSearch):
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

-        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
-                        t.c.country_code, t.c.centroid,
-                        t.c.name_vector, t.c.nameaddress_vector,
-                        sa.case((t.c.importance > 0, t.c.importance),
-                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
-                          .label('importance'),
-                        penalty.label('penalty'))
+        sql = sa.select(t.c.place_id, penalty.label('penalty'))

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))
@@ -202,9 +196,7 @@ class AddressSearch(base.AbstractSearch):

        inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()

-        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
-                        inner.c.country_code, inner.c.centroid, inner.c.importance,
-                        inner.c.penalty)
+        sql = sa.select(inner.c.place_id, inner.c.penalty)

        return sql.cte('searches')

@@ -240,12 +232,12 @@ class AddressSearch(base.AbstractSearch):
                               else_=1.0)

        if details.near is not None:
-            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
+            sql = sql.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
-            sql = sql.order_by(penalty - tsearch.c.importance)
-            sql = sql.add_columns(tsearch.c.importance)
+            sql = sql.order_by(penalty - t.c.importance)
+            sql = sql.add_columns(t.c.importance)

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))
@@ -253,7 +245,7 @@ class AddressSearch(base.AbstractSearch):
        hnr_list = '|'.join(self.housenumbers.values)

        if self.has_address_terms:
-            sql = sql.where(sa.or_(tsearch.c.address_rank < 30,
+            sql = sql.where(sa.or_(t.c.rank_address < 30,
                                   sa.func.RegexpWord(hnr_list, t.c.housenumber)))

        inner = sql.subquery()
--- a/src/nominatim_api/search/db_searches/place_search.py
+++ b/src/nominatim_api/search/db_searches/place_search.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Implementation of search for a named place (without housenumber).
@@ -58,12 +58,7 @@ class PlaceSearch(base.AbstractSearch):
        for ranking in self.rankings:
            penalty += ranking.sql_penalty(t)

-        sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank,
-                        t.c.country_code, t.c.centroid,
-                        t.c.name_vector, t.c.nameaddress_vector,
-                        sa.case((t.c.importance > 0, t.c.importance),
-                                else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
-                          .label('importance'))
+        sql = sa.select(t.c.place_id, t.c.importance)

        for lookup in self.lookups:
            sql = sql.where(lookup.sql_condition(t))
@@ -103,12 +98,13 @@ class PlaceSearch(base.AbstractSearch):

        if details.excluded:
            sql = sql.where(base.exclude_places(t))
-        if details.min_rank > 0:
-            sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM,
-                                   t.c.search_rank >= MIN_RANK_PARAM))
-        if details.max_rank < 30:
-            sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM,
-                                   t.c.search_rank <= MAX_RANK_PARAM))
+        # Only pre-filter on address_rank here. The exact rank restriction
+        # also depends on rank_search (state-cities like Berlin), which
+        # search_name does not have, so this must never be stricter.
+        if details.max_rank < 26:
+            sql = sql.where(t.c.address_rank < 26)
+        elif details.max_rank < 30:
+            sql = sql.where(t.c.address_rank <= MAX_RANK_PARAM)

        sql = sql.add_columns(penalty.label('penalty'))

@@ -116,11 +112,9 @@ class PlaceSearch(base.AbstractSearch):
                   .order_by(sa.desc(sa.text('importance')))\
                   .subquery()

-        sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
-                        inner.c.country_code, inner.c.centroid, inner.c.importance,
-                        inner.c.penalty)
+        sql = sa.select(inner.c.place_id, inner.c.importance, inner.c.penalty)

-        # If the query is not an address search or has a geographic preference,
+        # If the query has no geographic preference,
        # preselect most important items to restrict the number of places
        # that need to be looked up in placex.
        if (details.viewbox is None or not details.bounded_viewbox)\
@@ -132,9 +126,7 @@ class PlaceSearch(base.AbstractSearch):

            inner = sql.subquery()

-            sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank,
-                            inner.c.country_code, inner.c.centroid, inner.c.importance,
-                            inner.c.penalty)\
+            sql = sa.select(inner.c.place_id, inner.c.penalty)\
                    .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)

        return sql.cte('searches')
@@ -169,12 +161,19 @@ class PlaceSearch(base.AbstractSearch):
                penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0)

        if details.near is not None:
-            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
+            sql = sql.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
                                  .label('importance'))
            sql = sql.order_by(sa.desc(sa.text('importance')))
        else:
-            sql = sql.order_by(penalty - tsearch.c.importance)
-            sql = sql.add_columns(tsearch.c.importance)
+            sql = sql.order_by(penalty - t.c.importance)
+            sql = sql.add_columns(t.c.importance)
+
+        if details.min_rank > 0:
+            sql = sql.where(sa.or_(t.c.rank_address >= MIN_RANK_PARAM,
+                                   t.c.rank_search >= MIN_RANK_PARAM))
+        if details.max_rank < 30:
+            sql = sql.where(sa.or_(t.c.rank_address <= MAX_RANK_PARAM,
+                                   t.c.rank_search <= MAX_RANK_PARAM))

        sql = sql.add_columns(penalty.label('accuracy'))\
                 .order_by(sa.text('accuracy'))
--- a/src/nominatim_api/sql/sqlalchemy_schema.py
+++ b/src/nominatim_api/sql/sqlalchemy_schema.py
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2025 by the Nominatim developer community.
+# Copyright (C) 2026 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 SQLAlchemy definitions for all tables used by the frontend.
@@ -112,7 +112,6 @@ class SearchTables:
            'search_name', meta,
            sa.Column('place_id', sa.BigInteger),
            sa.Column('importance', sa.Float),
-            sa.Column('search_rank', sa.SmallInteger),
            sa.Column('address_rank', sa.SmallInteger),
            sa.Column('name_vector', IntArray),
            sa.Column('nameaddress_vector', IntArray),
--- a/src/nominatim_db/tools/migration.py
+++ b/src/nominatim_db/tools/migration.py
@@ -14,7 +14,7 @@ from ..errors import UsageError
 from ..config import Configuration
 from ..db import properties
 from ..db.connection import connect, Connection, \
-                            table_exists, register_hstore
+                            table_exists, register_hstore, table_has_column
 from ..db.sql_preprocessor import SQLPreprocessor
 from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
 from ..tokenizer import factory as tokenizer_factory
@@ -420,3 +420,29 @@ def create_place_interpolation_table(conn: Connection, config: Configuration, **
              SET type = coalesce(address->'interpolation', 'all'),
                  address = address - 'interpolation'::TEXT;
            """)
+
+
+@_migration(5, 2, 99, 4)
+def backfill_importance(conn: Connection, **_: Any) -> None:
+    """ Backfill missing importance values.
+
+        Rows without a usable importance (NULL or not positive) get the
+        rank-based fallback. The placex column is then made NOT NULL to
+        match the table definition of a fresh import. On databases that
+        still have search_name.search_rank, search_name is backfilled
+        the same way before the obsolete column is dropped.
+    """
+    conn.execute("""UPDATE placex
+                    SET importance = 0.40001 - (rank_search::float / 75)
+                    WHERE importance is NULL OR importance <= 0
+                 """)
+    # Fresh imports create placex.importance as NOT NULL; keep migrated
+    # databases on the same schema now that no NULLs are left.
+    conn.execute("ALTER TABLE placex ALTER COLUMN importance SET NOT NULL")
+    if table_exists(conn, 'search_name')\
+       and table_has_column(conn, 'search_name', 'search_rank'):
+        conn.execute("""UPDATE search_name
+                        SET importance = 0.40001 - (search_rank::float / 75)
+                        WHERE importance is NULL OR importance <= 0
+                     """)
+        conn.execute("ALTER TABLE search_name DROP COLUMN search_rank")
--- a/src/nominatim_db/version.py
+++ b/src/nominatim_db/version.py
@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
    return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])


-NOMINATIM_VERSION = parse_version('5.2.99-3')
+NOMINATIM_VERSION = parse_version('5.2.99-4')

 POSTGRESQL_REQUIRED_VERSION = (12, 0)
 POSTGIS_REQUIRED_VERSION = (3, 0)
--- a/test/python/conftest.py
+++ b/test/python/conftest.py
@@ -293,13 +293,14 @@ def placex_row(placex_table, temp_db_cursor):
    def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
             admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
             country=None, housenumber=None, rank_search=30, rank_address=30,
-             centroid='POINT(10 4)', indexed_status=0, indexed_date=None):
+             centroid='POINT(10 4)', indexed_status=0, indexed_date=None,
+             importance=0.00001):
        args = {'place_id': pysql.SQL("nextval('seq_place')"),
                'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
                'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
                'address': address, 'housenumber': housenumber,
                'rank_search': rank_search, 'rank_address': rank_address,
-                'extratags': extratags,
+                'extratags': extratags, 'importance': importance,
                'centroid': _with_srid(centroid), 'geometry': _with_srid(geom),
                'country_code': country,
                'indexed_status': indexed_status, 'indexed_date': indexed_date,
--- a/test/python/tools/test_database_import.py
+++ b/test/python/tools/test_database_import.py
@@ -176,6 +176,7 @@ async def test_load_data(dsn, place_row, place_interpolation_row, placex_table,
        BEGIN
          NEW.place_id := nextval('seq_place');
          NEW.indexed_status := 1;
+          NEW.importance := 0.00001;
          NEW.centroid := ST_Centroid(NEW.geometry);
          NEW.partition := 0;
          NEW.geometry_sector := 2424;