Merge pull request #4024 from lonvia/remove-fallback-importance

Clean up importance computation
This commit is contained in:
Sarah Hoffmann
2026-03-09 17:05:48 +01:00
committed by GitHub
10 changed files with 64 additions and 54 deletions

View File

@@ -731,6 +731,10 @@ BEGIN
END IF; END IF;
IF NEW.importance IS NULL THEN
NEW.importance := 0.40001 - (NEW.rank_search::float / 75);
END IF;
{% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %} {% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %}
{% if not disable_diff_updates %} {% if not disable_diff_updates %}
@@ -1076,10 +1080,8 @@ BEGIN
{% if debug %}RAISE WARNING 'Waterway processed';{% endif %} {% if debug %}RAISE WARNING 'Waterway processed';{% endif %}
END IF; END IF;
NEW.importance := null; SELECT wikipedia, importance INTO NEW.wikipedia, NEW.importance
SELECT wikipedia, importance FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid);
FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid)
INTO NEW.wikipedia,NEW.importance;
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %} {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
@@ -1136,10 +1138,10 @@ BEGIN
NEW.token_info, NEW.centroid); NEW.token_info, NEW.centroid);
IF array_length(name_vector, 1) is not NULL THEN IF array_length(name_vector, 1) is not NULL THEN
INSERT INTO search_name (place_id, search_rank, address_rank, INSERT INTO search_name (place_id, address_rank,
importance, country_code, name_vector, importance, country_code, name_vector,
nameaddress_vector, centroid) nameaddress_vector, centroid)
VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address, VALUES (NEW.place_id, NEW.rank_address,
NEW.importance, NEW.country_code, name_vector, NEW.importance, NEW.country_code, name_vector,
nameaddress_vector, NEW.centroid); nameaddress_vector, NEW.centroid);
{% if debug %}RAISE WARNING 'Place added to search table';{% endif %} {% if debug %}RAISE WARNING 'Place added to search table';{% endif %}
@@ -1309,10 +1311,10 @@ BEGIN
{% if debug %}RAISE WARNING 'added to search name (full)';{% endif %} {% if debug %}RAISE WARNING 'added to search name (full)';{% endif %}
{% if not db.reverse_only %} {% if not db.reverse_only %}
INSERT INTO search_name (place_id, search_rank, address_rank, INSERT INTO search_name (place_id, address_rank,
importance, country_code, name_vector, importance, country_code, name_vector,
nameaddress_vector, centroid) nameaddress_vector, centroid)
VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address, VALUES (NEW.place_id, NEW.rank_address,
NEW.importance, NEW.country_code, name_vector, NEW.importance, NEW.country_code, name_vector,
nameaddress_vector, NEW.centroid); nameaddress_vector, NEW.centroid);
{% endif %} {% endif %}

View File

@@ -12,7 +12,7 @@ CREATE TABLE placex (
place_id BIGINT NOT NULL, place_id BIGINT NOT NULL,
parent_place_id BIGINT, parent_place_id BIGINT,
linked_place_id BIGINT, linked_place_id BIGINT,
importance FLOAT, importance FLOAT NOT NULL,
indexed_date TIMESTAMP, indexed_date TIMESTAMP,
geometry_sector INTEGER NOT NULL, geometry_sector INTEGER NOT NULL,
rank_address SMALLINT NOT NULL, rank_address SMALLINT NOT NULL,

View File

@@ -12,7 +12,6 @@ DROP TABLE IF EXISTS search_name;
CREATE TABLE search_name ( CREATE TABLE search_name (
place_id BIGINT NOT NULL, place_id BIGINT NOT NULL,
importance FLOAT NOT NULL, importance FLOAT NOT NULL,
search_rank SMALLINT NOT NULL,
address_rank SMALLINT NOT NULL, address_rank SMALLINT NOT NULL,
name_vector integer[] NOT NULL, name_vector integer[] NOT NULL,
nameaddress_vector integer[] NOT NULL, nameaddress_vector integer[] NOT NULL,

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Implementation of search for an address (search with housenumber). Implementation of search for an address (search with housenumber).
@@ -155,13 +155,7 @@ class AddressSearch(base.AbstractSearch):
for ranking in self.rankings: for ranking in self.rankings:
penalty += ranking.sql_penalty(t) penalty += ranking.sql_penalty(t)
sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank, sql = sa.select(t.c.place_id, penalty.label('penalty'))
t.c.country_code, t.c.centroid,
t.c.name_vector, t.c.nameaddress_vector,
sa.case((t.c.importance > 0, t.c.importance),
else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
.label('importance'),
penalty.label('penalty'))
for lookup in self.lookups: for lookup in self.lookups:
sql = sql.where(lookup.sql_condition(t)) sql = sql.where(lookup.sql_condition(t))
@@ -202,9 +196,7 @@ class AddressSearch(base.AbstractSearch):
inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery() inner = sql.limit(10000).order_by(sa.desc(sa.text('importance'))).subquery()
sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank, sql = sa.select(inner.c.place_id, inner.c.penalty)
inner.c.country_code, inner.c.centroid, inner.c.importance,
inner.c.penalty)
return sql.cte('searches') return sql.cte('searches')
@@ -240,12 +232,12 @@ class AddressSearch(base.AbstractSearch):
else_=1.0) else_=1.0)
if details.near is not None: if details.near is not None:
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM)) sql = sql.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance')) .label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance'))) sql = sql.order_by(sa.desc(sa.text('importance')))
else: else:
sql = sql.order_by(penalty - tsearch.c.importance) sql = sql.order_by(penalty - t.c.importance)
sql = sql.add_columns(tsearch.c.importance) sql = sql.add_columns(t.c.importance)
sql = sql.add_columns(penalty.label('accuracy'))\ sql = sql.add_columns(penalty.label('accuracy'))\
.order_by(sa.text('accuracy')) .order_by(sa.text('accuracy'))
@@ -253,7 +245,7 @@ class AddressSearch(base.AbstractSearch):
hnr_list = '|'.join(self.housenumbers.values) hnr_list = '|'.join(self.housenumbers.values)
if self.has_address_terms: if self.has_address_terms:
sql = sql.where(sa.or_(tsearch.c.address_rank < 30, sql = sql.where(sa.or_(t.c.rank_address < 30,
sa.func.RegexpWord(hnr_list, t.c.housenumber))) sa.func.RegexpWord(hnr_list, t.c.housenumber)))
inner = sql.subquery() inner = sql.subquery()

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
Implementation of search for a named place (without housenumber). Implementation of search for a named place (without housenumber).
@@ -58,12 +58,7 @@ class PlaceSearch(base.AbstractSearch):
for ranking in self.rankings: for ranking in self.rankings:
penalty += ranking.sql_penalty(t) penalty += ranking.sql_penalty(t)
sql = sa.select(t.c.place_id, t.c.search_rank, t.c.address_rank, sql = sa.select(t.c.place_id, t.c.importance)
t.c.country_code, t.c.centroid,
t.c.name_vector, t.c.nameaddress_vector,
sa.case((t.c.importance > 0, t.c.importance),
else_=0.40001-(sa.cast(t.c.search_rank, sa.Float())/75))
.label('importance'))
for lookup in self.lookups: for lookup in self.lookups:
sql = sql.where(lookup.sql_condition(t)) sql = sql.where(lookup.sql_condition(t))
@@ -103,12 +98,13 @@ class PlaceSearch(base.AbstractSearch):
if details.excluded: if details.excluded:
sql = sql.where(base.exclude_places(t)) sql = sql.where(base.exclude_places(t))
if details.min_rank > 0: # Do not restrict ranks too much yet because rank restriction
sql = sql.where(sa.or_(t.c.address_rank >= MIN_RANK_PARAM, # currently also depends on search_rank to account for state-cities
t.c.search_rank >= MIN_RANK_PARAM)) # like Berlin.
if details.max_rank < 30: if details.max_rank < 26:
sql = sql.where(sa.or_(t.c.address_rank <= MAX_RANK_PARAM, sql = sql.where(t.c.address_rank < 26)
t.c.search_rank <= MAX_RANK_PARAM)) elif details.max_rank < 30:
sql = sql.where(t.c.address_rank < MAX_RANK_PARAM)
sql = sql.add_columns(penalty.label('penalty')) sql = sql.add_columns(penalty.label('penalty'))
@@ -116,11 +112,9 @@ class PlaceSearch(base.AbstractSearch):
.order_by(sa.desc(sa.text('importance')))\ .order_by(sa.desc(sa.text('importance')))\
.subquery() .subquery()
sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank, sql = sa.select(inner.c.place_id, inner.c.importance, inner.c.penalty)
inner.c.country_code, inner.c.centroid, inner.c.importance,
inner.c.penalty)
# If the query is not an address search or has a geographic preference, # If the query has no geographic preference,
# preselect most important items to restrict the number of places # preselect most important items to restrict the number of places
# that need to be looked up in placex. # that need to be looked up in placex.
if (details.viewbox is None or not details.bounded_viewbox)\ if (details.viewbox is None or not details.bounded_viewbox)\
@@ -132,9 +126,7 @@ class PlaceSearch(base.AbstractSearch):
inner = sql.subquery() inner = sql.subquery()
sql = sa.select(inner.c.place_id, inner.c.search_rank, inner.c.address_rank, sql = sa.select(inner.c.place_id, inner.c.penalty)\
inner.c.country_code, inner.c.centroid, inner.c.importance,
inner.c.penalty)\
.where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5) .where(inner.c.penalty - inner.c.importance < inner.c.min_penalty + 0.5)
return sql.cte('searches') return sql.cte('searches')
@@ -169,12 +161,19 @@ class PlaceSearch(base.AbstractSearch):
penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0) penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0), else_=1.0)
if details.near is not None: if details.near is not None:
sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM)) sql = sql.add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance')) .label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance'))) sql = sql.order_by(sa.desc(sa.text('importance')))
else: else:
sql = sql.order_by(penalty - tsearch.c.importance) sql = sql.order_by(penalty - t.c.importance)
sql = sql.add_columns(tsearch.c.importance) sql = sql.add_columns(t.c.importance)
if details.min_rank > 0:
sql = sql.where(sa.or_(t.c.rank_address >= MIN_RANK_PARAM,
t.c.rank_search >= MIN_RANK_PARAM))
if details.max_rank < 30:
sql = sql.where(sa.or_(t.c.rank_address <= MAX_RANK_PARAM,
t.c.rank_search <= MAX_RANK_PARAM))
sql = sql.add_columns(penalty.label('accuracy'))\ sql = sql.add_columns(penalty.label('accuracy'))\
.order_by(sa.text('accuracy')) .order_by(sa.text('accuracy'))

View File

@@ -2,7 +2,7 @@
# #
# This file is part of Nominatim. (https://nominatim.org) # This file is part of Nominatim. (https://nominatim.org)
# #
# Copyright (C) 2025 by the Nominatim developer community. # Copyright (C) 2026 by the Nominatim developer community.
# For a full list of authors see the git log. # For a full list of authors see the git log.
""" """
SQLAlchemy definitions for all tables used by the frontend. SQLAlchemy definitions for all tables used by the frontend.
@@ -112,7 +112,6 @@ class SearchTables:
'search_name', meta, 'search_name', meta,
sa.Column('place_id', sa.BigInteger), sa.Column('place_id', sa.BigInteger),
sa.Column('importance', sa.Float), sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger), sa.Column('address_rank', sa.SmallInteger),
sa.Column('name_vector', IntArray), sa.Column('name_vector', IntArray),
sa.Column('nameaddress_vector', IntArray), sa.Column('nameaddress_vector', IntArray),

View File

@@ -14,7 +14,7 @@ from ..errors import UsageError
from ..config import Configuration from ..config import Configuration
from ..db import properties from ..db import properties
from ..db.connection import connect, Connection, \ from ..db.connection import connect, Connection, \
table_exists, register_hstore table_exists, register_hstore, table_has_column
from ..db.sql_preprocessor import SQLPreprocessor from ..db.sql_preprocessor import SQLPreprocessor
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory from ..tokenizer import factory as tokenizer_factory
@@ -420,3 +420,20 @@ def create_place_interpolation_table(conn: Connection, config: Configuration, **
SET type = coalesce(address->'interpolation', 'all'), SET type = coalesce(address->'interpolation', 'all'),
address = address - 'interpolation'::TEXT; address = address - 'interpolation'::TEXT;
""") """)
@_migration(5, 2, 99, 4)
def backfill_importance(conn: Connection, **_: Any) -> None:
    """ Give every row a positive importance and retire search_rank.

        Rows of placex whose importance is NULL or not greater than zero
        are set to `0.40001 - rank_search / 75`. If a search_name table
        that still carries a search_rank column is found, its rows are
        fixed up the same way from search_rank and the column is dropped
        afterwards.
    """
    conn.execute("UPDATE placex\n"
                 "SET importance = 0.40001 - (rank_search::float / 75)\n"
                 "WHERE importance is NULL OR importance <= 0\n")

    # Nothing more to do when search_name is absent or the column
    # is no longer part of the table.
    if not table_exists(conn, 'search_name'):
        return
    if not table_has_column(conn, 'search_name', 'search_rank'):
        return

    # The backfill reads search_rank, so it has to run before the
    # column is removed.
    conn.execute("UPDATE search_name\n"
                 "SET importance = 0.40001 - (search_rank::float / 75)\n"
                 "WHERE importance is NULL OR importance <= 0\n")
    conn.execute("ALTER TABLE search_name DROP COLUMN search_rank")

View File

@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')]) return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
NOMINATIM_VERSION = parse_version('5.2.99-3') NOMINATIM_VERSION = parse_version('5.2.99-4')
POSTGRESQL_REQUIRED_VERSION = (12, 0) POSTGRESQL_REQUIRED_VERSION = (12, 0)
POSTGIS_REQUIRED_VERSION = (3, 0) POSTGIS_REQUIRED_VERSION = (3, 0)

View File

@@ -293,13 +293,14 @@ def placex_row(placex_table, temp_db_cursor):
def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None, def _add(osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
admin_level=None, address=None, extratags=None, geom='POINT(10 4)', admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
country=None, housenumber=None, rank_search=30, rank_address=30, country=None, housenumber=None, rank_search=30, rank_address=30,
centroid='POINT(10 4)', indexed_status=0, indexed_date=None): centroid='POINT(10 4)', indexed_status=0, indexed_date=None,
importance=0.00001):
args = {'place_id': pysql.SQL("nextval('seq_place')"), args = {'place_id': pysql.SQL("nextval('seq_place')"),
'osm_type': osm_type, 'osm_id': osm_id or next(idseq), 'osm_type': osm_type, 'osm_id': osm_id or next(idseq),
'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level, 'class': cls, 'type': typ, 'name': names, 'admin_level': admin_level,
'address': address, 'housenumber': housenumber, 'address': address, 'housenumber': housenumber,
'rank_search': rank_search, 'rank_address': rank_address, 'rank_search': rank_search, 'rank_address': rank_address,
'extratags': extratags, 'extratags': extratags, 'importance': importance,
'centroid': _with_srid(centroid), 'geometry': _with_srid(geom), 'centroid': _with_srid(centroid), 'geometry': _with_srid(geom),
'country_code': country, 'country_code': country,
'indexed_status': indexed_status, 'indexed_date': indexed_date, 'indexed_status': indexed_status, 'indexed_date': indexed_date,

View File

@@ -176,6 +176,7 @@ async def test_load_data(dsn, place_row, place_interpolation_row, placex_table,
BEGIN BEGIN
NEW.place_id := nextval('seq_place'); NEW.place_id := nextval('seq_place');
NEW.indexed_status := 1; NEW.indexed_status := 1;
NEW.importance := 0.00001;
NEW.centroid := ST_Centroid(NEW.geometry); NEW.centroid := ST_Centroid(NEW.geometry);
NEW.partition := 0; NEW.partition := 0;
NEW.geometry_sector := 2424; NEW.geometry_sector := 2424;