mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
simplify weigh_search() function
Use JSON arrays, which can hold mixed types and therefore allow a more logical structure than separate arrays. Avoid JSON dicts because of their verbosity.
This commit is contained in:
@@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE;
|
|||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
|
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
|
||||||
term_vectors TEXT[],
|
rankings TEXT,
|
||||||
weight_vectors FLOAT[],
|
|
||||||
def_weight FLOAT)
|
def_weight FLOAT)
|
||||||
RETURNS FLOAT
|
RETURNS FLOAT
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
pos INT := 1;
|
rank JSON;
|
||||||
terms TEXT;
|
|
||||||
BEGIN
|
BEGIN
|
||||||
FOREACH terms IN ARRAY term_vectors
|
FOR rank IN
|
||||||
|
SELECT * FROM json_array_elements(rankings::JSON)
|
||||||
LOOP
|
LOOP
|
||||||
IF search_vector @> terms::INTEGER[] THEN
|
IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
|
||||||
RETURN weight_vectors[pos];
|
RETURN (rank->>0)::float;
|
||||||
END IF;
|
END IF;
|
||||||
pos := pos + 1;
|
|
||||||
END LOOP;
|
END LOOP;
|
||||||
RETURN def_weight;
|
RETURN def_weight;
|
||||||
END;
|
END;
|
||||||
|
|||||||
@@ -14,6 +14,7 @@ import sqlalchemy as sa
|
|||||||
|
|
||||||
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
from nominatim.typing import SaFromClause, SaColumn, SaExpression
|
||||||
from nominatim.api.search.query import Token
|
from nominatim.api.search.query import Token
|
||||||
|
from nominatim.utils.json_writer import JsonWriter
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
class WeightedStrings:
|
class WeightedStrings:
|
||||||
@@ -128,11 +129,17 @@ class FieldRanking:
|
|||||||
"""
|
"""
|
||||||
assert self.rankings
|
assert self.rankings
|
||||||
|
|
||||||
return sa.func.weigh_search(table.c[self.column],
|
rout = JsonWriter().start_array()
|
||||||
[f"{{{','.join((str(s) for s in r.tokens))}}}"
|
for rank in self.rankings:
|
||||||
for r in self.rankings],
|
rout.start_array().value(rank.penalty).next()
|
||||||
[r.penalty for r in self.rankings],
|
rout.start_array()
|
||||||
self.default)
|
for token in rank.tokens:
|
||||||
|
rout.value(token).next()
|
||||||
|
rout.end_array()
|
||||||
|
rout.end_array().next()
|
||||||
|
rout.end_array()
|
||||||
|
|
||||||
|
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
|
||||||
|
|
||||||
|
|
||||||
@dataclasses.dataclass
|
@dataclasses.dataclass
|
||||||
|
|||||||
@@ -76,8 +76,8 @@ class JsonWriter:
|
|||||||
def end_array(self) -> 'JsonWriter':
|
def end_array(self) -> 'JsonWriter':
|
||||||
""" Write the closing bracket of a JSON array.
|
""" Write the closing bracket of a JSON array.
|
||||||
"""
|
"""
|
||||||
assert self.pending in (',', '[', '')
|
assert self.pending in (',', '[', ']', ')', '')
|
||||||
if self.pending == '[':
|
if self.pending not in (',', ''):
|
||||||
self.data.write(self.pending)
|
self.data.write(self.pending)
|
||||||
self.pending = ']'
|
self.pending = ']'
|
||||||
return self
|
return self
|
||||||
|
|||||||
Reference in New Issue
Block a user