simplify weigh_search() function

Use JSON arrays, which can hold mixed types and therefore allow
a more logical structure than separate arrays. Avoid JSON dicts
because of their verbosity.
This commit is contained in:
Sarah Hoffmann
2023-12-05 16:07:56 +01:00
parent 05e47fbb28
commit c41f2fed21
3 changed files with 20 additions and 15 deletions

View File

@@ -287,21 +287,19 @@ LANGUAGE plpgsql IMMUTABLE;
CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[], CREATE OR REPLACE FUNCTION weigh_search(search_vector INT[],
term_vectors TEXT[], rankings TEXT,
weight_vectors FLOAT[],
def_weight FLOAT) def_weight FLOAT)
RETURNS FLOAT RETURNS FLOAT
AS $$ AS $$
DECLARE DECLARE
pos INT := 1; rank JSON;
terms TEXT;
BEGIN BEGIN
FOREACH terms IN ARRAY term_vectors FOR rank IN
SELECT * FROM json_array_elements(rankings::JSON)
LOOP LOOP
IF search_vector @> terms::INTEGER[] THEN IF true = ALL(SELECT x::int = ANY(search_vector) FROM json_array_elements_text(rank->1) as x) THEN
RETURN weight_vectors[pos]; RETURN (rank->>0)::float;
END IF; END IF;
pos := pos + 1;
END LOOP; END LOOP;
RETURN def_weight; RETURN def_weight;
END; END;

View File

@@ -14,6 +14,7 @@ import sqlalchemy as sa
from nominatim.typing import SaFromClause, SaColumn, SaExpression from nominatim.typing import SaFromClause, SaColumn, SaExpression
from nominatim.api.search.query import Token from nominatim.api.search.query import Token
from nominatim.utils.json_writer import JsonWriter
@dataclasses.dataclass @dataclasses.dataclass
class WeightedStrings: class WeightedStrings:
@@ -128,11 +129,17 @@ class FieldRanking:
""" """
assert self.rankings assert self.rankings
return sa.func.weigh_search(table.c[self.column], rout = JsonWriter().start_array()
[f"{{{','.join((str(s) for s in r.tokens))}}}" for rank in self.rankings:
for r in self.rankings], rout.start_array().value(rank.penalty).next()
[r.penalty for r in self.rankings], rout.start_array()
self.default) for token in rank.tokens:
rout.value(token).next()
rout.end_array()
rout.end_array().next()
rout.end_array()
return sa.func.weigh_search(table.c[self.column], rout(), self.default)
@dataclasses.dataclass @dataclasses.dataclass

View File

@@ -76,8 +76,8 @@ class JsonWriter:
def end_array(self) -> 'JsonWriter': def end_array(self) -> 'JsonWriter':
""" Write the closing bracket of a JSON array. """ Write the closing bracket of a JSON array.
""" """
assert self.pending in (',', '[', '') assert self.pending in (',', '[', ']', ')', '')
if self.pending == '[': if self.pending not in (',', ''):
self.data.write(self.pending) self.data.write(self.pending)
self.pending = ']' self.pending = ']'
return self return self