move linking of places to the preparation stage

Linked places may bring in extra names. These names need to be
processed by the tokenizer, which means that the linking needs
to be done before the data is handed to the tokenizer. Move finding
the linked place into the preparation stage and merge its names into
the name field there. Everything else that depends on the linked
place (centre point, address rank, extra tags) is still done in the
indexing stage.
This commit is contained in:
Sarah Hoffmann
2021-08-20 21:53:13 +02:00
parent 925195725d
commit 28ee3d0949
4 changed files with 61 additions and 15 deletions

View File

@@ -11,8 +11,11 @@
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE, OUT name HSTORE,
OUT address HSTORE, OUT address HSTORE,
OUT country_feature VARCHAR) OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
AS $$ AS $$
DECLARE
location RECORD;
BEGIN BEGIN
-- For POI nodes, check if the address should be derived from a surrounding -- For POI nodes, check if the address should be derived from a surrounding
-- building. -- building.
@@ -34,6 +37,18 @@ BEGIN
address := address - '_unlisted_place'::TEXT; address := address - '_unlisted_place'::TEXT;
name := p.name; name := p.name;
-- Names of linked places need to be merged in, so search for a linkable
-- place already here.
SELECT * INTO location FROM find_linked_place(p);
IF location.place_id is not NULL THEN
linked_place_id := location.place_id;
IF NOT location.name IS NULL THEN
name := location.name || name;
END IF;
END IF;
country_feature := CASE WHEN p.admin_level = 2 country_feature := CASE WHEN p.admin_level = 2
and p.class = 'boundary' and p.type = 'administrative' and p.class = 'boundary' and p.type = 'administrative'
and p.osm_type = 'R' and p.osm_type = 'R'
@@ -683,6 +698,8 @@ DECLARE
nameaddress_vector INTEGER[]; nameaddress_vector INTEGER[];
addr_nameaddress_vector INTEGER[]; addr_nameaddress_vector INTEGER[];
linked_place BIGINT;
linked_node_id BIGINT; linked_node_id BIGINT;
linked_importance FLOAT; linked_importance FLOAT;
linked_wikipedia TEXT; linked_wikipedia TEXT;
@@ -718,9 +735,14 @@ BEGIN
NEW.extratags := NEW.extratags - 'linked_place'::TEXT; NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
-- the previous link status.
linked_place := NEW.linked_place_id;
NEW.linked_place_id := OLD.linked_place_id;
IF NEW.linked_place_id is not null THEN IF NEW.linked_place_id is not null THEN
NEW.token_info := null; NEW.token_info := null;
{% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %} {% if debug %}RAISE WARNING 'place already linked to %', OLD.linked_place_id;{% endif %}
RETURN NEW; RETURN NEW;
END IF; END IF;
@@ -956,8 +978,9 @@ BEGIN
-- --------------------------------------------------------------------------- -- ---------------------------------------------------------------------------
-- Full indexing -- Full indexing
{% if debug %}RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;{% endif %} {% if debug %}RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;{% endif %}
SELECT * INTO location FROM find_linked_place(NEW); IF linked_place is not null THEN
IF location.place_id is not null THEN SELECT * INTO location FROM placex WHERE place_id = linked_place;
{% if debug %}RAISE WARNING 'Linked %', location;{% endif %} {% if debug %}RAISE WARNING 'Linked %', location;{% endif %}
-- Use the linked point as the centre point of the geometry, -- Use the linked point as the centre point of the geometry,
@@ -974,11 +997,6 @@ BEGIN
NEW.rank_address := location.rank_address; NEW.rank_address := location.rank_address;
END IF; END IF;
-- merge in the label name
IF NOT location.name IS NULL THEN
NEW.name := location.name || NEW.name;
END IF;
-- merge in extra tags -- merge in extra tags
NEW.extratags := hstore('linked_' || location.class, location.type) NEW.extratags := hstore('linked_' || location.class, location.type)
|| coalesce(location.extratags, ''::hstore) || coalesce(location.extratags, ''::hstore)

View File

@@ -16,6 +16,7 @@ class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table. """ Returns SQL commands for indexing of the placex table.
""" """
SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ') SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"
def __init__(self, rank, analyzer): def __init__(self, rank, analyzer):
self.rank = rank self.rank = rank
@@ -27,10 +28,11 @@ class AbstractPlacexRunner:
def _index_sql(num_places): def _index_sql(num_places):
return pysql.SQL( return pysql.SQL(
""" UPDATE placex """ UPDATE placex
SET indexed_status = 0, address = v.addr, token_info = v.ti SET indexed_status = 0, address = v.addr, token_info = v.ti,
FROM (VALUES {}) as v(id, addr, ti) name = v.name, linked_place_id = v.linked_place_id
FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
WHERE place_id = v.id WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) """).format(_mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places))
@staticmethod @staticmethod
@@ -43,7 +45,8 @@ class AbstractPlacexRunner:
def index_places(self, worker, places): def index_places(self, worker, places):
values = [] values = []
for place in places: for place in places:
values.extend((place[x] for x in ('place_id', 'address'))) for field in ('place_id', 'name', 'address', 'linked_place_id'):
values.append(place[field])
values.append(psycopg2.extras.Json(self.analyzer.process_place(place))) values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
worker.perform(self._index_sql(len(places)), values) worker.perform(self._index_sql(len(places)), values)

View File

@@ -0,0 +1,22 @@
@DB
Feature: Searching linked places
Tests that information from linked places can be searched correctly
Scenario: Additional names from linked places are searchable
Given the places
| osm | class | type | admin | name | geometry |
| R13 | boundary | administrative | 6 | Garbo | poly-area:0.1 |
Given the places
| osm | class | type | admin | name+name:it | geometry |
| N2 | place | hamlet | 15 | Vario | 0.006 0.00001 |
And the relations
| id | members | tags+type |
| 13 | N2:label | boundary |
When importing
Then placex contains
| object | linked_place_id |
| N2 | R13 |
When sending search query "Vario"
Then results contain
| osm |
| R13 |

View File

@@ -19,8 +19,10 @@ class IndexerTestDB:
with self.conn.cursor() as cur: with self.conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore') cur.execute('CREATE EXTENSION hstore')
cur.execute("""CREATE TABLE placex (place_id BIGINT, cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT, class TEXT,
type TEXT, type TEXT,
linked_place_id BIGINT,
rank_address SMALLINT, rank_address SMALLINT,
rank_search SMALLINT, rank_search SMALLINT,
indexed_status SMALLINT, indexed_status SMALLINT,
@@ -55,11 +57,12 @@ class IndexerTestDB:
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex, cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE, OUT name HSTORE,
OUT address HSTORE, OUT address HSTORE,
OUT country_feature VARCHAR) OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
AS $$ AS $$
BEGIN BEGIN
address := p.address; address := p.address;
name := p.address; name := p.name;
END; END;
$$ LANGUAGE plpgsql STABLE; $$ LANGUAGE plpgsql STABLE;
""") """)