move linking of places to the preparation stage

Linked places may bring in extra names, and these names need to be
processed by the tokenizer. The linking therefore has to happen
before the data is handed to the tokenizer. Move the search for
the linked place into the preparation stage and merge its names into
the name field there. Everything else is still done in the indexing stage.
This commit is contained in:
Sarah Hoffmann
2021-08-20 21:53:13 +02:00
parent 925195725d
commit 28ee3d0949
4 changed files with 61 additions and 15 deletions

View File

@@ -11,8 +11,11 @@
CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE,
OUT address HSTORE,
OUT country_feature VARCHAR)
OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
AS $$
DECLARE
location RECORD;
BEGIN
-- For POI nodes, check if the address should be derived from a surrounding
-- building.
@@ -34,6 +37,18 @@ BEGIN
address := address - '_unlisted_place'::TEXT;
name := p.name;
-- Names of linked places need to be merged in, so search for a linkable
-- place already here.
SELECT * INTO location FROM find_linked_place(p);
IF location.place_id is not NULL THEN
linked_place_id := location.place_id;
IF NOT location.name IS NULL THEN
name := location.name || name;
END IF;
END IF;
country_feature := CASE WHEN p.admin_level = 2
and p.class = 'boundary' and p.type = 'administrative'
and p.osm_type = 'R'
@@ -683,6 +698,8 @@ DECLARE
nameaddress_vector INTEGER[];
addr_nameaddress_vector INTEGER[];
linked_place BIGINT;
linked_node_id BIGINT;
linked_importance FLOAT;
linked_wikipedia TEXT;
@@ -718,9 +735,14 @@ BEGIN
NEW.extratags := NEW.extratags - 'linked_place'::TEXT;
-- NEW.linked_place_id contains the precomputed linkee. Save this and restore
-- the previous link status.
linked_place := NEW.linked_place_id;
NEW.linked_place_id := OLD.linked_place_id;
IF NEW.linked_place_id is not null THEN
NEW.token_info := null;
{% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %}
{% if debug %}RAISE WARNING 'place already linked to %', OLD.linked_place_id;{% endif %}
RETURN NEW;
END IF;
@@ -956,8 +978,9 @@ BEGIN
-- ---------------------------------------------------------------------------
-- Full indexing
{% if debug %}RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;{% endif %}
SELECT * INTO location FROM find_linked_place(NEW);
IF location.place_id is not null THEN
IF linked_place is not null THEN
SELECT * INTO location FROM placex WHERE place_id = linked_place;
{% if debug %}RAISE WARNING 'Linked %', location;{% endif %}
-- Use the linked point as the centre point of the geometry,
@@ -974,11 +997,6 @@ BEGIN
NEW.rank_address := location.rank_address;
END IF;
-- merge in the label name
IF NOT location.name IS NULL THEN
NEW.name := location.name || NEW.name;
END IF;
-- merge in extra tags
NEW.extratags := hstore('linked_' || location.class, location.type)
|| coalesce(location.extratags, ''::hstore)

View File

@@ -16,6 +16,7 @@ class AbstractPlacexRunner:
""" Returns SQL commands for indexing of the placex table.
"""
SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
UPDATE_LINE = "(%s, %s::hstore, %s::hstore, %s::int, %s::jsonb)"
def __init__(self, rank, analyzer):
self.rank = rank
@@ -27,10 +28,11 @@ class AbstractPlacexRunner:
def _index_sql(num_places):
return pysql.SQL(
""" UPDATE placex
SET indexed_status = 0, address = v.addr, token_info = v.ti
FROM (VALUES {}) as v(id, addr, ti)
SET indexed_status = 0, address = v.addr, token_info = v.ti,
name = v.name, linked_place_id = v.linked_place_id
FROM (VALUES {}) as v(id, name, addr, linked_place_id, ti)
WHERE place_id = v.id
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
""").format(_mk_valuelist(AbstractPlacexRunner.UPDATE_LINE, num_places))
@staticmethod
@@ -43,7 +45,8 @@ class AbstractPlacexRunner:
def index_places(self, worker, places):
values = []
for place in places:
values.extend((place[x] for x in ('place_id', 'address')))
for field in ('place_id', 'name', 'address', 'linked_place_id'):
values.append(place[field])
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
worker.perform(self._index_sql(len(places)), values)

View File

@@ -0,0 +1,22 @@
@DB
Feature: Searching linked places
Tests that information from linked places can be searched correctly
Scenario: Additional names from linked places are searchable
Given the places
| osm | class | type | admin | name | geometry |
| R13 | boundary | administrative | 6 | Garbo | poly-area:0.1 |
Given the places
| osm | class | type | admin | name+name:it | geometry |
| N2 | place | hamlet | 15 | Vario | 0.006 0.00001 |
And the relations
| id | members | tags+type |
| 13 | N2:label | boundary |
When importing
Then placex contains
| object | linked_place_id |
| N2 | R13 |
When sending search query "Vario"
Then results contain
| osm |
| R13 |

View File

@@ -19,8 +19,10 @@ class IndexerTestDB:
with self.conn.cursor() as cur:
cur.execute('CREATE EXTENSION hstore')
cur.execute("""CREATE TABLE placex (place_id BIGINT,
name HSTORE,
class TEXT,
type TEXT,
linked_place_id BIGINT,
rank_address SMALLINT,
rank_search SMALLINT,
indexed_status SMALLINT,
@@ -55,11 +57,12 @@ class IndexerTestDB:
cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
OUT name HSTORE,
OUT address HSTORE,
OUT country_feature VARCHAR)
OUT country_feature VARCHAR,
OUT linked_place_id BIGINT)
AS $$
BEGIN
address := p.address;
name := p.address;
name := p.name;
END;
$$ LANGUAGE plpgsql STABLE;
""")