mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
move postcode normalization into tokenizer
This commit is contained in:
@@ -12,39 +12,47 @@ $$
|
|||||||
LANGUAGE plpgsql IMMUTABLE;
|
LANGUAGE plpgsql IMMUTABLE;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
|
||||||
|
RETURNS HSTORE
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
location RECORD;
|
||||||
|
waynodes BIGINT[];
|
||||||
|
BEGIN
|
||||||
|
IF akeys(in_address) != ARRAY['interpolation'] THEN
|
||||||
|
RETURN in_address;
|
||||||
|
END IF;
|
||||||
|
|
||||||
|
SELECT nodes INTO waynodes FROM planet_osm_ways WHERE id = wayid;
|
||||||
|
FOR location IN
|
||||||
|
SELECT placex.address, placex.osm_id FROM placex
|
||||||
|
WHERE osm_type = 'N' and osm_id = ANY(waynodes)
|
||||||
|
and placex.address is not null
|
||||||
|
and (placex.address ? 'street' or placex.address ? 'place')
|
||||||
|
and indexed_status < 100
|
||||||
|
LOOP
|
||||||
|
-- mark it as a derived address
|
||||||
|
RETURN location.address || in_address || hstore('_inherited', '');
|
||||||
|
END LOOP;
|
||||||
|
|
||||||
|
RETURN in_address;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql STABLE;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
-- find the parent road of the cut road parts
|
-- find the parent road of the cut road parts
|
||||||
CREATE OR REPLACE FUNCTION get_interpolation_parent(wayid BIGINT, street TEXT,
|
CREATE OR REPLACE FUNCTION get_interpolation_parent(street TEXT,
|
||||||
place TEXT, partition SMALLINT,
|
place TEXT, partition SMALLINT,
|
||||||
centroid GEOMETRY, geom GEOMETRY)
|
centroid GEOMETRY, geom GEOMETRY)
|
||||||
RETURNS BIGINT
|
RETURNS BIGINT
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
addr_street TEXT;
|
|
||||||
addr_place TEXT;
|
|
||||||
parent_place_id BIGINT;
|
parent_place_id BIGINT;
|
||||||
|
|
||||||
waynodes BIGINT[];
|
|
||||||
|
|
||||||
location RECORD;
|
location RECORD;
|
||||||
BEGIN
|
BEGIN
|
||||||
addr_street = street;
|
parent_place_id := find_parent_for_address(street, place, partition, centroid);
|
||||||
addr_place = place;
|
|
||||||
|
|
||||||
IF addr_street is null and addr_place is null THEN
|
|
||||||
select nodes from planet_osm_ways where id = wayid INTO waynodes;
|
|
||||||
FOR location IN SELECT placex.address from placex
|
|
||||||
where osm_type = 'N' and osm_id = ANY(waynodes)
|
|
||||||
and placex.address is not null
|
|
||||||
and (placex.address ? 'street' or placex.address ? 'place')
|
|
||||||
and indexed_status < 100
|
|
||||||
limit 1 LOOP
|
|
||||||
addr_street = location.address->'street';
|
|
||||||
addr_place = location.address->'place';
|
|
||||||
END LOOP;
|
|
||||||
END IF;
|
|
||||||
|
|
||||||
parent_place_id := find_parent_for_address(addr_street, addr_place,
|
|
||||||
partition, centroid);
|
|
||||||
|
|
||||||
IF parent_place_id is null THEN
|
IF parent_place_id is null THEN
|
||||||
FOR location IN SELECT place_id FROM placex
|
FOR location IN SELECT place_id FROM placex
|
||||||
@@ -147,17 +155,20 @@ BEGIN
|
|||||||
NEW.interpolationtype = NEW.address->'interpolation';
|
NEW.interpolationtype = NEW.address->'interpolation';
|
||||||
|
|
||||||
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
place_centroid := ST_PointOnSurface(NEW.linegeo);
|
||||||
NEW.parent_place_id = get_interpolation_parent(NEW.osm_id, NEW.address->'street',
|
NEW.parent_place_id = get_interpolation_parent(NEW.address->'street',
|
||||||
NEW.address->'place',
|
NEW.address->'place',
|
||||||
NEW.partition, place_centroid, NEW.linegeo);
|
NEW.partition, place_centroid, NEW.linegeo);
|
||||||
|
|
||||||
IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
IF NEW.address is not NULL AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN
|
||||||
interpol_postcode := NEW.address->'postcode';
|
interpol_postcode := NEW.address->'postcode';
|
||||||
housenum := getorcreate_postcode_id(NEW.address->'postcode');
|
|
||||||
ELSE
|
ELSE
|
||||||
interpol_postcode := NULL;
|
interpol_postcode := NULL;
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
|
IF NEW.address ? '_inherited' THEN
|
||||||
|
NEW.address := hstore('interpolation', NEW.interpolationtype);
|
||||||
|
END IF;
|
||||||
|
|
||||||
-- if the line was newly inserted, split the line as necessary
|
-- if the line was newly inserted, split the line as necessary
|
||||||
IF OLD.indexed_status = 1 THEN
|
IF OLD.indexed_status = 1 THEN
|
||||||
select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes;
|
select nodes from planet_osm_ways where id = NEW.osm_id INTO waynodes;
|
||||||
|
|||||||
@@ -817,10 +817,6 @@ BEGIN
|
|||||||
IF NEW.address is not NULL THEN
|
IF NEW.address is not NULL THEN
|
||||||
addr_street := NEW.address->'street';
|
addr_street := NEW.address->'street';
|
||||||
addr_place := NEW.address->'place';
|
addr_place := NEW.address->'place';
|
||||||
|
|
||||||
IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN
|
|
||||||
i := getorcreate_postcode_id(NEW.address->'postcode');
|
|
||||||
END IF;
|
|
||||||
END IF;
|
END IF;
|
||||||
|
|
||||||
NEW.postcode := null;
|
NEW.postcode := null;
|
||||||
|
|||||||
@@ -34,6 +34,13 @@ AS $$
|
|||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
||||||
|
RETURNS TEXT
|
||||||
|
AS $$
|
||||||
|
SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
|
||||||
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
-- Return token info that should be saved permanently in the database.
|
-- Return token info that should be saved permanently in the database.
|
||||||
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
|
CREATE OR REPLACE FUNCTION token_strip_info(info JSONB)
|
||||||
RETURNS JSONB
|
RETURNS JSONB
|
||||||
@@ -133,26 +140,26 @@ $$
|
|||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
|
CREATE OR REPLACE FUNCTION create_postcode_id(postcode TEXT)
|
||||||
RETURNS INTEGER
|
RETURNS BOOLEAN
|
||||||
AS $$
|
AS $$
|
||||||
DECLARE
|
DECLARE
|
||||||
|
r RECORD;
|
||||||
lookup_token TEXT;
|
lookup_token TEXT;
|
||||||
lookup_word TEXT;
|
|
||||||
return_word_id INTEGER;
|
return_word_id INTEGER;
|
||||||
BEGIN
|
BEGIN
|
||||||
lookup_word := upper(trim(postcode));
|
lookup_token := ' ' || make_standard_name(postcode);
|
||||||
lookup_token := ' ' || make_standard_name(lookup_word);
|
FOR r IN
|
||||||
SELECT min(word_id) FROM word
|
SELECT word_id FROM word
|
||||||
WHERE word_token = lookup_token and word = lookup_word
|
WHERE word_token = lookup_token and word = postcode
|
||||||
and class='place' and type='postcode'
|
and class='place' and type='postcode'
|
||||||
INTO return_word_id;
|
LOOP
|
||||||
IF return_word_id IS NULL THEN
|
RETURN false;
|
||||||
return_word_id := nextval('seq_word');
|
END LOOP;
|
||||||
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
|
||||||
'place', 'postcode', null, 0);
|
INSERT INTO word VALUES (nextval('seq_word'), lookup_token, postcode,
|
||||||
END IF;
|
'place', 'postcode', null, 0);
|
||||||
RETURN return_word_id;
|
RETURN true;
|
||||||
END;
|
END;
|
||||||
$$
|
$$
|
||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|||||||
@@ -116,7 +116,8 @@ class SetupAll:
|
|||||||
|
|
||||||
if args.continue_at is None or args.continue_at == 'load-data':
|
if args.continue_at is None or args.continue_at == 'load-data':
|
||||||
LOG.warning('Calculate postcodes')
|
LOG.warning('Calculate postcodes')
|
||||||
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
|
postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir,
|
||||||
|
tokenizer)
|
||||||
|
|
||||||
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
|
||||||
if args.continue_at is not None and args.continue_at != 'load-data':
|
if args.continue_at is not None and args.continue_at != 'load-data':
|
||||||
|
|||||||
@@ -147,7 +147,7 @@ class Indexer:
|
|||||||
|
|
||||||
if maxrank == 30:
|
if maxrank == 30:
|
||||||
self._index(runners.RankRunner(0, analyzer))
|
self._index(runners.RankRunner(0, analyzer))
|
||||||
self._index(runners.InterpolationRunner(), 20)
|
self._index(runners.InterpolationRunner(analyzer), 20)
|
||||||
self._index(runners.RankRunner(30, analyzer), 20)
|
self._index(runners.RankRunner(30, analyzer), 20)
|
||||||
else:
|
else:
|
||||||
self._index(runners.RankRunner(maxrank, analyzer))
|
self._index(runners.RankRunner(maxrank, analyzer))
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ class AbstractPlacexRunner:
|
|||||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||||
FROM (VALUES {}) as v(id, addr, ti)
|
FROM (VALUES {}) as v(id, addr, ti)
|
||||||
WHERE place_id = v.id
|
WHERE place_id = v.id
|
||||||
""".format(','.join(["(%s, %s::hstore, %s::json)"] * num_places))
|
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
||||||
|
|
||||||
|
|
||||||
def index_places(self, worker, places):
|
def index_places(self, worker, places):
|
||||||
@@ -82,6 +82,10 @@ class InterpolationRunner:
|
|||||||
location_property_osmline.
|
location_property_osmline.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self, analyzer):
|
||||||
|
self.analyzer = analyzer
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def name():
|
def name():
|
||||||
return "interpolation lines (location_property_osmline)"
|
return "interpolation lines (location_property_osmline)"
|
||||||
@@ -93,15 +97,30 @@ class InterpolationRunner:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def sql_get_objects():
|
def sql_get_objects():
|
||||||
return """SELECT place_id FROM location_property_osmline
|
return """SELECT place_id, get_interpolation_address(address, osm_id) as address
|
||||||
|
FROM location_property_osmline
|
||||||
WHERE indexed_status > 0
|
WHERE indexed_status > 0
|
||||||
ORDER BY geometry_sector"""
|
ORDER BY geometry_sector"""
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def index_places(worker, ids):
|
@functools.lru_cache(maxsize=1)
|
||||||
worker.perform(""" UPDATE location_property_osmline
|
def _index_sql(num_places):
|
||||||
SET indexed_status = 0 WHERE place_id IN ({})
|
return """ UPDATE location_property_osmline
|
||||||
""".format(','.join((str(i[0]) for i in ids))))
|
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||||
|
FROM (VALUES {}) as v(id, addr, ti)
|
||||||
|
WHERE place_id = v.id
|
||||||
|
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
||||||
|
|
||||||
|
|
||||||
|
def index_places(self, worker, places):
|
||||||
|
values = []
|
||||||
|
for place in places:
|
||||||
|
values.extend((place[x] for x in ('place_id', 'address')))
|
||||||
|
values.append(psycopg2.extras.Json(self.analyzer.process_place(place)))
|
||||||
|
|
||||||
|
worker.perform(self._index_sql(len(places)), values)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class PostcodeRunner:
|
class PostcodeRunner:
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
"""
|
"""
|
||||||
Tokenizer implementing normalisation as used before Nominatim 4.
|
Tokenizer implementing normalisation as used before Nominatim 4.
|
||||||
"""
|
"""
|
||||||
|
from collections import OrderedDict
|
||||||
import logging
|
import logging
|
||||||
import re
|
import re
|
||||||
import shutil
|
import shutil
|
||||||
@@ -213,6 +214,15 @@ class LegacyNameAnalyzer:
|
|||||||
self.conn.close()
|
self.conn.close()
|
||||||
self.conn = None
|
self.conn = None
|
||||||
|
|
||||||
|
|
||||||
|
def add_postcodes_from_db(self):
|
||||||
|
""" Add postcodes from the location_postcode table to the word table.
|
||||||
|
"""
|
||||||
|
with self.conn.cursor() as cur:
|
||||||
|
cur.execute("""SELECT count(create_postcode_id(pc))
|
||||||
|
FROM (SELECT distinct(postcode) as pc
|
||||||
|
FROM location_postcode) x""")
|
||||||
|
|
||||||
def process_place(self, place):
|
def process_place(self, place):
|
||||||
""" Determine tokenizer information about the given place.
|
""" Determine tokenizer information about the given place.
|
||||||
|
|
||||||
@@ -226,11 +236,25 @@ class LegacyNameAnalyzer:
|
|||||||
address = place.get('address')
|
address = place.get('address')
|
||||||
|
|
||||||
if address:
|
if address:
|
||||||
|
self._add_postcode(address.get('postcode'))
|
||||||
token_info.add_housenumbers(self.conn, address)
|
token_info.add_housenumbers(self.conn, address)
|
||||||
|
|
||||||
return token_info.data
|
return token_info.data
|
||||||
|
|
||||||
|
|
||||||
|
def _add_postcode(self, postcode):
|
||||||
|
""" Make sure the normalized postcode is present in the word table.
|
||||||
|
"""
|
||||||
|
if not postcode or re.search(r'[:,;]', postcode) is not None:
|
||||||
|
return
|
||||||
|
|
||||||
|
def _create_postcode_from_db(pcode):
|
||||||
|
with self.conn.cursor() as cur:
|
||||||
|
cur.execute('SELECT create_postcode_id(%s)', (pcode, ))
|
||||||
|
|
||||||
|
self._cache.postcodes.get(postcode.strip().upper(), _create_postcode_from_db)
|
||||||
|
|
||||||
|
|
||||||
class _TokenInfo:
|
class _TokenInfo:
|
||||||
""" Collect token information to be sent back to the database.
|
""" Collect token information to be sent back to the database.
|
||||||
"""
|
"""
|
||||||
@@ -285,6 +309,32 @@ class _TokenInfo:
|
|||||||
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
||||||
|
|
||||||
|
|
||||||
|
class _LRU:
|
||||||
|
""" Least recently used cache that accepts a generator function to
|
||||||
|
produce the item when there is a cache miss.
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, maxsize=128):
|
||||||
|
self.data = OrderedDict()
|
||||||
|
self.maxsize = maxsize
|
||||||
|
|
||||||
|
def get(self, key, generator):
|
||||||
|
""" Get the item with the given key from the cache. If nothing
|
||||||
|
is found in the cache, generate the value through the
|
||||||
|
generator function and store it in the cache.
|
||||||
|
"""
|
||||||
|
value = self.data.get(key)
|
||||||
|
if value is not None:
|
||||||
|
self.data.move_to_end(key)
|
||||||
|
else:
|
||||||
|
value = generator(key)
|
||||||
|
if len(self.data) >= self.maxsize:
|
||||||
|
self.data.popitem(last=False)
|
||||||
|
self.data[key] = value
|
||||||
|
|
||||||
|
return value
|
||||||
|
|
||||||
|
|
||||||
class _TokenCache:
|
class _TokenCache:
|
||||||
""" Cache for token information to avoid repeated database queries.
|
""" Cache for token information to avoid repeated database queries.
|
||||||
|
|
||||||
@@ -292,6 +342,9 @@ class _TokenCache:
|
|||||||
analyzer.
|
analyzer.
|
||||||
"""
|
"""
|
||||||
def __init__(self, conn):
|
def __init__(self, conn):
|
||||||
|
# various LRU caches
|
||||||
|
self.postcodes = _LRU(maxsize=32)
|
||||||
|
|
||||||
# Lookup houseunumbers up to 100 and cache them
|
# Lookup houseunumbers up to 100 and cache them
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
|
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ of artificial postcode centroids.
|
|||||||
from nominatim.db.utils import execute_file
|
from nominatim.db.utils import execute_file
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
def import_postcodes(dsn, project_dir):
|
def import_postcodes(dsn, project_dir, tokenizer):
|
||||||
""" Set up the initial list of postcodes.
|
""" Set up the initial list of postcodes.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -41,10 +41,11 @@ def import_postcodes(dsn, project_dir):
|
|||||||
INSERT INTO location_postcode
|
INSERT INTO location_postcode
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
(place_id, indexed_status, country_code, postcode, geometry)
|
||||||
SELECT nextval('seq_place'), 1, country_code,
|
SELECT nextval('seq_place'), 1, country_code,
|
||||||
upper(trim (both ' ' from address->'postcode')) as pc,
|
token_normalized_postcode(address->'postcode') as pc,
|
||||||
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
|
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
|
||||||
FROM placex
|
FROM placex
|
||||||
WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
|
WHERE address ? 'postcode'
|
||||||
|
and token_normalized_postcode(address->'postcode') is not null
|
||||||
AND geometry IS NOT null
|
AND geometry IS NOT null
|
||||||
GROUP BY country_code, pc
|
GROUP BY country_code, pc
|
||||||
""")
|
""")
|
||||||
@@ -52,9 +53,10 @@ def import_postcodes(dsn, project_dir):
|
|||||||
cur.execute("""
|
cur.execute("""
|
||||||
INSERT INTO location_postcode
|
INSERT INTO location_postcode
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
(place_id, indexed_status, country_code, postcode, geometry)
|
||||||
SELECT nextval('seq_place'), 1, 'us', postcode,
|
SELECT nextval('seq_place'), 1, 'us',
|
||||||
|
token_normalized_postcode(postcode),
|
||||||
ST_SetSRID(ST_Point(x,y),4326)
|
ST_SetSRID(ST_Point(x,y),4326)
|
||||||
FROM us_postcode WHERE postcode NOT IN
|
FROM us_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
||||||
(SELECT postcode FROM location_postcode
|
(SELECT postcode FROM location_postcode
|
||||||
WHERE country_code = 'us')
|
WHERE country_code = 'us')
|
||||||
""")
|
""")
|
||||||
@@ -62,8 +64,9 @@ def import_postcodes(dsn, project_dir):
|
|||||||
cur.execute("""
|
cur.execute("""
|
||||||
INSERT INTO location_postcode
|
INSERT INTO location_postcode
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
(place_id, indexed_status, country_code, postcode, geometry)
|
||||||
SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
|
SELECT nextval('seq_place'), 1, 'gb',
|
||||||
FROM gb_postcode WHERE postcode NOT IN
|
token_normalized_postcode(postcode), geometry
|
||||||
|
FROM gb_postcode WHERE token_normalized_postcode(postcode) NOT IN
|
||||||
(SELECT postcode FROM location_postcode
|
(SELECT postcode FROM location_postcode
|
||||||
WHERE country_code = 'gb')
|
WHERE country_code = 'gb')
|
||||||
""")
|
""")
|
||||||
@@ -72,9 +75,7 @@ def import_postcodes(dsn, project_dir):
|
|||||||
DELETE FROM word WHERE class='place' and type='postcode'
|
DELETE FROM word WHERE class='place' and type='postcode'
|
||||||
and word NOT IN (SELECT postcode FROM location_postcode)
|
and word NOT IN (SELECT postcode FROM location_postcode)
|
||||||
""")
|
""")
|
||||||
|
|
||||||
cur.execute("""
|
|
||||||
SELECT count(getorcreate_postcode_id(v)) FROM
|
|
||||||
(SELECT distinct(postcode) as v FROM location_postcode) p
|
|
||||||
""")
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
with tokenizer.name_analyzer() as analyzer:
|
||||||
|
analyzer.add_postcodes_from_db()
|
||||||
|
|||||||
@@ -299,7 +299,7 @@ def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def tokenizer_mock(monkeypatch, property_table, temp_db_conn):
|
def tokenizer_mock(monkeypatch, property_table, temp_db_conn, dsn):
|
||||||
""" Sets up the configuration so that the test dummy tokenizer will be
|
""" Sets up the configuration so that the test dummy tokenizer will be
|
||||||
loaded.
|
loaded.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -43,6 +43,9 @@ class DummyNameAnalyzer:
|
|||||||
"""
|
"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def add_postcodes_from_db(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def process_place(self, place):
|
def process_place(self, place):
|
||||||
""" Determine tokenizer information about the given place.
|
""" Determine tokenizer information about the given place.
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,9 @@ class IndexerTestDB:
|
|||||||
geometry_sector INTEGER)""")
|
geometry_sector INTEGER)""")
|
||||||
cur.execute("""CREATE TABLE location_property_osmline (
|
cur.execute("""CREATE TABLE location_property_osmline (
|
||||||
place_id BIGINT,
|
place_id BIGINT,
|
||||||
|
osm_id BIGINT,
|
||||||
|
address HSTORE,
|
||||||
|
token_info JSONB,
|
||||||
indexed_status SMALLINT,
|
indexed_status SMALLINT,
|
||||||
indexed_date TIMESTAMP,
|
indexed_date TIMESTAMP,
|
||||||
geometry_sector INTEGER)""")
|
geometry_sector INTEGER)""")
|
||||||
@@ -61,6 +64,14 @@ class IndexerTestDB:
|
|||||||
END;
|
END;
|
||||||
$$ LANGUAGE plpgsql STABLE;
|
$$ LANGUAGE plpgsql STABLE;
|
||||||
""")
|
""")
|
||||||
|
cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
|
||||||
|
RETURNS HSTORE AS $$
|
||||||
|
BEGIN
|
||||||
|
RETURN in_address;
|
||||||
|
END;
|
||||||
|
$$ LANGUAGE plpgsql STABLE;
|
||||||
|
""")
|
||||||
|
|
||||||
for table in ('placex', 'location_property_osmline', 'location_postcode'):
|
for table in ('placex', 'location_property_osmline', 'location_postcode'):
|
||||||
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
|
cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
|
||||||
FOR EACH ROW EXECUTE PROCEDURE date_update()
|
FOR EACH ROW EXECUTE PROCEDURE date_update()
|
||||||
@@ -91,9 +102,9 @@ class IndexerTestDB:
|
|||||||
next_id = next(self.osmline_id)
|
next_id = next(self.osmline_id)
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
cur.execute("""INSERT INTO location_property_osmline
|
cur.execute("""INSERT INTO location_property_osmline
|
||||||
(place_id, indexed_status, geometry_sector)
|
(place_id, osm_id, indexed_status, geometry_sector)
|
||||||
VALUES (%s, 1, %s)""",
|
VALUES (%s, %s, 1, %s)""",
|
||||||
(next_id, sector))
|
(next_id, next_id, sector))
|
||||||
return next_id
|
return next_id
|
||||||
|
|
||||||
def add_postcode(self, country, postcode):
|
def add_postcode(self, country, postcode):
|
||||||
|
|||||||
@@ -5,6 +5,11 @@ Tests for functions to maintain the artificial postcode table.
|
|||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
from nominatim.tools import postcodes
|
from nominatim.tools import postcodes
|
||||||
|
import dummy_tokenizer
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def tokenizer():
|
||||||
|
return dummy_tokenizer.DummyTokenizer(None, None)
|
||||||
|
|
||||||
@pytest.fixture
|
@pytest.fixture
|
||||||
def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
|
def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
|
||||||
@@ -20,26 +25,26 @@ def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
|
|||||||
postcode TEXT,
|
postcode TEXT,
|
||||||
geometry GEOMETRY(Geometry, 4326)""")
|
geometry GEOMETRY(Geometry, 4326)""")
|
||||||
temp_db_cursor.execute('CREATE SEQUENCE seq_place')
|
temp_db_cursor.execute('CREATE SEQUENCE seq_place')
|
||||||
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
|
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
||||||
RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
|
RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
||||||
def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
|
def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
|
||||||
postcodes.import_postcodes(dsn, tmp_path)
|
postcodes.import_postcodes(dsn, tmp_path, tokenizer)
|
||||||
|
|
||||||
assert temp_db_cursor.table_exists('gb_postcode')
|
assert temp_db_cursor.table_exists('gb_postcode')
|
||||||
assert temp_db_cursor.table_exists('us_postcode')
|
assert temp_db_cursor.table_exists('us_postcode')
|
||||||
assert temp_db_cursor.table_rows('location_postcode') == 0
|
assert temp_db_cursor.table_rows('location_postcode') == 0
|
||||||
|
|
||||||
|
|
||||||
def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
|
def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path, tokenizer):
|
||||||
temp_db_cursor.execute("""
|
temp_db_cursor.execute("""
|
||||||
INSERT INTO placex (place_id, country_code, address, geometry)
|
INSERT INTO placex (place_id, country_code, address, geometry)
|
||||||
VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
|
VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
|
||||||
""")
|
""")
|
||||||
|
|
||||||
postcodes.import_postcodes(dsn, tmp_path)
|
postcodes.import_postcodes(dsn, tmp_path, tokenizer)
|
||||||
|
|
||||||
rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
|
rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
|
||||||
ST_X(geometry), ST_Y(geometry)
|
ST_X(geometry), ST_Y(geometry)
|
||||||
|
|||||||
Reference in New Issue
Block a user