Code cleanup, test simplification, and use of the python3-icu package

This commit is contained in:
AntoJvlt
2021-03-22 23:56:24 +01:00
parent 919469c8fe
commit ff34198569
9 changed files with 68 additions and 162 deletions

View File

@@ -6,8 +6,7 @@ runs:
steps: steps:
- name: Install prerequisites - name: Install prerequisites
run: | run: |
sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
sudo pip install PyICU
shell: bash shell: bash
- name: Download dependencies - name: Download dependencies

View File

@@ -1,6 +1,7 @@
[MASTER] [MASTER]
extension-pkg-whitelist=osmium extension-pkg-whitelist=osmium
ignored-modules=icu
[MESSAGES CONTROL] [MESSAGES CONTROL]

View File

@@ -2,9 +2,7 @@
import sys import sys
import os import os
sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python') sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
#Add config directory to the python path for module importation
sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__) os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)

View File

@@ -5,10 +5,9 @@ import logging
import os import os
import re import re
import subprocess import subprocess
import sys
import json import json
from os.path import isfile from os.path import isfile
from icu import Transliterator # pylint: disable-msg=no-name-in-module from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL from psycopg2.sql import Identifier, Literal, SQL
from nominatim.tools.exec_utils import get_url from nominatim.tools.exec_utils import get_url
@@ -32,7 +31,7 @@ def import_from_wiki(args, db_connection, languages=None):
languages = _get_languages(args.config) if not languages else languages languages = _get_languages(args.config) if not languages else languages
#array for pairs of class/type #array for pairs of class/type
pairs = dict() class_type_pairs = set()
transliterator = Transliterator.createFromRules("special-phrases normalizer", transliterator = Transliterator.createFromRules("special-phrases normalizer",
args.config.TERM_NORMALIZATION) args.config.TERM_NORMALIZATION)
@@ -63,14 +62,14 @@ def import_from_wiki(args, db_connection, languages=None):
continue continue
#add class/type to the pairs dict #add class/type to the pairs dict
pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type) class_type_pairs.add((phrase_class, phrase_type))
_process_amenity( _process_amenity(
db_connection, phrase_label, normalized_label, db_connection, phrase_label, normalized_label,
phrase_class, phrase_type, phrase_operator phrase_class, phrase_type, phrase_operator
) )
_create_place_classtype_table_and_indexes(db_connection, args.config, pairs) _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
db_connection.commit() db_connection.commit()
LOG.warning('Import done.') LOG.warning('Import done.')
@@ -118,12 +117,8 @@ def _check_sanity(lang, phrase_class, phrase_type, pattern):
Check sanity of given inputs in case somebody added garbage in the wiki. Check sanity of given inputs in case somebody added garbage in the wiki.
If a bad class/type is detected the system will exit with an error. If a bad class/type is detected the system will exit with an error.
""" """
try: if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
sys.exit()
except SystemExit:
LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type) LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
raise
def _process_amenity(db_connection, phrase_label, normalized_label, def _process_amenity(db_connection, phrase_label, normalized_label,
@@ -147,7 +142,7 @@ def _process_amenity(db_connection, phrase_label, normalized_label,
(phrase_label, normalized_label, phrase_class, phrase_type)) (phrase_label, normalized_label, phrase_class, phrase_type))
def _create_place_classtype_table_and_indexes(db_connection, config, pairs): def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
""" """
Create table place_classtype for each given pair. Create table place_classtype for each given pair.
Also create indexes on place_id and centroid. Also create indexes on place_id and centroid.
@@ -161,7 +156,7 @@ def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
with db_connection.cursor() as db_cursor: with db_connection.cursor() as db_cursor:
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
for _, pair in pairs.items(): for pair in class_type_pairs.items():
phrase_class = pair[0] phrase_class = pair[0]
phrase_type = pair[1] phrase_type = pair[1]
@@ -188,53 +183,54 @@ def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, p
""" """
Create table place_classtype of the given phrase_class/phrase_type if it doesn't exist. Create table place_classtype of the given phrase_class/phrase_type if it doesn't exist.
""" """
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with db_connection.cursor() as db_cursor: with db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(f""" db_cursor.execute(SQL("""
CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace} CREATE TABLE IF NOT EXISTS {{}} {}
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
WHERE class = {{}} AND type = {{}}""") WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'), .format(Identifier(table_name), Literal(phrase_class),
Literal(phrase_class), Literal(phrase_type))) Literal(phrase_type)))
def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type): def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
""" """
Create indexes on centroid and place_id for the place_classtype table. Create indexes on centroid and place_id for the place_classtype table.
""" """
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
#Index on centroid #Index on centroid
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'): if not db_connection.index_exists(index_prefix + 'centroid'):
with db_connection.cursor() as db_cursor: with db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(f""" db_cursor.execute(SQL("""
CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""") CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
.format(Identifier( .format(Identifier(index_prefix + 'centroid'),
f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""), Identifier(base_table)), sql_tablespace)
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
#Index on place_id #Index on place_id
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'): if not db_connection.index_exists(index_prefix + 'place_id'):
with db_connection.cursor() as db_cursor: with db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(f""" db_cursor.execute(SQL(
CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""") """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
.format(Identifier( .format(Identifier(index_prefix + 'place_id'),
f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""), Identifier(base_table)))
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type): def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
""" """
Grant read access on the place_classtype table to the webuser. Grant read access on the place_classtype table to the webuser.
""" """
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with db_connection.cursor() as db_cursor: with db_connection.cursor() as db_cursor:
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""") db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'), .format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER)))
Identifier(config.DATABASE_WEBUSER)))
def _convert_php_settings_if_needed(args, file_path): def _convert_php_settings_if_needed(args, file_path):
""" """
Convert php settings file of special phrases to json file if it is still in php format. Convert php settings file of special phrases to json file if it is still in php format.
""" """
file, extension = os.path.splitext(file_path) file, extension = os.path.splitext(file_path)
json_file_path = f'{file}.json' json_file_path = file + '.json'
if extension == '.php' and not isfile(json_file_path): if extension == '.php' and not isfile(json_file_path):
try: try:
subprocess.run(['/usr/bin/env', 'php', '-Cq', subprocess.run(['/usr/bin/env', 'php', '-Cq',

View File

@@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
NOMINATIM_WIKIPEDIA_DATA_PATH= NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import. # Configuration file for special phrase import.
# When unset, the internal default settings from 'settings/phrase_settings.py' # When unset, the internal default settings from 'settings/phrase-settings.json'
# are used. # are used.
NOMINATIM_PHRASE_CONFIG= NOMINATIM_PHRASE_CONFIG=

View File

@@ -4,75 +4,27 @@
import pytest import pytest
from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity
def test_get_wiki_content(): def test_process_amenity_with_operator(temp_db_conn, getorcreate_amenityoperator_funcs):
assert _get_wiki_content('fr') _process_amenity(temp_db_conn, '', '', '', '', 'near')
_process_amenity(temp_db_conn, '', '', '', '', 'in')
def execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label, def test_process_amenity_without_operator(temp_db_conn, getorcreate_amenity_funcs):
phrase_class, phrase_type): _process_amenity(temp_db_conn, '', '', '', '', '')
_process_amenity(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type, '')
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute(f"""
SELECT * FROM word
WHERE word_token=' {normalized_label}'
AND word='{normalized_label}'
AND class='{phrase_class}'
AND type='{phrase_type}'
AND type='{phrase_type}'""")
return temp_db_cursor.fetchone()
def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type, phrase_operator):
_process_amenity(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type, phrase_operator)
with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute(f"""
SELECT * FROM word
WHERE word_token=' {normalized_label}'
AND word='{normalized_label}'
AND class='{phrase_class}'
AND type='{phrase_type}'
AND operator='{phrase_operator}'""")
return temp_db_cursor.fetchone()
def test_process_amenity_with_near_operator(temp_db_conn, word_table, amenity_operator_funcs):
phrase_label = ' label '
normalized_label = 'label'
phrase_class = 'class'
phrase_type = 'type'
assert execute_and_verify_add_word(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type)
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type, 'near')
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, normalized_label,
phrase_class, phrase_type, 'in')
def index_exists(db_connect, index):
""" Check that an index with the given name exists in the database.
"""
with db_connect.cursor() as cur:
cur.execute("""SELECT tablename FROM pg_indexes
WHERE indexname = %s and schemaname = 'public'""", (index, ))
if cur.rowcount == 0:
return False
return True
def test_create_place_classtype_indexes(temp_db_conn): def test_create_place_classtype_indexes(temp_db_conn):
phrase_class = 'class' phrase_class = 'class'
phrase_type = 'type' phrase_type = 'type'
table_name = f'place_classtype_{phrase_class}_{phrase_type}' table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor: with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute("CREATE EXTENSION postgis;") temp_db_cursor.execute("CREATE EXTENSION postgis;")
temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)') temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
_create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type) _create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type)
centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid') centroid_index_exists = temp_db_conn.index_exists(index_prefix + 'centroid')
place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id') place_id_index_exists = temp_db_conn.index_exists(index_prefix + 'place_id')
assert centroid_index_exists and place_id_index_exists assert centroid_index_exists and place_id_index_exists
@@ -93,10 +45,10 @@ def test_create_place_classtype_table(temp_db_conn, placex_table):
def test_grant_access_to_web_user(temp_db_conn, def_config): def test_grant_access_to_web_user(temp_db_conn, def_config):
phrase_class = 'class' phrase_class = 'class'
phrase_type = 'type' phrase_type = 'type'
table_name = f'place_classtype_{phrase_class}_{phrase_type}' table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
with temp_db_conn.cursor() as temp_db_cursor: with temp_db_conn.cursor() as temp_db_cursor:
temp_db_cursor.execute(f'CREATE TABLE {table_name}()') temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
_grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type) _grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type)
@@ -110,65 +62,28 @@ def test_grant_access_to_web_user(temp_db_conn, def_config):
assert result assert result
@pytest.fixture @pytest.fixture
def amenity_operator_funcs(temp_db_cursor): def make_strandard_name_func(temp_db_cursor):
temp_db_cursor.execute(f""" temp_db_cursor.execute(f"""
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
AS $$
DECLARE
o TEXT;
BEGIN BEGIN
RETURN trim(name); --Basically return only the trimed name for the tests RETURN trim(name); --Basically return only the trimed name for the tests
END; END;
$$ $$ LANGUAGE plpgsql IMMUTABLE;""")
LANGUAGE plpgsql IMMUTABLE;
CREATE SEQUENCE seq_word start 1;
@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
temp_db_cursor.execute(f"""
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text) lookup_class text, lookup_type text)
RETURNS INTEGER RETURNS void as $$
AS $$ BEGIN END;
DECLARE $$ LANGUAGE plpgsql""")
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and word = normalized_word
and class = lookup_class and type = lookup_type
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, @pytest.fixture
normalized_word TEXT, def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func):
lookup_class text, temp_db_cursor.execute(f"""
lookup_type text, CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
op text) lookup_class text, lookup_type text, op text)
RETURNS INTEGER RETURNS void as $$
AS $$ BEGIN END;
DECLARE $$ LANGUAGE plpgsql""")
lookup_token TEXT;
return_word_id INTEGER;
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
WHERE word_token = lookup_token and word = normalized_word
and class = lookup_class and type = lookup_type and operator = op
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
END;
$$
LANGUAGE plpgsql;""")

View File

@@ -29,13 +29,13 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libproj-dev \ libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-10 postgresql-10-postgis-2.4 \ postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \ postgresql-contrib-10 postgresql-10-postgis-scripts \
php php-pgsql php-intl python3-pip \ php php-pgsql php-intl libicu-dev python3-pip \
python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so # The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip: # install the latest version from pip:
pip3 install python-dotenv PyICU pip3 install python-dotenv
# #
# System Configuration # System Configuration

View File

@@ -32,11 +32,8 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
libbz2-dev libpq-dev libproj-dev \ libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-12 postgresql-12-postgis-3 \ postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \ postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php php-pgsql php-intl python3-dotenv \ php php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 git libicu-dev python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
#Python packages:
pip3 install PyICU
# #
# System Configuration # System Configuration