mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Ported functions for the import of special phrases from php to python.
- the command is now --import-special-phrases - the output is not an sql file anymore, data are directly imported to the database. - the little part on the documentation (section data import) has been modified.
This commit is contained in:
@@ -114,7 +114,6 @@ if (BUILD_IMPORTER)
|
|||||||
export.php
|
export.php
|
||||||
query.php
|
query.php
|
||||||
setup.php
|
setup.php
|
||||||
specialphrases.php
|
|
||||||
update.php
|
update.php
|
||||||
warm.php
|
warm.php
|
||||||
)
|
)
|
||||||
@@ -217,7 +216,7 @@ endif()
|
|||||||
include(GNUInstallDirs)
|
include(GNUInstallDirs)
|
||||||
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
|
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
|
||||||
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
|
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
|
||||||
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
|
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
|
||||||
|
|
||||||
if (BUILD_IMPORTER)
|
if (BUILD_IMPORTER)
|
||||||
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
|
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
|
||||||
@@ -258,8 +257,9 @@ if (BUILD_API)
|
|||||||
endif()
|
endif()
|
||||||
|
|
||||||
install(FILES settings/env.defaults
|
install(FILES settings/env.defaults
|
||||||
|
settings/__init__.py
|
||||||
settings/address-levels.json
|
settings/address-levels.json
|
||||||
settings/phrase_settings.php
|
settings/phrase_settings.py
|
||||||
settings/import-admin.style
|
settings/import-admin.style
|
||||||
settings/import-street.style
|
settings/import-street.style
|
||||||
settings/import-address.style
|
settings/import-address.style
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
|
|
||||||
sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
|
sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python')
|
||||||
|
#Add config directory to the python path for module importation
|
||||||
|
sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..')
|
||||||
|
|
||||||
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
|
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
|
||||||
|
|
||||||
|
|||||||
@@ -268,10 +268,9 @@ running this function.
|
|||||||
|
|
||||||
If you want to be able to search for places by their type through
|
If you want to be able to search for places by their type through
|
||||||
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
|
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
|
||||||
you also need to enable these key phrases like this:
|
you also need to import these key phrases like this:
|
||||||
|
|
||||||
nominatim special-phrases --from-wiki > specialphrases.sql
|
nominatim import-special-phrases --from-wiki
|
||||||
psql -d nominatim -f specialphrases.sql
|
|
||||||
|
|
||||||
Note that this command downloads the phrases from the wiki link above. You
|
Note that this command downloads the phrases from the wiki link above. You
|
||||||
need internet access for the step.
|
need internet access for the step.
|
||||||
|
|||||||
@@ -118,7 +118,7 @@ $$
|
|||||||
LANGUAGE plpgsql;
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
|
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT,
|
||||||
lookup_class text, lookup_type text)
|
lookup_class text, lookup_type text)
|
||||||
RETURNS INTEGER
|
RETURNS INTEGER
|
||||||
AS $$
|
AS $$
|
||||||
@@ -128,12 +128,12 @@ DECLARE
|
|||||||
BEGIN
|
BEGIN
|
||||||
lookup_token := ' '||trim(lookup_word);
|
lookup_token := ' '||trim(lookup_word);
|
||||||
SELECT min(word_id) FROM word
|
SELECT min(word_id) FROM word
|
||||||
WHERE word_token = lookup_token and word = normalized_word
|
WHERE word_token = lookup_token and word = lookup_word
|
||||||
and class = lookup_class and type = lookup_type
|
and class = lookup_class and type = lookup_type
|
||||||
INTO return_word_id;
|
INTO return_word_id;
|
||||||
IF return_word_id IS NULL THEN
|
IF return_word_id IS NULL THEN
|
||||||
return_word_id := nextval('seq_word');
|
return_word_id := nextval('seq_word');
|
||||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
||||||
lookup_class, lookup_type, null, 0);
|
lookup_class, lookup_type, null, 0);
|
||||||
END IF;
|
END IF;
|
||||||
RETURN return_word_id;
|
RETURN return_word_id;
|
||||||
@@ -143,7 +143,6 @@ LANGUAGE plpgsql;
|
|||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
|
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
|
||||||
normalized_word TEXT,
|
|
||||||
lookup_class text,
|
lookup_class text,
|
||||||
lookup_type text,
|
lookup_type text,
|
||||||
op text)
|
op text)
|
||||||
@@ -155,12 +154,12 @@ DECLARE
|
|||||||
BEGIN
|
BEGIN
|
||||||
lookup_token := ' '||trim(lookup_word);
|
lookup_token := ' '||trim(lookup_word);
|
||||||
SELECT min(word_id) FROM word
|
SELECT min(word_id) FROM word
|
||||||
WHERE word_token = lookup_token and word = normalized_word
|
WHERE word_token = lookup_token and word = lookup_word
|
||||||
and class = lookup_class and type = lookup_type and operator = op
|
and class = lookup_class and type = lookup_type and operator = op
|
||||||
INTO return_word_id;
|
INTO return_word_id;
|
||||||
IF return_word_id IS NULL THEN
|
IF return_word_id IS NULL THEN
|
||||||
return_word_id := nextval('seq_word');
|
return_word_id := nextval('seq_word');
|
||||||
INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
|
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
||||||
lookup_class, lookup_type, null, 0, op);
|
lookup_class, lookup_type, null, 0, op);
|
||||||
END IF;
|
END IF;
|
||||||
RETURN return_word_id;
|
RETURN return_word_id;
|
||||||
|
|||||||
@@ -112,30 +112,6 @@ class CommandlineParser:
|
|||||||
# pylint: disable=C0111
|
# pylint: disable=C0111
|
||||||
# Using non-top-level imports to make pyosmium optional for replication only.
|
# Using non-top-level imports to make pyosmium optional for replication only.
|
||||||
# pylint: disable=E0012,C0415
|
# pylint: disable=E0012,C0415
|
||||||
|
|
||||||
|
|
||||||
class SetupSpecialPhrases:
|
|
||||||
"""\
|
|
||||||
Maintain special phrases.
|
|
||||||
"""
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def add_args(parser):
|
|
||||||
group = parser.add_argument_group('Input arguments')
|
|
||||||
group.add_argument('--from-wiki', action='store_true',
|
|
||||||
help='Pull special phrases from the OSM wiki.')
|
|
||||||
group = parser.add_argument_group('Output arguments')
|
|
||||||
group.add_argument('-o', '--output', default='-',
|
|
||||||
help="""File to write the preprocessed phrases to.
|
|
||||||
If omitted, it will be written to stdout.""")
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def run(args):
|
|
||||||
if args.output != '-':
|
|
||||||
raise NotImplementedError('Only output to stdout is currently implemented.')
|
|
||||||
return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)
|
|
||||||
|
|
||||||
|
|
||||||
class UpdateAddData:
|
class UpdateAddData:
|
||||||
"""\
|
"""\
|
||||||
Add additional data from a file or an online source.
|
Add additional data from a file or an online source.
|
||||||
@@ -278,7 +254,7 @@ def nominatim(**kwargs):
|
|||||||
parser.add_subcommand('freeze', clicmd.SetupFreeze)
|
parser.add_subcommand('freeze', clicmd.SetupFreeze)
|
||||||
parser.add_subcommand('replication', clicmd.UpdateReplication)
|
parser.add_subcommand('replication', clicmd.UpdateReplication)
|
||||||
|
|
||||||
parser.add_subcommand('special-phrases', SetupSpecialPhrases)
|
parser.add_subcommand('import-special-phrases', clicmd.ImportSpecialPhrases)
|
||||||
|
|
||||||
parser.add_subcommand('add-data', UpdateAddData)
|
parser.add_subcommand('add-data', UpdateAddData)
|
||||||
parser.add_subcommand('index', clicmd.UpdateIndex)
|
parser.add_subcommand('index', clicmd.UpdateIndex)
|
||||||
|
|||||||
@@ -10,3 +10,4 @@ from .refresh import UpdateRefresh
|
|||||||
from .admin import AdminFuncs
|
from .admin import AdminFuncs
|
||||||
from .freeze import SetupFreeze
|
from .freeze import SetupFreeze
|
||||||
from .transition import AdminTransition
|
from .transition import AdminTransition
|
||||||
|
from .special_phrases import ImportSpecialPhrases
|
||||||
|
|||||||
29
nominatim/clicmd/special_phrases.py
Normal file
29
nominatim/clicmd/special_phrases.py
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
"""
|
||||||
|
Implementation of the 'import-special-phrases' command.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
from nominatim.tools.special_phrases import import_from_wiki
|
||||||
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
|
# Do not repeat documentation of subcommand classes.
|
||||||
|
# pylint: disable=C0111
|
||||||
|
|
||||||
|
class ImportSpecialPhrases:
|
||||||
|
"""\
|
||||||
|
Import special phrases.
|
||||||
|
"""
|
||||||
|
@staticmethod
|
||||||
|
def add_args(parser):
|
||||||
|
group = parser.add_argument_group('Input arguments')
|
||||||
|
group.add_argument('--from-wiki', action='store_true',
|
||||||
|
help='Import special phrases from the OSM wiki to the database.')
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def run(args):
|
||||||
|
if args.from_wiki:
|
||||||
|
LOG.warning('Special phrases importation starting')
|
||||||
|
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||||
|
import_from_wiki(args.config, db_connection)
|
||||||
|
return 0
|
||||||
201
nominatim/tools/special_phrases.py
Normal file
201
nominatim/tools/special_phrases.py
Normal file
@@ -0,0 +1,201 @@
|
|||||||
|
"""
|
||||||
|
Functions to import special phrases into the database.
|
||||||
|
"""
|
||||||
|
import logging
|
||||||
|
import re
|
||||||
|
import sys
|
||||||
|
from psycopg2.sql import Identifier, Literal, SQL
|
||||||
|
from settings.phrase_settings import BLACK_LIST, WHITE_LIST
|
||||||
|
from nominatim.tools.exec_utils import get_url
|
||||||
|
|
||||||
|
LOG = logging.getLogger()
|
||||||
|
|
||||||
|
def import_from_wiki(config, db_connection, languages=None):
|
||||||
|
"""
|
||||||
|
Iterate through all specified languages and
|
||||||
|
extract corresponding special phrases from the wiki.
|
||||||
|
"""
|
||||||
|
#Compile the match regex to increase performance for the following loop.
|
||||||
|
occurence_pattern = re.compile(
|
||||||
|
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
|
||||||
|
)
|
||||||
|
sanity_check_pattern = re.compile(r'^\w+$')
|
||||||
|
|
||||||
|
languages = _get_languages(config) if not languages else languages
|
||||||
|
|
||||||
|
#array for pairs of class/type
|
||||||
|
pairs = dict()
|
||||||
|
for lang in languages:
|
||||||
|
LOG.warning('Import phrases for lang: %s', lang)
|
||||||
|
wiki_page_xml_content = _get_wiki_content(lang)
|
||||||
|
#One match will be of format [label, class, type, operator, plural]
|
||||||
|
matches = occurence_pattern.findall(wiki_page_xml_content)
|
||||||
|
|
||||||
|
for match in matches:
|
||||||
|
phrase_label = match[0].strip()
|
||||||
|
phrase_class = match[1].strip()
|
||||||
|
phrase_type = match[2].strip()
|
||||||
|
phrase_operator = match[3].strip()
|
||||||
|
#hack around a bug where building=yes was imported withq quotes into the wiki
|
||||||
|
phrase_type = re.sub(r'\"|"', '', phrase_type)
|
||||||
|
|
||||||
|
#sanity check, in case somebody added garbage in the wiki
|
||||||
|
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
|
||||||
|
|
||||||
|
#blacklisting: disallow certain class/type combinations
|
||||||
|
if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
|
||||||
|
continue
|
||||||
|
#whitelisting: if class is in whitelist, allow only tags in the list
|
||||||
|
if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
|
||||||
|
continue
|
||||||
|
|
||||||
|
#add class/type to the pairs dict
|
||||||
|
pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
|
||||||
|
|
||||||
|
_process_amenity(
|
||||||
|
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
|
||||||
|
)
|
||||||
|
|
||||||
|
_create_place_classtype_table_and_indexes(db_connection, config, pairs)
|
||||||
|
db_connection.commit()
|
||||||
|
LOG.warning('Import done.')
|
||||||
|
|
||||||
|
|
||||||
|
def _get_languages(config):
|
||||||
|
"""
|
||||||
|
Get list of all languages from env config file
|
||||||
|
or default if there is no languages configured.
|
||||||
|
The system will extract special phrases only from all specified languages.
|
||||||
|
"""
|
||||||
|
default_languages = [
|
||||||
|
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
|
||||||
|
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
|
||||||
|
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
|
||||||
|
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
|
||||||
|
return config.LANGUAGES or default_languages
|
||||||
|
|
||||||
|
|
||||||
|
def _get_wiki_content(lang):
|
||||||
|
"""
|
||||||
|
Request and return the wiki page's content
|
||||||
|
corresponding to special phrases for a given lang.
|
||||||
|
Requested URL Example :
|
||||||
|
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
||||||
|
"""
|
||||||
|
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
|
||||||
|
return get_url(url)
|
||||||
|
|
||||||
|
|
||||||
|
def _check_sanity(lang, phrase_class, phrase_type, pattern):
|
||||||
|
"""
|
||||||
|
Check sanity of given inputs in case somebody added garbage in the wiki.
|
||||||
|
If a bad class/type is detected the system will exit with an error.
|
||||||
|
"""
|
||||||
|
try:
|
||||||
|
if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
|
||||||
|
sys.exit()
|
||||||
|
except SystemExit:
|
||||||
|
LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
|
||||||
|
raise
|
||||||
|
|
||||||
|
|
||||||
|
def _process_amenity(db_connection, phrase_label, phrase_class, phrase_type, phrase_operator):
|
||||||
|
"""
|
||||||
|
Add phrase lookup and corresponding class and type to the word table based on the operator.
|
||||||
|
"""
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
if phrase_operator == 'near':
|
||||||
|
db_cursor.execute("""SELECT getorcreate_amenityoperator(
|
||||||
|
make_standard_name(%s), %s, %s, 'near')""",
|
||||||
|
(phrase_label, phrase_class, phrase_type))
|
||||||
|
elif phrase_operator == 'in':
|
||||||
|
db_cursor.execute("""SELECT getorcreate_amenityoperator(
|
||||||
|
make_standard_name(%s), %s, %s, 'in')""",
|
||||||
|
(phrase_label, phrase_class, phrase_type))
|
||||||
|
else:
|
||||||
|
db_cursor.execute("""SELECT getorcreate_amenity(
|
||||||
|
make_standard_name(%s), %s, %s)""",
|
||||||
|
(phrase_label, phrase_class, phrase_type))
|
||||||
|
|
||||||
|
|
||||||
|
def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
|
||||||
|
"""
|
||||||
|
Create table place_classtype for each given pair.
|
||||||
|
Also create indexes on place_id and centroid.
|
||||||
|
"""
|
||||||
|
LOG.warning('Create tables and indexes...')
|
||||||
|
|
||||||
|
sql_tablespace = config.TABLESPACE_AUX_DATA
|
||||||
|
if sql_tablespace:
|
||||||
|
sql_tablespace = ' TABLESPACE '+sql_tablespace
|
||||||
|
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
||||||
|
|
||||||
|
for _, pair in pairs.items():
|
||||||
|
phrase_class = pair[0]
|
||||||
|
phrase_type = pair[1]
|
||||||
|
|
||||||
|
#Table creation
|
||||||
|
_create_place_classtype_table(
|
||||||
|
db_connection, sql_tablespace, phrase_class, phrase_type
|
||||||
|
)
|
||||||
|
|
||||||
|
#Indexes creation
|
||||||
|
_create_place_classtype_indexes(
|
||||||
|
db_connection, sql_tablespace, phrase_class, phrase_type
|
||||||
|
)
|
||||||
|
|
||||||
|
#Grant access on read to the web user.
|
||||||
|
_grant_access_to_webuser(
|
||||||
|
db_connection, config, phrase_class, phrase_type
|
||||||
|
)
|
||||||
|
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute("DROP INDEX idx_placex_classtype")
|
||||||
|
|
||||||
|
|
||||||
|
def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
|
||||||
|
"""
|
||||||
|
Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
|
||||||
|
"""
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute(SQL(f"""
|
||||||
|
CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
|
||||||
|
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
|
||||||
|
WHERE class = {{}} AND type = {{}}""")
|
||||||
|
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||||
|
Literal(phrase_class), Literal(phrase_type)))
|
||||||
|
|
||||||
|
|
||||||
|
def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
|
||||||
|
"""
|
||||||
|
Create indexes on centroid and place_id for the place_classtype table.
|
||||||
|
"""
|
||||||
|
#Index on centroid
|
||||||
|
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute(SQL(f"""
|
||||||
|
CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
|
||||||
|
.format(Identifier(
|
||||||
|
f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
|
||||||
|
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
|
||||||
|
|
||||||
|
#Index on place_id
|
||||||
|
if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute(SQL(f"""
|
||||||
|
CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
|
||||||
|
.format(Identifier(
|
||||||
|
f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
|
||||||
|
Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
|
||||||
|
|
||||||
|
|
||||||
|
def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
|
||||||
|
"""
|
||||||
|
Grant access on read to the table place_classtype for the webuser.
|
||||||
|
"""
|
||||||
|
with db_connection.cursor() as db_cursor:
|
||||||
|
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
|
||||||
|
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
|
||||||
|
Identifier(config.DATABASE_WEBUSER)))
|
||||||
3
settings/__init__.py
Normal file
3
settings/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
|||||||
|
"""
|
||||||
|
Module for settings
|
||||||
|
"""
|
||||||
@@ -77,7 +77,7 @@ NOMINATIM_TIGER_DATA_PATH=
|
|||||||
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
NOMINATIM_WIKIPEDIA_DATA_PATH=
|
||||||
|
|
||||||
# Configuration file for special phrase import.
|
# Configuration file for special phrase import.
|
||||||
# When unset, the internal default settings from 'settings/phrase_settings.php'
|
# When unset, the internal default settings from 'settings/phrase_settings.py'
|
||||||
# are used.
|
# are used.
|
||||||
NOMINATIM_PHRASE_CONFIG=
|
NOMINATIM_PHRASE_CONFIG=
|
||||||
|
|
||||||
|
|||||||
26
settings/phrase_settings.py
Normal file
26
settings/phrase_settings.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
"""
|
||||||
|
These settings control the import of special phrases from the wiki.
|
||||||
|
"""
|
||||||
|
#class/type combinations to exclude
|
||||||
|
BLACK_LIST = {
|
||||||
|
'bounday': [
|
||||||
|
'administrative'
|
||||||
|
],
|
||||||
|
'place': [
|
||||||
|
'house',
|
||||||
|
'houses'
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
|
#If a class is in the white list then all types will
|
||||||
|
#be ignored except the ones given in the list.
|
||||||
|
#Also use this list to exclude an entire class from
|
||||||
|
#special phrases.
|
||||||
|
WHITE_LIST = {
|
||||||
|
'highway': [
|
||||||
|
'bus_stop',
|
||||||
|
'rest_area',
|
||||||
|
'raceway'
|
||||||
|
],
|
||||||
|
'building': []
|
||||||
|
}
|
||||||
@@ -64,7 +64,6 @@ def test_cli_help(capsys):
|
|||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("command,script", [
|
@pytest.mark.parametrize("command,script", [
|
||||||
(('special-phrases',), 'specialphrases'),
|
|
||||||
(('add-data', '--file', 'foo.osm'), 'update'),
|
(('add-data', '--file', 'foo.osm'), 'update'),
|
||||||
(('export',), 'export')
|
(('export',), 'export')
|
||||||
])
|
])
|
||||||
@@ -172,6 +171,12 @@ def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ra
|
|||||||
assert bnd_mock.called == do_bnds
|
assert bnd_mock.called == do_bnds
|
||||||
assert rank_mock.called == do_ranks
|
assert rank_mock.called == do_ranks
|
||||||
|
|
||||||
|
def test_special_phrases_command(temp_db, mock_func_factory):
|
||||||
|
func = mock_func_factory(nominatim.clicmd.special_phrases, 'import_from_wiki')
|
||||||
|
|
||||||
|
call_nominatim('import-special-phrases', '--from-wiki')
|
||||||
|
|
||||||
|
assert func.called == 1
|
||||||
|
|
||||||
@pytest.mark.parametrize("command,func", [
|
@pytest.mark.parametrize("command,func", [
|
||||||
('postcodes', 'update_postcodes'),
|
('postcodes', 'update_postcodes'),
|
||||||
|
|||||||
164
test/python/test_tools_import_special_phrases.py
Normal file
164
test/python/test_tools_import_special_phrases.py
Normal file
@@ -0,0 +1,164 @@
|
|||||||
|
"""
|
||||||
|
Tests for import special phrases functions
|
||||||
|
"""
|
||||||
|
import pytest
|
||||||
|
from nominatim.tools.special_phrases import _create_place_classtype_indexes, _create_place_classtype_table, _get_wiki_content, _grant_access_to_webuser, _process_amenity
|
||||||
|
|
||||||
|
def test_get_wiki_content():
|
||||||
|
assert _get_wiki_content('fr')
|
||||||
|
|
||||||
|
def execute_and_verify_add_word(temp_db_conn, phrase_label, phrase_class, phrase_type):
|
||||||
|
_process_amenity(temp_db_conn, phrase_label, phrase_class, phrase_type, '')
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute(f"""
|
||||||
|
SELECT * FROM word
|
||||||
|
WHERE word_token=' {phrase_label}'
|
||||||
|
AND word='{phrase_label}'
|
||||||
|
AND class='{phrase_class}'
|
||||||
|
AND type='{phrase_type}'""")
|
||||||
|
return temp_db_cursor.fetchone()
|
||||||
|
|
||||||
|
def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, phrase_operator):
|
||||||
|
_process_amenity(temp_db_conn, phrase_label, phrase_class, phrase_type, phrase_operator)
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute(f"""
|
||||||
|
SELECT * FROM word
|
||||||
|
WHERE word_token=' {phrase_label}'
|
||||||
|
AND word='{phrase_label}'
|
||||||
|
AND class='{phrase_class}'
|
||||||
|
AND type='{phrase_type}'
|
||||||
|
AND operator='{phrase_operator}'""")
|
||||||
|
return temp_db_cursor.fetchone()
|
||||||
|
|
||||||
|
def test_process_amenity_with_near_operator(temp_db_conn, word_table, amenity_operator_funcs):
|
||||||
|
phrase_label = 'label'
|
||||||
|
phrase_class = 'class'
|
||||||
|
phrase_type = 'type'
|
||||||
|
|
||||||
|
assert execute_and_verify_add_word(temp_db_conn, phrase_label, phrase_class, phrase_type)
|
||||||
|
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, 'near')
|
||||||
|
assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, 'in')
|
||||||
|
|
||||||
|
def index_exists(db_connect, index):
|
||||||
|
""" Check that an index with the given name exists in the database.
|
||||||
|
"""
|
||||||
|
with db_connect.cursor() as cur:
|
||||||
|
cur.execute("""SELECT tablename FROM pg_indexes
|
||||||
|
WHERE indexname = %s and schemaname = 'public'""", (index, ))
|
||||||
|
if cur.rowcount == 0:
|
||||||
|
return False
|
||||||
|
return True
|
||||||
|
|
||||||
|
def test_create_place_classtype_indexes(temp_db_conn):
|
||||||
|
phrase_class = 'class'
|
||||||
|
phrase_type = 'type'
|
||||||
|
table_name = f'place_classtype_{phrase_class}_{phrase_type}'
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute("CREATE EXTENSION postgis;")
|
||||||
|
temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)')
|
||||||
|
|
||||||
|
_create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type)
|
||||||
|
|
||||||
|
centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid')
|
||||||
|
place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id')
|
||||||
|
|
||||||
|
assert centroid_index_exists and place_id_index_exists
|
||||||
|
|
||||||
|
def test_create_place_classtype_table(temp_db_conn, placex_table):
|
||||||
|
phrase_class = 'class'
|
||||||
|
phrase_type = 'type'
|
||||||
|
_create_place_classtype_table(temp_db_conn, '', phrase_class, phrase_type)
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute(f"""
|
||||||
|
SELECT *
|
||||||
|
FROM information_schema.tables
|
||||||
|
WHERE table_type='BASE TABLE'
|
||||||
|
AND table_name='place_classtype_{phrase_class}_{phrase_type}'""")
|
||||||
|
result = temp_db_cursor.fetchone()
|
||||||
|
assert result
|
||||||
|
|
||||||
|
def test_grant_access_to_web_user(temp_db_conn, def_config):
|
||||||
|
phrase_class = 'class'
|
||||||
|
phrase_type = 'type'
|
||||||
|
table_name = f'place_classtype_{phrase_class}_{phrase_type}'
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute(f'CREATE TABLE {table_name}()')
|
||||||
|
|
||||||
|
_grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type)
|
||||||
|
|
||||||
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
|
temp_db_cursor.execute(f"""
|
||||||
|
SELECT * FROM information_schema.role_table_grants
|
||||||
|
WHERE table_name='{table_name}'
|
||||||
|
AND grantee='{def_config.DATABASE_WEBUSER}'
|
||||||
|
AND privilege_type='SELECT'""")
|
||||||
|
result = temp_db_cursor.fetchone()
|
||||||
|
assert result
|
||||||
|
|
||||||
|
@pytest.fixture
|
||||||
|
def amenity_operator_funcs(temp_db_cursor):
|
||||||
|
temp_db_cursor.execute(f"""
|
||||||
|
CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
o TEXT;
|
||||||
|
BEGIN
|
||||||
|
RETURN name; --Basically return the same name for the tests
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql IMMUTABLE;
|
||||||
|
|
||||||
|
CREATE SEQUENCE seq_word start 1;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT,
|
||||||
|
lookup_class text, lookup_type text)
|
||||||
|
RETURNS INTEGER
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
lookup_token TEXT;
|
||||||
|
return_word_id INTEGER;
|
||||||
|
BEGIN
|
||||||
|
lookup_token := ' '||trim(lookup_word);
|
||||||
|
SELECT min(word_id) FROM word
|
||||||
|
WHERE word_token = lookup_token and word = lookup_word
|
||||||
|
and class = lookup_class and type = lookup_type
|
||||||
|
INTO return_word_id;
|
||||||
|
IF return_word_id IS NULL THEN
|
||||||
|
return_word_id := nextval('seq_word');
|
||||||
|
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
||||||
|
lookup_class, lookup_type, null, 0);
|
||||||
|
END IF;
|
||||||
|
RETURN return_word_id;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;
|
||||||
|
|
||||||
|
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
|
||||||
|
lookup_class text,
|
||||||
|
lookup_type text,
|
||||||
|
op text)
|
||||||
|
RETURNS INTEGER
|
||||||
|
AS $$
|
||||||
|
DECLARE
|
||||||
|
lookup_token TEXT;
|
||||||
|
return_word_id INTEGER;
|
||||||
|
BEGIN
|
||||||
|
lookup_token := ' '||trim(lookup_word);
|
||||||
|
SELECT min(word_id) FROM word
|
||||||
|
WHERE word_token = lookup_token and word = lookup_word
|
||||||
|
and class = lookup_class and type = lookup_type and operator = op
|
||||||
|
INTO return_word_id;
|
||||||
|
IF return_word_id IS NULL THEN
|
||||||
|
return_word_id := nextval('seq_word');
|
||||||
|
INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
|
||||||
|
lookup_class, lookup_type, null, 0, op);
|
||||||
|
END IF;
|
||||||
|
RETURN return_word_id;
|
||||||
|
END;
|
||||||
|
$$
|
||||||
|
LANGUAGE plpgsql;""")
|
||||||
Reference in New Issue
Block a user