mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-06 18:14:16 +00:00
Added --no-replace option for special phrases import and added corresponding tests
This commit is contained in:
@@ -27,6 +27,8 @@ class ImportSpecialPhrases:
|
|||||||
help='Import special phrases from the OSM wiki to the database.')
|
help='Import special phrases from the OSM wiki to the database.')
|
||||||
group.add_argument('--import-from-csv', metavar='FILE',
|
group.add_argument('--import-from-csv', metavar='FILE',
|
||||||
help='Import special phrases from a CSV file.')
|
help='Import special phrases from a CSV file.')
|
||||||
|
group.add_argument('--no-replace', action='store_true',
|
||||||
|
help='Keep the old phrases and only add the new ones.')
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def run(args):
|
def run(args):
|
||||||
@@ -51,7 +53,8 @@ class ImportSpecialPhrases:
|
|||||||
from ..tokenizer import factory as tokenizer_factory
|
from ..tokenizer import factory as tokenizer_factory
|
||||||
|
|
||||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||||
|
should_replace = not args.no_replace
|
||||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||||
SPImporter(
|
SPImporter(
|
||||||
args.config, args.phplib_dir, db_connection, loader
|
args.config, args.phplib_dir, db_connection, loader
|
||||||
).import_phrases(tokenizer)
|
).import_phrases(tokenizer, should_replace)
|
||||||
|
|||||||
@@ -306,7 +306,7 @@ class LegacyICUNameAnalyzer:
|
|||||||
# WHERE word_id is null and type = 'postcode'""")
|
# WHERE word_id is null and type = 'postcode'""")
|
||||||
|
|
||||||
|
|
||||||
def update_special_phrases(self, phrases):
|
def update_special_phrases(self, phrases, should_replace):
|
||||||
""" Replace the search index for special phrases with the new phrases.
|
""" Replace the search index for special phrases with the new phrases.
|
||||||
"""
|
"""
|
||||||
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
||||||
@@ -345,7 +345,7 @@ class LegacyICUNameAnalyzer:
|
|||||||
columns=['word', 'word_token', 'class', 'type',
|
columns=['word', 'word_token', 'class', 'type',
|
||||||
'operator', 'search_name_count'])
|
'operator', 'search_name_count'])
|
||||||
|
|
||||||
if to_delete:
|
if to_delete and should_replace:
|
||||||
psycopg2.extras.execute_values(
|
psycopg2.extras.execute_values(
|
||||||
cur,
|
cur,
|
||||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||||
|
|||||||
@@ -314,7 +314,7 @@ class LegacyNameAnalyzer:
|
|||||||
FROM location_postcode) x""")
|
FROM location_postcode) x""")
|
||||||
|
|
||||||
|
|
||||||
def update_special_phrases(self, phrases):
|
def update_special_phrases(self, phrases, should_replace):
|
||||||
""" Replace the search index for special phrases with the new phrases.
|
""" Replace the search index for special phrases with the new phrases.
|
||||||
"""
|
"""
|
||||||
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
norm_phrases = set(((self.normalize(p[0]), p[1], p[2], p[3])
|
||||||
@@ -343,7 +343,7 @@ class LegacyNameAnalyzer:
|
|||||||
FROM (VALUES %s) as v(name, class, type, op))""",
|
FROM (VALUES %s) as v(name, class, type, op))""",
|
||||||
to_add)
|
to_add)
|
||||||
|
|
||||||
if to_delete:
|
if to_delete and should_replace:
|
||||||
psycopg2.extras.execute_values(
|
psycopg2.extras.execute_values(
|
||||||
cur,
|
cur,
|
||||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||||
|
|||||||
@@ -23,7 +23,7 @@ LOG = logging.getLogger()
|
|||||||
class SPImporter():
|
class SPImporter():
|
||||||
# pylint: disable-msg=too-many-instance-attributes
|
# pylint: disable-msg=too-many-instance-attributes
|
||||||
"""
|
"""
|
||||||
Class handling the process of special phrases importations into the database.
|
Class handling the process of special phrases importation into the database.
|
||||||
|
|
||||||
Takes an SP loader which loads the phrases from an external source.
|
Takes an SP loader which loads the phrases from an external source.
|
||||||
"""
|
"""
|
||||||
@@ -42,10 +42,14 @@ class SPImporter():
|
|||||||
#special phrases class/type on the wiki.
|
#special phrases class/type on the wiki.
|
||||||
self.table_phrases_to_delete = set()
|
self.table_phrases_to_delete = set()
|
||||||
|
|
||||||
def import_phrases(self, tokenizer):
|
def import_phrases(self, tokenizer, should_replace):
|
||||||
"""
|
"""
|
||||||
Iterate through all specified languages and
|
Iterate through all SpecialPhrases extracted from the
|
||||||
extract corresponding special phrases from the wiki.
|
loader and import them into the database.
|
||||||
|
|
||||||
|
If should_replace is set to True only the loaded phrases
|
||||||
|
will be kept in the database. All other phrases already
|
||||||
|
in the database will be removed.
|
||||||
"""
|
"""
|
||||||
LOG.warning('Special phrases importation starting')
|
LOG.warning('Special phrases importation starting')
|
||||||
self._fetch_existing_place_classtype_tables()
|
self._fetch_existing_place_classtype_tables()
|
||||||
@@ -60,11 +64,12 @@ class SPImporter():
|
|||||||
class_type_pairs.update(result)
|
class_type_pairs.update(result)
|
||||||
|
|
||||||
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
self._create_place_classtype_table_and_indexes(class_type_pairs)
|
||||||
self._remove_non_existent_tables_from_db()
|
if should_replace:
|
||||||
|
self._remove_non_existent_tables_from_db()
|
||||||
self.db_connection.commit()
|
self.db_connection.commit()
|
||||||
|
|
||||||
with tokenizer.name_analyzer() as analyzer:
|
with tokenizer.name_analyzer() as analyzer:
|
||||||
analyzer.update_special_phrases(self.word_phrases)
|
analyzer.update_special_phrases(self.word_phrases, should_replace)
|
||||||
|
|
||||||
LOG.warning('Import done.')
|
LOG.warning('Import done.')
|
||||||
self.statistics_handler.notify_import_done()
|
self.statistics_handler.notify_import_done()
|
||||||
|
|||||||
@@ -54,7 +54,7 @@ class DummyNameAnalyzer:
|
|||||||
def add_postcodes_from_db(self):
|
def add_postcodes_from_db(self):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def update_special_phrases(self, phrases):
|
def update_special_phrases(self, phrases, should_replace):
|
||||||
self.analyser_cache['special_phrases'] = phrases
|
self.analyser_cache['special_phrases'] = phrases
|
||||||
|
|
||||||
def add_country_names(self, code, names):
|
def add_country_names(self, code, names):
|
||||||
|
|||||||
@@ -255,18 +255,27 @@ def test_index_command(mock_func_factory, temp_db_cursor, tokenizer_mock,
|
|||||||
assert bnd_mock.called == do_bnds
|
assert bnd_mock.called == do_bnds
|
||||||
assert rank_mock.called == do_ranks
|
assert rank_mock.called == do_ranks
|
||||||
|
|
||||||
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock):
|
@pytest.mark.parametrize("no_replace", [(True), (False)])
|
||||||
|
def test_special_phrases_wiki_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
|
||||||
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
||||||
|
|
||||||
call_nominatim('special-phrases', '--import-from-wiki')
|
if no_replace:
|
||||||
|
call_nominatim('special-phrases', '--import-from-wiki', '--no-replace')
|
||||||
|
else:
|
||||||
|
call_nominatim('special-phrases', '--import-from-wiki')
|
||||||
|
|
||||||
assert func.called == 1
|
assert func.called == 1
|
||||||
|
|
||||||
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock):
|
@pytest.mark.parametrize("no_replace", [(True), (False)])
|
||||||
|
def test_special_phrases_csv_command(temp_db, mock_func_factory, tokenizer_mock, no_replace):
|
||||||
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
func = mock_func_factory(nominatim.clicmd.special_phrases.SPImporter, 'import_phrases')
|
||||||
testdata = Path('__file__') / '..' / '..' / 'testdb'
|
testdata = SRC_DIR / 'test' / 'testdb'
|
||||||
csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
|
csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
|
||||||
call_nominatim('special-phrases', '--import-from-csv', csv_path)
|
|
||||||
|
if no_replace:
|
||||||
|
call_nominatim('special-phrases', '--import-from-csv', csv_path, '--no-replace')
|
||||||
|
else:
|
||||||
|
call_nominatim('special-phrases', '--import-from-csv', csv_path)
|
||||||
|
|
||||||
assert func.called == 1
|
assert func.called == 1
|
||||||
|
|
||||||
|
|||||||
@@ -209,7 +209,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor,
|
|||||||
("König bei", "amenity", "royal", "near"),
|
("König bei", "amenity", "royal", "near"),
|
||||||
("Könige", "amenity", "royal", "-"),
|
("Könige", "amenity", "royal", "-"),
|
||||||
("strasse", "highway", "primary", "in")
|
("strasse", "highway", "primary", "in")
|
||||||
])
|
], True)
|
||||||
|
|
||||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||||
FROM word WHERE class != 'place'""") \
|
FROM word WHERE class != 'place'""") \
|
||||||
@@ -226,11 +226,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor,
|
|||||||
|
|
||||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
analyzer.update_special_phrases([])
|
analyzer.update_special_phrases([], True)
|
||||||
|
|
||||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,
|
||||||
|
make_standard_name):
|
||||||
|
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||||
|
VALUES (' foo', 'foo', 'amenity', 'prison', 'in'),
|
||||||
|
(' bar', 'bar', 'highway', 'road', null)""")
|
||||||
|
|
||||||
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
analyzer.update_special_phrases([], False)
|
||||||
|
|
||||||
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
|
||||||
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
||||||
make_standard_name):
|
make_standard_name):
|
||||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||||
@@ -243,7 +256,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor,
|
|||||||
('prison', 'amenity', 'prison', 'in'),
|
('prison', 'amenity', 'prison', 'in'),
|
||||||
('bar', 'highway', 'road', '-'),
|
('bar', 'highway', 'road', '-'),
|
||||||
('garden', 'leisure', 'garden', 'near')
|
('garden', 'leisure', 'garden', 'near')
|
||||||
])
|
], True)
|
||||||
|
|
||||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||||
FROM word WHERE class != 'place'""") \
|
FROM word WHERE class != 'place'""") \
|
||||||
|
|||||||
@@ -159,7 +159,7 @@ def test_update_special_phrase_empty_table(analyzer, word_table, temp_db_cursor)
|
|||||||
("König bei", "amenity", "royal", "near"),
|
("König bei", "amenity", "royal", "near"),
|
||||||
("Könige", "amenity", "royal", "-"),
|
("Könige", "amenity", "royal", "-"),
|
||||||
("street", "highway", "primary", "in")
|
("street", "highway", "primary", "in")
|
||||||
])
|
], True)
|
||||||
|
|
||||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||||
FROM word WHERE class != 'place'""") \
|
FROM word WHERE class != 'place'""") \
|
||||||
@@ -176,11 +176,24 @@ def test_update_special_phrase_delete_all(analyzer, word_table, temp_db_cursor):
|
|||||||
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
with analyzer() as a:
|
with analyzer() as a:
|
||||||
a.update_special_phrases([])
|
a.update_special_phrases([], True)
|
||||||
|
|
||||||
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
assert 0 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
|
||||||
|
def test_update_special_phrases_no_replace(analyzer, word_table, temp_db_cursor,):
|
||||||
|
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||||
|
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
|
||||||
|
(' BAR', 'bar', 'highway', 'road', null)""")
|
||||||
|
|
||||||
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
with analyzer() as a:
|
||||||
|
a.update_special_phrases([], False)
|
||||||
|
|
||||||
|
assert 2 == temp_db_cursor.scalar("SELECT count(*) FROM word WHERE class != 'place'""")
|
||||||
|
|
||||||
|
|
||||||
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
|
def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
|
||||||
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
temp_db_cursor.execute("""INSERT INTO word (word_token, word, class, type, operator)
|
||||||
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
|
VALUES (' FOO', 'foo', 'amenity', 'prison', 'in'),
|
||||||
@@ -193,7 +206,7 @@ def test_update_special_phrase_modify(analyzer, word_table, temp_db_cursor):
|
|||||||
('prison', 'amenity', 'prison', 'in'),
|
('prison', 'amenity', 'prison', 'in'),
|
||||||
('bar', 'highway', 'road', '-'),
|
('bar', 'highway', 'road', '-'),
|
||||||
('garden', 'leisure', 'garden', 'near')
|
('garden', 'leisure', 'garden', 'near')
|
||||||
])
|
], True)
|
||||||
|
|
||||||
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
assert temp_db_cursor.row_set("""SELECT word_token, word, class, type, operator
|
||||||
FROM word WHERE class != 'place'""") \
|
FROM word WHERE class != 'place'""") \
|
||||||
|
|||||||
@@ -185,8 +185,9 @@ def test_remove_non_existent_tables_from_db(sp_importer, default_phrases,
|
|||||||
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
|
tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("should_replace", [(True), (False)])
|
||||||
def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
||||||
placex_table, tokenizer_mock):
|
placex_table, tokenizer_mock, should_replace):
|
||||||
"""
|
"""
|
||||||
Check that the main import_phrases() method is well executed.
|
Check that the main import_phrases() method is well executed.
|
||||||
It should create the place_classtype table, the place_id and centroid indexes,
|
It should create the place_classtype table, the place_id and centroid indexes,
|
||||||
@@ -202,10 +203,10 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
|
CREATE TABLE place_classtype_wrongclass_wrongtype();""")
|
||||||
|
|
||||||
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
|
||||||
mock_get_wiki_content)
|
mock_get_wiki_content)
|
||||||
|
|
||||||
tokenizer = tokenizer_mock()
|
tokenizer = tokenizer_mock()
|
||||||
sp_importer.import_phrases(tokenizer)
|
sp_importer.import_phrases(tokenizer, should_replace)
|
||||||
|
|
||||||
assert len(tokenizer.analyser_cache['special_phrases']) == 18
|
assert len(tokenizer.analyser_cache['special_phrases']) == 18
|
||||||
|
|
||||||
@@ -216,7 +217,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
|
assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
|
||||||
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
|
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
|
||||||
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
|
assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
|
||||||
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
|
if should_replace:
|
||||||
|
assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
|
||||||
|
|
||||||
#Format (query, should_return_something_bool) used to easily execute all asserts
|
#Format (query, should_return_something_bool) used to easily execute all asserts
|
||||||
queries_tests = set()
|
queries_tests = set()
|
||||||
@@ -237,7 +239,8 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
WHERE table_schema='public'
|
WHERE table_schema='public'
|
||||||
AND table_name = 'place_classtype_wrongclass_wrongtype';
|
AND table_name = 'place_classtype_wrongclass_wrongtype';
|
||||||
"""
|
"""
|
||||||
queries_tests.add((query_wrong_table, False))
|
if should_replace:
|
||||||
|
queries_tests.add((query_wrong_table, False))
|
||||||
|
|
||||||
with temp_db_conn.cursor() as temp_db_cursor:
|
with temp_db_conn.cursor() as temp_db_cursor:
|
||||||
for query in queries_tests:
|
for query in queries_tests:
|
||||||
@@ -247,7 +250,7 @@ def test_import_phrases(monkeypatch, temp_db_conn, def_config, sp_importer,
|
|||||||
else:
|
else:
|
||||||
assert not temp_db_cursor.fetchone()
|
assert not temp_db_cursor.fetchone()
|
||||||
|
|
||||||
def mock_get_wiki_content(lang):
|
def mock_get_wiki_content(self, lang):
|
||||||
"""
|
"""
|
||||||
Mock the _get_wiki_content() method to return
|
Mock the _get_wiki_content() method to return
|
||||||
static xml test file content.
|
static xml test file content.
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ def test_parse_csv(sp_csv_loader):
|
|||||||
phrases = sp_csv_loader.parse_csv()
|
phrases = sp_csv_loader.parse_csv()
|
||||||
assert check_phrases_content(phrases)
|
assert check_phrases_content(phrases)
|
||||||
|
|
||||||
|
|
||||||
def test_next(sp_csv_loader):
|
def test_next(sp_csv_loader):
|
||||||
"""
|
"""
|
||||||
Test objects returned from the next() method.
|
Test objects returned from the next() method.
|
||||||
|
|||||||
@@ -47,7 +47,7 @@ def sp_wiki_loader(monkeypatch, def_config):
|
|||||||
mock_get_wiki_content)
|
mock_get_wiki_content)
|
||||||
return loader
|
return loader
|
||||||
|
|
||||||
def mock_get_wiki_content(lang):
|
def mock_get_wiki_content(self, lang):
|
||||||
"""
|
"""
|
||||||
Mock the _get_wiki_content() method to return
|
Mock the _get_wiki_content() method to return
|
||||||
static xml test file content.
|
static xml test file content.
|
||||||
|
|||||||
Reference in New Issue
Block a user