move country name generation to country_info module

This commit is contained in:
Sarah Hoffmann
2021-09-01 22:08:39 +02:00
parent 284645f505
commit 78fcabade8
5 changed files with 43 additions and 68 deletions

View File

@@ -113,8 +113,8 @@ class SetupAll:
database_import.create_search_indices(conn, args.config, database_import.create_search_indices(conn, args.config,
drop=args.no_updates) drop=args.no_updates)
LOG.warning('Create search index for default country names.') LOG.warning('Create search index for default country names.')
database_import.create_country_names(conn, tokenizer, country_info.create_country_names(conn, tokenizer,
args.config.LANGUAGES) args.config.LANGUAGES)
conn.commit() conn.commit()
if args.no_updates: if args.no_updates:
freeze.drop_update_tables(conn) freeze.drop_update_tables(conn)

View File

@@ -1,6 +1,8 @@
""" """
Functions for importing and managing static country information. Functions for importing and managing static country information.
""" """
import psycopg2.extras
from nominatim.db import utils as db_utils from nominatim.db import utils as db_utils
from nominatim.db.connection import connect from nominatim.db.connection import connect
@@ -16,3 +18,38 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute('UPDATE country_name SET partition = 0') cur.execute('UPDATE country_name SET partition = 0')
conn.commit() conn.commit()
def create_country_names(conn, tokenizer, languages=None):
    """ Add default country names to search index.

        `conn` is an open database connection. The names are read from the
        `country_name` table and the connection is committed at the end.
        `tokenizer` must provide `name_analyzer()`, a context manager whose
        result offers `add_country_names(code, names)`.

        `languages` is a comma-separated list of language codes as used in
        OSM. If `languages` is not empty then only name translations for the
        given languages are added to the index. White space around the
        individual codes is ignored, so 'en, de' selects the same names
        as 'en,de'.
    """
    if languages:
        # Strip every code: an unstripped ' de' would never match the
        # suffix of a 'name:de' key and the translation would be dropped
        # silently. A set keeps the lookup in _include_key() cheap.
        languages = {lang.strip() for lang in languages.split(',')}

    def _include_key(key):
        # The plain 'name' is always used. Translations ('name:<lang>')
        # are only used when they pass the optional language filter.
        return key == 'name' or \
               (key.startswith('name:') and (not languages or key[5:] in languages))

    with conn.cursor() as cur:
        # Have the hstore column 'name' returned as a Python dict.
        psycopg2.extras.register_hstore(cur)
        cur.execute("""SELECT country_code, name FROM country_name
WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for code, name in cur:
                names = {'countrycode': code}
                # Hard-coded short names for these two countries.
                if code == 'gb':
                    names['short_name'] = 'UK'
                if code == 'us':
                    names['short_name'] = 'United States'

                # country names (only in languages as provided)
                if name:
                    names.update((k, v) for k, v in name.items() if _include_key(k))

                analyzer.add_country_names(code, names)

    conn.commit()

View File

@@ -8,7 +8,6 @@ import subprocess
from pathlib import Path from pathlib import Path
import psutil import psutil
import psycopg2.extras
from psycopg2 import sql as pysql from psycopg2 import sql as pysql
from nominatim.db.connection import connect, get_pg_env from nominatim.db.connection import connect, get_pg_env
@@ -235,38 +234,3 @@ def create_search_indices(conn, config, drop=False):
sql = SQLPreprocessor(conn, config) sql = SQLPreprocessor(conn, config)
sql.run_sql_file(conn, 'indices.sql', drop=drop) sql.run_sql_file(conn, 'indices.sql', drop=drop)
def create_country_names(conn, tokenizer, languages=None):
    """ Feed the default names of all countries into the search index.

        Every row of the `country_name` table that has a country code is
        handed to the name analyzer of `tokenizer` together with the
        country code itself and, for 'gb' and 'us', a fixed short name.

        `languages` is a comma-separated list of language codes as used in
        OSM. When it is not empty, only the translations ('name:<lang>')
        for the listed languages are indexed. The untranslated 'name' is
        always indexed. The connection is committed at the end.
    """
    wanted = languages.split(',') if languages else languages

    def _include_key(key):
        if key == 'name':
            return True
        if not key.startswith('name:'):
            return False
        # Without a language filter every translation is accepted.
        return not wanted or key[5:] in wanted

    short_names = {'gb': 'UK', 'us': 'United States'}

    with conn.cursor() as cur:
        psycopg2.extras.register_hstore(cur)
        cur.execute("""SELECT country_code, name FROM country_name
WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for country, translations in cur:
                entry = {'countrycode': country}
                if country in short_names:
                    entry['short_name'] = short_names[country]

                # Only copy the names that pass the language filter.
                if translations:
                    for tag, value in translations.items():
                        if _include_key(tag):
                            entry[tag] = value

                analyzer.add_country_names(country, entry)

    conn.commit()

View File

@@ -180,7 +180,7 @@ class TestCliWithDb:
mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'), mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'),
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'), mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
@@ -204,7 +204,7 @@ class TestCliWithDb:
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'), mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'), mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.tools.refresh, 'setup_website'),
@@ -222,7 +222,7 @@ class TestCliWithDb:
temp_db_conn): temp_db_conn):
mocks = [ mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property') mock_func_factory(nominatim.db.properties, 'set_property')
@@ -243,7 +243,7 @@ class TestCliWithDb:
def test_import_continue_postprocess(self, mock_func_factory): def test_import_continue_postprocess(self, mock_func_factory):
mocks = [ mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property') mock_func_factory(nominatim.db.properties, 'set_property')
] ]

View File

@@ -156,29 +156,3 @@ def test_load_data(dsn, place_row, placex_table, osmline_table,
assert temp_db_cursor.table_rows('placex') == 30 assert temp_db_cursor.table_rows('placex') == 30
assert temp_db_cursor.table_rows('location_property_osmline') == 1 assert temp_db_cursor.table_rows('location_property_osmline') == 1
@pytest.mark.parametrize("languages", (None, ' fr,en'))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
                              table_factory, tokenizer_mock, languages):
    """ Check the names handed to the tokenizer for two countries, once
        without a language filter and once restricted to ' fr,en'.

        With the filter the 'name:af' translation of 'us' must be missing,
        without it all names must be present.
    """
    table_factory('country_name', 'country_code varchar(2), name hstore',
                  content=(('us', '"name"=>"us1","name:af"=>"us2"'),
                           ('fr', '"name"=>"Fra", "name:en"=>"Fren"')))

    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

    tokenizer = tokenizer_mock()

    database_import.create_country_names(temp_db_conn, tokenizer, languages)

    # Presumably the mock records each add_country_names() call here as a
    # (country code, names) pair -- confirm against the tokenizer_mock fixture.
    countries = tokenizer.analyser_cache['countries']
    assert len(countries) == 2

    found = {}
    for code, names in countries:
        found[code] = set(names.values())

    expected_us = {'us', 'us1', 'United States'}
    if not languages:
        # No filter: the Afrikaans translation is indexed as well.
        expected_us.add('us2')

    assert found == {'us': expected_us,
                     'fr': {'fr', 'Fra', 'Fren'}}