mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 21:34:06 +00:00
move country name generation to country_info module
This commit is contained in:
@@ -113,8 +113,8 @@ class SetupAll:
|
|||||||
database_import.create_search_indices(conn, args.config,
|
database_import.create_search_indices(conn, args.config,
|
||||||
drop=args.no_updates)
|
drop=args.no_updates)
|
||||||
LOG.warning('Create search index for default country names.')
|
LOG.warning('Create search index for default country names.')
|
||||||
database_import.create_country_names(conn, tokenizer,
|
country_info.create_country_names(conn, tokenizer,
|
||||||
args.config.LANGUAGES)
|
args.config.LANGUAGES)
|
||||||
conn.commit()
|
conn.commit()
|
||||||
if args.no_updates:
|
if args.no_updates:
|
||||||
freeze.drop_update_tables(conn)
|
freeze.drop_update_tables(conn)
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
"""
|
"""
|
||||||
Functions for importing and managing static country information.
|
Functions for importing and managing static country information.
|
||||||
"""
|
"""
|
||||||
|
import psycopg2.extras
|
||||||
|
|
||||||
from nominatim.db import utils as db_utils
|
from nominatim.db import utils as db_utils
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
@@ -16,3 +18,38 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
|
|||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('UPDATE country_name SET partition = 0')
|
cur.execute('UPDATE country_name SET partition = 0')
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|
||||||
|
def create_country_names(conn, tokenizer, languages=None):
    """ Add default country names to search index.

        `conn` is the database connection the `country_name` table is read
        from; it is committed before returning. `tokenizer` must provide
        `name_analyzer()`, a context manager yielding an object with an
        `add_country_names(country_code, names)` method.

        `languages` is a comma-separated list of language codes as used in
        OSM. If it names at least one language then only name translations
        for the given languages are added to the index. Whitespace around
        the codes and empty list entries are ignored, so 'fr,en', 'fr, en'
        and ' fr,en,' all select the same languages. The untranslated
        `name` entry is always added.
    """
    # Normalise the language list once: strip padding so that a setting
    # like "fr, en" still matches the key suffixes, and use a set for
    # the repeated membership tests below.
    wanted = set()
    if languages:
        wanted = {lang.strip() for lang in languages.split(',')}
        wanted.discard('')

    def _include_key(key):
        # Keep the default name and any 'name:<lang>' translation that
        # passes the (optional) language filter.
        if key == 'name':
            return True
        return key.startswith('name:') and (not wanted or key[5:] in wanted)

    with conn.cursor() as cur:
        # The code below calls name.items(), so the hstore column has to
        # come back as a dict-like value; register the adapter for that.
        psycopg2.extras.register_hstore(cur)
        cur.execute("""SELECT country_code, name FROM country_name
                       WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for code, name in cur:
                names = {'countrycode': code}
                # Hard-coded short forms that are added in addition to
                # whatever the name column provides.
                if code == 'gb':
                    names['short_name'] = 'UK'
                if code == 'us':
                    names['short_name'] = 'United States'

                # country names (only in languages as provided)
                if name:
                    names.update((k, v) for k, v in name.items() if _include_key(k))

                analyzer.add_country_names(code, names)

    conn.commit()
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ import subprocess
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
import psycopg2.extras
|
|
||||||
from psycopg2 import sql as pysql
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.db.connection import connect, get_pg_env
|
from nominatim.db.connection import connect, get_pg_env
|
||||||
@@ -235,38 +234,3 @@ def create_search_indices(conn, config, drop=False):
|
|||||||
sql = SQLPreprocessor(conn, config)
|
sql = SQLPreprocessor(conn, config)
|
||||||
|
|
||||||
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
||||||
|
|
||||||
|
|
||||||
def create_country_names(conn, tokenizer, languages=None):
    """ Feed the default country names into the search index.

        `languages` is a comma-separated string of OSM language codes.
        When it is non-empty, translated names ('name:<lang>') are only
        added for the listed languages; otherwise all translations are
        added. The plain 'name' entry is always included.
    """
    # None means "no language restriction".
    allowed = languages.split(',') if languages else None

    # Extra short names that are added for specific country codes.
    short_names = {'gb': 'UK', 'us': 'United States'}

    with conn.cursor() as cur:
        psycopg2.extras.register_hstore(cur)
        cur.execute("""SELECT country_code, name FROM country_name
                       WHERE country_code is not null""")

        with tokenizer.name_analyzer() as analyzer:
            for country_code, name_tags in cur:
                names = {'countrycode': country_code}
                if country_code in short_names:
                    names['short_name'] = short_names[country_code]

                # country names (only in languages as provided)
                if name_tags:
                    for key, value in name_tags.items():
                        if key == 'name':
                            names[key] = value
                        elif key.startswith('name:') \
                             and (allowed is None or key[5:] in allowed):
                            names[key] = value

                analyzer.add_country_names(country_code, names)

    conn.commit()
|
|
||||||
|
|||||||
@@ -180,7 +180,7 @@ class TestCliWithDb:
|
|||||||
mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'),
|
mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
|
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
|
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
|
||||||
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
|
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
@@ -204,7 +204,7 @@ class TestCliWithDb:
|
|||||||
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'load_data'),
|
mock_func_factory(nominatim.tools.database_import, 'load_data'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
|
mock_func_factory(nominatim.tools.postcodes, 'update_postcodes'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
@@ -222,7 +222,7 @@ class TestCliWithDb:
|
|||||||
temp_db_conn):
|
temp_db_conn):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
mock_func_factory(nominatim.db.properties, 'set_property')
|
mock_func_factory(nominatim.db.properties, 'set_property')
|
||||||
@@ -243,7 +243,7 @@ class TestCliWithDb:
|
|||||||
def test_import_continue_postprocess(self, mock_func_factory):
|
def test_import_continue_postprocess(self, mock_func_factory):
|
||||||
mocks = [
|
mocks = [
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
|
||||||
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
|
mock_func_factory(nominatim.tools.country_info, 'create_country_names'),
|
||||||
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
|
||||||
mock_func_factory(nominatim.db.properties, 'set_property')
|
mock_func_factory(nominatim.db.properties, 'set_property')
|
||||||
]
|
]
|
||||||
|
|||||||
@@ -156,29 +156,3 @@ def test_load_data(dsn, place_row, placex_table, osmline_table,
|
|||||||
|
|
||||||
assert temp_db_cursor.table_rows('placex') == 30
|
assert temp_db_cursor.table_rows('placex') == 30
|
||||||
assert temp_db_cursor.table_rows('location_property_osmline') == 1
|
assert temp_db_cursor.table_rows('location_property_osmline') == 1
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("languages", (None, ' fr,en'))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
                              table_factory, tokenizer_mock, languages):
    """ Check which country names reach the tokenizer, both without a
        language restriction and with the list ' fr,en'.
    """
    country_rows = (('us', '"name"=>"us1","name:af"=>"us2"'),
                    ('fr', '"name"=>"Fra", "name:en"=>"Fren"'))
    table_factory('country_name', 'country_code varchar(2), name hstore',
                  content=country_rows)

    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

    tokenizer = tokenizer_mock()

    database_import.create_country_names(temp_db_conn, tokenizer, languages)

    countries = tokenizer.analyser_cache['countries']
    assert len(countries) == 2

    result_set = {code: set(names.values()) for code, names in countries}

    # The 'name:af' translation is only expected when no language
    # restriction was given.
    expected_us = {'us', 'us1', 'United States'}
    if not languages:
        expected_us.add('us2')

    assert result_set == {'us': expected_us,
                          'fr': {'fr', 'Fra', 'Fren'}}
|
|
||||||
|
|||||||
Reference in New Issue
Block a user