read partition and languages from config file

This commit is contained in:
Sarah Hoffmann
2021-09-01 23:51:53 +02:00
parent 78fcabade8
commit 79da96b369
6 changed files with 110 additions and 10 deletions

View File

@@ -5,7 +5,7 @@
CREATE TABLE public.country_name ( CREATE TABLE public.country_name (
country_code character varying(2), country_code character varying(2),
name public.hstore, name public.hstore,
country_default_language_code character varying(2), country_default_language_code text,
partition integer partition integer
); );

View File

@@ -55,6 +55,8 @@ class SetupAll:
from ..tools import database_import, refresh, postcodes, freeze, country_info from ..tools import database_import, refresh, postcodes, freeze, country_info
from ..indexer.indexer import Indexer from ..indexer.indexer import Indexer
country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml')
if args.continue_at is None: if args.continue_at is None:
files = args.get_osm_file_list() files = args.get_osm_file_list()

View File

@@ -2,10 +2,37 @@
Functions for importing and managing static country information. Functions for importing and managing static country information.
""" """
import psycopg2.extras import psycopg2.extras
import yaml
from nominatim.db import utils as db_utils from nominatim.db import utils as db_utils
from nominatim.db.connection import connect from nominatim.db.connection import connect
class _CountryInfo:
""" Caches country-specific properties from the configuration file.
"""
def __init__(self):
self._info = {}
def load(self, configfile):
if not self._info:
self._info = yaml.safe_load(configfile.read_text())
def items(self):
return self._info.items()
_COUNTRY_INFO = _CountryInfo()
def setup_country_config(configfile):
    """ Load country properties from the configuration file.
        Needs to be called before using any other functions in this
        file.

        `configfile` is passed on to the module-wide country info cache,
        which only reads the file while it is still empty.
    """
    _COUNTRY_INFO.load(configfile)
def setup_country_tables(dsn, sql_dir, ignore_partitions=False): def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides """ Create and populate the tables with basic static data that provides
the background for geocoding. Data is assumed to not yet exist. the background for geocoding. Data is assumed to not yet exist.
@@ -13,11 +40,27 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
db_utils.execute_file(dsn, sql_dir / 'country_name.sql') db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz') db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
if ignore_partitions: params = []
with connect(dsn) as conn: for ccode, props in _COUNTRY_INFO.items():
with conn.cursor() as cur: if ccode is not None and props is not None:
cur.execute('UPDATE country_name SET partition = 0') if ignore_partitions:
conn.commit() partition = 0
else:
partition = props.get('partition')
if ',' in (props.get('languages', ',') or ','):
lang = None
else:
lang = props['languages']
params.append((ccode, partition, lang))
with connect(dsn) as conn:
with conn.cursor() as cur:
cur.execute_values(
""" UPDATE country_name
SET partition = part, country_default_language_code = lang
FROM (VALUES %s) AS v (cc, part, lang)
WHERE country_code = v.cc""", params)
conn.commit()
def create_country_names(conn, tokenizer, languages=None): def create_country_names(conn, tokenizer, languages=None):

View File

@@ -36,7 +36,7 @@ am:
# Netherlands Antilles (De Nederlandse Antillen) # Netherlands Antilles (De Nederlandse Antillen)
an: an:
partition: 58 partition: 58
languages: languages: nl, en, pap
# Angola (Angola) # Angola (Angola)
ao: ao:
@@ -834,7 +834,7 @@ nl:
languages: nl languages: nl
# Norway (Norge) # Norway (Norge)
no: "no":
partition: 60 partition: 60
languages: nb, nn, no, se languages: nb, nn, no, se
@@ -1226,7 +1226,7 @@ ws:
# Kosovo (Kosova / Kosovo) # Kosovo (Kosova / Kosovo)
xk: xk:
partition: 59 partition: 59
languages:sq, sr languages: sq, sr
# Yemen (اليمن) # Yemen (اليمن)
ye: ye:

View File

@@ -5,7 +5,7 @@ from pathlib import Path
import psycopg2 import psycopg2
import pytest import pytest
SRC_DIR = Path(__file__) / '..' / '..' / '..' SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
# always test against the source # always test against the source
sys.path.insert(0, str(SRC_DIR.resolve())) sys.path.insert(0, str(SRC_DIR.resolve()))

View File

@@ -0,0 +1,55 @@
"""
Tests for functions that handle country properties.
"""
import pytest
from nominatim.tools import country_info
@pytest.fixture(autouse=True)
def read_config(def_config):
    """ Load the country settings from the configuration directory of
        `def_config`. Applied automatically to the tests in this module.
    """
    settings_file = def_config.config_dir / 'country_settings.yaml'
    country_info.setup_country_config(settings_file)
@pytest.mark.parametrize("no_partitions", (True, False))
def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cursor,
                              def_config, no_partitions):
    """ Set up the country tables with and without partitions and check
        the resulting content of country_name and country_osm_grid.
    """
    data_dir = src_dir / 'data'
    country_info.setup_country_tables(dsn, data_dir, no_partitions)

    assert temp_db_cursor.table_exists('country_name')
    # The row count must equal the number of distinct country codes.
    num_rows = temp_db_cursor.table_rows('country_name')
    num_codes = temp_db_cursor.scalar(
        'SELECT count(DISTINCT country_code) FROM country_name')
    assert num_rows == num_codes

    partitions = temp_db_cursor.row_set("SELECT DISTINCT partition FROM country_name")
    if not no_partitions:
        assert len(partitions) > 10
    else:
        # Without partitions every country must be in partition 0.
        assert partitions == {(0, )}

    assert temp_db_cursor.table_exists('country_osm_grid')
    grid_rows = temp_db_cursor.table_rows('country_osm_grid')
    assert grid_rows > 100
@pytest.mark.parametrize("languages", (None, ' fr,en'))
def test_create_country_names(temp_db_with_extensions, temp_db_conn, temp_db_cursor,
                              table_factory, tokenizer_mock, languages):
    """ Create country names for two countries and check which name
        variants reach the tokenizer, with and without a language
        restriction.
    """
    rows = (('us', '"name"=>"us1","name:af"=>"us2"'),
            ('fr', '"name"=>"Fra", "name:en"=>"Fren"'))
    table_factory('country_name', 'country_code varchar(2), name hstore',
                  content=rows)

    assert temp_db_cursor.scalar("SELECT count(*) FROM country_name") == 2

    tokenizer = tokenizer_mock()

    country_info.create_country_names(temp_db_conn, tokenizer, languages)

    assert len(tokenizer.analyser_cache['countries']) == 2

    result_set = {code: set(names.values())
                  for code, names in tokenizer.analyser_cache['countries']}

    # The Afrikaans name 'us2' is only expected when no language
    # restriction is given.
    us_names = {'us', 'us1', 'United States'}
    if not languages:
        us_names.add('us2')

    assert result_set == {'us': us_names,
                          'fr': {'fr', 'Fra', 'Fren'}}