modify data import logic to load country names from yaml

Author: Tareq Al-Ahdal
Date:   2022-03-12 15:20:57 +08:00
parent  8b6652a40b
commit  377cf36be3

2 changed files with 20 additions and 16 deletions


@@ -1,11 +0,0 @@
---
--- PostgreSQL database dump
---
-
-CREATE TABLE public.country_name (
-    country_code character varying(2),
-    name public.hstore,
-    derived_name public.hstore,
-    country_default_language_code text,
-    partition integer
-);
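
The file removed above is a static pg_dump snapshot of the country_name table; after this commit the table is created and filled at import time from the YAML country configuration (see the Python changes below). The name column is an hstore mapping language codes to country names. As a rough illustration of what ends up in that column, here is a minimal read-back sketch using psycopg2, assuming an already imported local Nominatim database with the hstore extension installed (the DSN and the sample country code are placeholders):

    import psycopg2
    import psycopg2.extras

    conn = psycopg2.connect("dbname=nominatim")   # placeholder DSN
    with conn.cursor() as cur:
        # register_hstore makes psycopg2 return hstore columns as Python dicts.
        psycopg2.extras.register_hstore(cur)
        cur.execute("SELECT name FROM country_name WHERE country_code = %s", ('de',))
        names = cur.fetchone()[0]
        # e.g. {'default': 'Deutschland', 'en': 'Germany', ...}
        print(names.get('default'))
    conn.close()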


@@ -11,6 +11,8 @@ import psycopg2.extras
 from nominatim.db import utils as db_utils
 from nominatim.db.connection import connect
+from io import StringIO
+import json
 
 
 class _CountryInfo:
     """ Caches country-specific properties from the configuration file.
@@ -33,7 +35,8 @@ class _CountryInfo:
                 elif not isinstance(prop['languages'], list):
                     prop['languages'] = [x.strip()
                                          for x in prop['languages'].split(',')]
+                if 'name' not in prop:
+                    prop['name'] = {}
 
     def items(self):
         """ Return tuples of (country_code, property dict) as iterable.
@@ -61,10 +64,9 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
     """ Create and populate the tables with basic static data that provides
         the background for geocoding. Data is assumed to not yet exist.
     """
-    db_utils.execute_file(dsn, sql_dir / 'country_name.sql')
     db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz')
 
-    params = []
+    params, country_names_data = [], ''
     for ccode, props in _COUNTRY_INFO.items():
         if ccode is not None and props is not None:
             if ignore_partitions:
@@ -74,8 +76,21 @@ def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
             lang = props['languages'][0] if len(props['languages']) == 1 else None
             params.append((ccode, partition, lang))
+            name = json.dumps(props.get('name'), ensure_ascii=False, separators=(', ', '=>'))
+            country_names_data += ccode + '\t' + name[1:-1] + '\n'
 
     with connect(dsn) as conn:
         with conn.cursor() as cur:
+            cur.execute(
+                """CREATE TABLE public.country_name (
+                       country_code character varying(2),
+                       name public.hstore,
+                       derived_name public.hstore,
+                       country_default_language_code text,
+                       partition integer
+                   );""")
+            data = StringIO(country_names_data)
+            cur.copy_from(data, 'country_name', columns=('country_code', 'name'))
             cur.execute_values(
                 """ UPDATE country_name
                     SET partition = part, country_default_language_code = lang
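
Two details in the added import code are easy to miss. json.dumps with separators=(', ', '=>') repurposes the JSON serializer to emit PostgreSQL hstore input syntax (double-quoted keys and values joined by =>), and slicing off the surrounding braces with name[1:-1] leaves a bare hstore literal. Each country then becomes one tab-separated line in the buffer that copy_from streams into the country_code and name columns, where PostgreSQL parses the second field as hstore. A standalone sketch of the conversion, with invented sample data:

    import json
    from io import StringIO

    # Invented sample resembling one 'name' entry from the YAML configuration.
    names = {'default': 'Deutschland', 'en': 'Germany', 'fr': 'Allemagne'}

    hstore_value = json.dumps(names, ensure_ascii=False, separators=(', ', '=>'))
    # -> '{"default"=>"Deutschland", "en"=>"Germany", "fr"=>"Allemagne"}'
    line = 'de' + '\t' + hstore_value[1:-1] + '\n'
    # -> one COPY row: country code, a tab, then the hstore literal without braces

    buf = StringIO(line)
    # With an open cursor the import then does the equivalent of:
    # cur.copy_from(buf, 'country_name', columns=('country_code', 'name'))
    print(line, end='')
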
@@ -94,8 +109,8 @@ def create_country_names(conn, tokenizer, languages=None):
         languages = languages.split(',')
 
     def _include_key(key):
-        return key == 'name' or \
-               (key.startswith('name:') and (not languages or key[5:] in languages))
+        return key == 'default' or \
+               (not languages or key in languages)
 
     with conn.cursor() as cur:
         psycopg2.extras.register_hstore(cur)
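
The key filter changes because the source of the names changes: the old filter looked for OSM-style keys ('name', 'name:en', ...), while the new one matches the YAML layout, 'default' plus bare language codes. A small sketch of the new behaviour on an invented name dict:

    # Invented YAML-style name dict: 'default' plus bare language codes.
    names = {'default': 'España', 'en': 'Spain', 'fr': 'Espagne'}
    languages = ['en']

    def _include_key(key):
        # Keep the default name plus any explicitly requested language.
        return key == 'default' or (not languages or key in languages)

    print({k: v for k, v in names.items() if _include_key(k)})
    # -> {'default': 'España', 'en': 'Spain'}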