move default country name creation to tokenizer

The new function is also used when a country is updated. All SQL
functions related to country names have been removed.
This commit is contained in:
Sarah Hoffmann
2021-04-27 11:37:18 +02:00
parent dc700c25b6
commit bef300305e
8 changed files with 105 additions and 134 deletions

View File

@@ -8,6 +8,7 @@ import subprocess
from pathlib import Path
import psutil
import psycopg2.extras
from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils
@@ -250,34 +251,37 @@ def create_search_indices(conn, config, drop=False):
sql.run_sql_file(conn, 'indices.sql', drop=drop)
def create_country_names(conn, config):
""" Create search index for default country names.
def create_country_names(conn, tokenizer, languages=None):
""" Add default country names to search index. `languages` is a comma-
separated list of language codes as used in OSM. If `languages` is not
empty then only name translations for the given languages are added
to the index.
"""
if languages:
languages = languages.split(',')
def _include_key(key):
return key == 'name' or \
(key.startswith('name:') \
and (not languages or key[5:] in languages))
with conn.cursor() as cur:
cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""")
cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""")
cur.execute("""SELECT COUNT(*) FROM
(SELECT getorcreate_country(make_standard_name(country_code),
country_code) FROM country_name WHERE country_code is not null) AS x""")
cur.execute("""SELECT COUNT(*) FROM
(SELECT getorcreate_country(make_standard_name(name->'name'), country_code)
FROM country_name WHERE name ? 'name') AS x""")
sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v),
country_code) FROM (SELECT country_code, skeys(name)
AS k, svals(name) AS v FROM country_name) x WHERE k"""
psycopg2.extras.register_hstore(cur)
cur.execute("""SELECT country_code, name FROM country_name
WHERE country_code is not null""")
languages = config.LANGUAGES
with tokenizer.name_analyzer() as analyzer:
for code, name in cur:
names = [code]
if code == 'gb':
names.append('UK')
if code == 'us':
names.append('United States')
# country names (only in languages as provided)
if name:
names.extend((v for k, v in name.items() if _include_key(k)))
analyzer.add_country_names(code, names)
if languages:
sql_statement = "{} IN (".format(sql_statement)
delim = ''
for language in languages.split(','):
sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language)
delim = ', '
sql_statement = '{})'.format(sql_statement)
else:
sql_statement = "{} LIKE 'name:%'".format(sql_statement)
sql_statement = "{}) v".format(sql_statement)
cur.execute(sql_statement)
conn.commit()