forked from hans/Nominatim
move default country name creation to tokenizer
The new function is also used when a country is updated. All SQL functions related to country names have been removed.
This commit is contained in:
@@ -133,7 +133,8 @@ class SetupAll:
|
||||
database_import.create_search_indices(conn, args.config,
|
||||
drop=args.no_updates)
|
||||
LOG.warning('Create search index for default country names.')
|
||||
database_import.create_country_names(conn, args.config)
|
||||
database_import.create_country_names(conn, tokenizer,
|
||||
args.config.LANGUAGES)
|
||||
|
||||
webdir = args.project_dir / 'website'
|
||||
LOG.warning('Setup website at %s', webdir)
|
||||
|
||||
@@ -223,6 +223,21 @@ class LegacyNameAnalyzer:
|
||||
FROM (SELECT distinct(postcode) as pc
|
||||
FROM location_postcode) x""")
|
||||
|
||||
|
||||
def add_country_names(self, country_code, names):
    """ Add names for the given country to the search index.

        Every entry of `names` is normalised in the database with
        make_standard_name(), prefixed with a blank and inserted into
        the `word` table together with `country_code`. Tokens that
        already exist for this country code are left untouched.
        An empty `names` is a no-op.
    """
    # Nothing to add, so do not send a statement at all.
    if not names:
        return

    with self.conn.cursor() as cur:
        # DISTINCT is needed in addition to NOT EXISTS: the NOT EXISTS
        # check cannot see rows added by this very statement, so two
        # names that normalise to the same token would otherwise both
        # be inserted.
        cur.execute(
            """INSERT INTO word (word_id, word_token, country_code)
               (SELECT nextval('seq_word'), lookup_token, %s
                  FROM (SELECT DISTINCT ' ' || make_standard_name(n) as lookup_token
                        FROM unnest(%s)n) y
                  WHERE NOT EXISTS(SELECT * FROM word
                                   WHERE word_token = lookup_token and country_code = %s))
            """, (country_code, names, country_code))
|
||||
|
||||
|
||||
def process_place(self, place):
|
||||
""" Determine tokenizer information about the given place.
|
||||
|
||||
@@ -231,7 +246,14 @@ class LegacyNameAnalyzer:
|
||||
"""
|
||||
token_info = _TokenInfo(self._cache)
|
||||
|
||||
token_info.add_names(self.conn, place.get('name'), place.get('country_feature'))
|
||||
names = place.get('name')
|
||||
|
||||
if names:
|
||||
token_info.add_names(self.conn, names)
|
||||
|
||||
country_feature = place.get('country_feature')
|
||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
||||
self.add_country_names(country_feature.lower(), list(names.values()))
|
||||
|
||||
address = place.get('address')
|
||||
|
||||
@@ -279,22 +301,14 @@ class _TokenInfo:
|
||||
self.data = {}
|
||||
|
||||
|
||||
def add_names(self, conn, names, country_feature):
|
||||
def add_names(self, conn, names):
|
||||
""" Add token information for the names of the place.
|
||||
"""
|
||||
if not names:
|
||||
return
|
||||
|
||||
with conn.cursor() as cur:
|
||||
# Create the token IDs for all names.
|
||||
self.data['names'] = cur.scalar("SELECT make_keywords(%s)::text",
|
||||
(names, ))
|
||||
|
||||
# Add country tokens to word table if necessary.
|
||||
if country_feature and re.fullmatch(r'[A-Za-z][A-Za-z]', country_feature):
|
||||
cur.execute("SELECT create_country(%s, %s)",
|
||||
(names, country_feature.lower()))
|
||||
|
||||
|
||||
def add_housenumbers(self, conn, hnrs):
|
||||
""" Extract housenumber information from the address.
|
||||
@@ -334,7 +348,8 @@ class _TokenInfo:
|
||||
"""
|
||||
def _get_place(name):
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT (addr_ids_from_name(%s) || getorcreate_name_id(make_standard_name(%s), ''))::text,
|
||||
cur.execute("""SELECT (addr_ids_from_name(%s)
|
||||
|| getorcreate_name_id(make_standard_name(%s), ''))::text,
|
||||
word_ids_from_name(%s)::text""",
|
||||
(name, name, name))
|
||||
return cur.fetchone()
|
||||
|
||||
@@ -8,6 +8,7 @@ import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import psutil
|
||||
import psycopg2.extras
|
||||
|
||||
from nominatim.db.connection import connect, get_pg_env
|
||||
from nominatim.db import utils as db_utils
|
||||
@@ -250,34 +251,37 @@ def create_search_indices(conn, config, drop=False):
|
||||
|
||||
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
||||
|
||||
def create_country_names(conn, config):
|
||||
""" Create search index for default country names.
|
||||
def create_country_names(conn, tokenizer, languages=None):
|
||||
""" Add default country names to search index. `languages` is a comma-
|
||||
separated list of language codes as used in OSM. If `languages` is not
|
||||
empty then only name translations for the given languages are added
|
||||
to the index.
|
||||
"""
|
||||
if languages:
|
||||
languages = languages.split(',')
|
||||
|
||||
def _include_key(key):
|
||||
return key == 'name' or \
|
||||
(key.startswith('name:') \
|
||||
and (not languages or key[5:] in languages))
|
||||
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT getorcreate_country(make_standard_name('uk'), 'gb')""")
|
||||
cur.execute("""SELECT getorcreate_country(make_standard_name('united states'), 'us')""")
|
||||
cur.execute("""SELECT COUNT(*) FROM
|
||||
(SELECT getorcreate_country(make_standard_name(country_code),
|
||||
country_code) FROM country_name WHERE country_code is not null) AS x""")
|
||||
cur.execute("""SELECT COUNT(*) FROM
|
||||
(SELECT getorcreate_country(make_standard_name(name->'name'), country_code)
|
||||
FROM country_name WHERE name ? 'name') AS x""")
|
||||
sql_statement = """SELECT COUNT(*) FROM (SELECT getorcreate_country(make_standard_name(v),
|
||||
country_code) FROM (SELECT country_code, skeys(name)
|
||||
AS k, svals(name) AS v FROM country_name) x WHERE k"""
|
||||
psycopg2.extras.register_hstore(cur)
|
||||
cur.execute("""SELECT country_code, name FROM country_name
|
||||
WHERE country_code is not null""")
|
||||
|
||||
languages = config.LANGUAGES
|
||||
with tokenizer.name_analyzer() as analyzer:
|
||||
for code, name in cur:
|
||||
names = [code]
|
||||
if code == 'gb':
|
||||
names.append('UK')
|
||||
if code == 'us':
|
||||
names.append('United States')
|
||||
|
||||
# country names (only in languages as provided)
|
||||
if name:
|
||||
names.extend((v for k, v in name.items() if _include_key(k)))
|
||||
|
||||
analyzer.add_country_names(code, names)
|
||||
|
||||
if languages:
|
||||
sql_statement = "{} IN (".format(sql_statement)
|
||||
delim = ''
|
||||
for language in languages.split(','):
|
||||
sql_statement = "{}{}'name:{}'".format(sql_statement, delim, language)
|
||||
delim = ', '
|
||||
sql_statement = '{})'.format(sql_statement)
|
||||
else:
|
||||
sql_statement = "{} LIKE 'name:%'".format(sql_statement)
|
||||
sql_statement = "{}) v".format(sql_statement)
|
||||
cur.execute(sql_statement)
|
||||
conn.commit()
|
||||
|
||||
Reference in New Issue
Block a user