port address level computation to Python

Also adds simple tests for correct table creation.
This commit is contained in:
Sarah Hoffmann
2021-01-23 17:25:14 +01:00
parent e6c2842b66
commit 94fa7162be
13 changed files with 268 additions and 171 deletions

View File

@@ -372,33 +372,38 @@ class UpdateRefresh:
def run(args):
    """ Execute the refresh tasks that were selected in `args`.

        Each task is guarded by its own flag on `args` (`postcodes`,
        `word_counts`, `address_levels`, `functions`, `wiki_data`,
        `importance`, `website`) and the tasks run in exactly that order.
        Postcodes, word counts and address levels are handled by
        `nominatim.tools.refresh`; the remaining tasks are delegated to
        the legacy PHP scripts.

        Returns 0 when all selected tasks have completed.
    """
    import nominatim.tools.refresh

    # The connection is opened explicitly instead of via `with`: the
    # psycopg2 connection context manager only ends the transaction and
    # leaves the connection open.
    conn = psycopg2.connect(args.config.get_libpq_dsn())

    try:
        if args.postcodes:
            LOG.warning("Update postcodes centroid")
            nominatim.tools.refresh.update_postcodes(conn, args.data_dir)

        if args.word_counts:
            LOG.warning('Recompute frequency of full-word search terms')
            nominatim.tools.refresh.recompute_word_counts(conn, args.data_dir)

        if args.address_levels:
            cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
            LOG.warning('Updating address levels from %s', cfg)
            nominatim.tools.refresh.load_address_levels_from_file(conn, cfg)

        if args.functions:
            params = ['setup.php', '--create-functions', '--create-partition-functions']
            if args.diffs:
                params.append('--enable-diff-updates')
            if args.enable_debug_statements:
                params.append('--enable-debug-statements')
            run_legacy_script(*params, nominatim_env=args, throw_on_fail=True)

        if args.wiki_data:
            run_legacy_script('setup.php', '--import-wikipedia-articles',
                              nominatim_env=args, throw_on_fail=True)

        # Attention: importance MUST come after wiki data import.
        if args.importance:
            run_legacy_script('update.php', '--recompute-importance',
                              nominatim_env=args, throw_on_fail=True)

        if args.website:
            run_legacy_script('setup.php', '--setup-website',
                              nominatim_env=args, throw_on_fail=True)
    finally:
        # Release the connection even when one of the steps above raises.
        conn.close()

    return 0

View File

@@ -24,6 +24,13 @@ class Configuration:
if project_dir is not None:
self._config.update(dotenv_values(str((project_dir / '.env').resolve())))
# Fill in computed defaults for variables that are left empty in the
# configuration to request the default value.
# They may still be overwritten by environment variables.
if not self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG']:
self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \
str(config_dir / 'address-levels.json')
def __getattr__(self, name):
name = 'NOMINATIM_' + name

View File

@@ -9,3 +9,4 @@ def execute_file(conn, fname):
sql = fdesc.read()
with conn.cursor() as cur:
cur.execute(sql)
conn.commit()

View File

@@ -1,6 +1,10 @@
"""
Functions for bringing auxiliary data in the database up-to-date.
"""
import json
from psycopg2.extras import execute_values
from ..db.utils import execute_file
def update_postcodes(conn, datadir):
@@ -14,3 +18,54 @@ def recompute_word_counts(conn, datadir):
""" Compute the frequency of full-word search terms.
"""
execute_file(conn, datadir / 'sql' / 'words_from_search_name.sql')
def _add_address_level_rows_from_entry(rows, entry):
""" Converts a single entry from the JSON format for address rank
descriptions into a flat format suitable for inserting into a
PostgreSQL table and adds these lines to `rows`.
"""
countries = entry.get('countries') or (None, )
for key, values in entry['tags'].items():
for value, ranks in values.items():
if isinstance(ranks, list):
rank_search, rank_address = ranks
else:
rank_search = rank_address = ranks
if not value:
value = None
for country in countries:
rows.append((country, key, value, rank_search, rank_address))
def load_address_levels(conn, table, levels):
    """ Replace the table `table` with the address rank data in `levels`.

        `levels` is an iterable of entries in the JSON address rank
        format. Any previously existing table of that name is dropped
        and a new one is created with the following columns:

            country_code, class, type, rank_search, rank_address

        A unique index over (country_code, class, type) is added after
        the rows have been inserted and the changes are committed.
    """
    flat_rows = []
    for level_entry in levels:
        _add_address_level_rows_from_entry(flat_rows, level_entry)

    # The table name is interpolated into the statements as-is, so it
    # must be a trusted identifier.
    drop_stmt = 'DROP TABLE IF EXISTS {}'.format(table)
    create_stmt = """CREATE TABLE {} (country_code varchar(2),
class TEXT,
type TEXT,
rank_search SMALLINT,
rank_address SMALLINT)""".format(table)
    insert_stmt = "INSERT INTO {} VALUES %s".format(table)
    index_stmt = 'CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)

    with conn.cursor() as cur:
        cur.execute(drop_stmt)
        cur.execute(create_stmt)
        execute_values(cur, insert_stmt, flat_rows)
        cur.execute(index_stmt)

    conn.commit()
def load_address_levels_from_file(conn, config_file):
    """ Replace the `address_levels` table with the contents of the JSON
        file `config_file`.

        `config_file` must provide a pathlib-style `open()` method. The
        file is always decoded as UTF-8, independent of the locale the
        process runs under. Errors from opening or parsing the file are
        passed on to the caller.
    """
    with config_file.open('r', encoding='utf-8') as fdesc:
        levels = json.load(fdesc)

    # The file is already closed here; only the database work remains.
    load_address_levels(conn, 'address_levels', levels)