mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 13:24:07 +00:00
port wikipedia importance functions to python
This commit is contained in:
@@ -5,7 +5,6 @@ import logging
|
||||
from pathlib import Path
|
||||
|
||||
from ..db.connection import connect
|
||||
from ..tools.exec_utils import run_legacy_script
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
@@ -69,12 +68,20 @@ class UpdateRefresh:
|
||||
args.diffs, args.enable_debug_statements)
|
||||
|
||||
if args.wiki_data:
|
||||
run_legacy_script('setup.php', '--import-wikipedia-articles',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
|
||||
or args.project_dir)
|
||||
LOG.warning('Import wikipdia article importance from %s', data_path)
|
||||
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
|
||||
data_path) > 0:
|
||||
LOG.fatal('FATAL: Wikipedia importance dump file not found')
|
||||
return 1
|
||||
|
||||
# Attention: importance MUST come after wiki data import.
|
||||
if args.importance:
|
||||
run_legacy_script('update.php', '--recompute-importance',
|
||||
nominatim_env=args, throw_on_fail=True)
|
||||
LOG.warning('Update importance values for database')
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
refresh.recompute_importance(conn)
|
||||
|
||||
if args.website:
|
||||
webdir = args.project_dir / 'website'
|
||||
LOG.warning('Setting up website directory at %s', webdir)
|
||||
|
||||
@@ -21,9 +21,12 @@ def _pipe_to_proc(proc, fdesc):
|
||||
|
||||
return len(chunk)
|
||||
|
||||
def execute_file(dsn, fname, ignore_errors=False):
|
||||
def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None):
|
||||
""" Read an SQL file and run its contents against the given database
|
||||
using psql.
|
||||
using psql. Use `pre_code` and `post_code` to run extra commands
|
||||
before or after executing the file. The commands are run within the
|
||||
same session, so they may be used to wrap the file execution in a
|
||||
transaction.
|
||||
"""
|
||||
cmd = ['psql']
|
||||
if not ignore_errors:
|
||||
@@ -33,6 +36,9 @@ def execute_file(dsn, fname, ignore_errors=False):
|
||||
if not LOG.isEnabledFor(logging.INFO):
|
||||
proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8'))
|
||||
|
||||
if pre_code:
|
||||
proc.stdin.write((pre_code + ';').encode('utf-8'))
|
||||
|
||||
if fname.suffix == '.gz':
|
||||
with gzip.open(str(fname), 'rb') as fdesc:
|
||||
remain = _pipe_to_proc(proc, fdesc)
|
||||
@@ -40,6 +46,9 @@ def execute_file(dsn, fname, ignore_errors=False):
|
||||
with fname.open('rb') as fdesc:
|
||||
remain = _pipe_to_proc(proc, fdesc)
|
||||
|
||||
if remain == 0 and post_code:
|
||||
proc.stdin.write((';' + post_code).encode('utf-8'))
|
||||
|
||||
proc.stdin.close()
|
||||
|
||||
ret = proc.wait()
|
||||
|
||||
@@ -200,6 +200,53 @@ PHP_CONST_DEFS = (
|
||||
)
|
||||
|
||||
|
||||
def import_wikipedia_articles(dsn, data_path, ignore_errors=False):
    """ Replace the wikipedia importance tables with new data.

        The dump file `wikimedia-importance.sql.gz` is looked up in
        `data_path` and handed to `execute_file()`, which runs it against
        the database described by `dsn`. The import is wrapped in a single
        transaction so that the new data is replaced seamlessly.

        `ignore_errors` is passed on unchanged to `execute_file()`.

        Returns 0 if all was well and 1 if the importance file could not
        be found. Exceptions raised by `execute_file()` are not caught
        here and propagate to the caller.
    """
    datafile = data_path / 'wikimedia-importance.sql.gz'

    # A missing dump is reported through the return code, not an exception,
    # so that the caller can decide how to inform the user.
    if not datafile.exists():
        return 1

    # Open the transaction and remove the old tables before the dump
    # recreates them. The last statement carries no ';' because
    # execute_file() appends one to pre_code itself.
    pre_code = ('BEGIN;\n'
                'DROP TABLE IF EXISTS "wikipedia_article";\n'
                'DROP TABLE IF EXISTS "wikipedia_redirect"\n')
    # Closes the transaction opened in pre_code.
    post_code = "COMMIT"

    execute_file(dsn, datafile, ignore_errors=ignore_errors,
                 pre_code=pre_code, post_code=post_code)

    return 0
|
||||
|
||||
|
||||
def recompute_importance(conn):
    """ Recompute wikipedia links and importance for all entries in placex.

        `conn` is an open database connection; all statements are run
        through one cursor and committed at the end.

        This is a long-running operation that must not be executed in
        parallel with updates.
    """
    with conn.cursor() as cur:
        # Switch off the placex triggers for the duration of the bulk updates.
        cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL')

        # Step 1: recompute wikipedia and importance for every row from
        # the SQL function compute_importance().
        cur.execute("""
            UPDATE placex SET (wikipedia, importance) =
               (SELECT wikipedia, importance
                FROM compute_importance(extratags, country_code, osm_type, osm_id))
            """)

        # Step 2: copy the values from a row d onto the row s it references
        # through linked_place_id, when d has a wikipedia entry and s has
        # none or a lower importance.
        cur.execute("""
            UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
             FROM placex d
             WHERE s.place_id = d.linked_place_id and d.wikipedia is not null
                   and (s.wikipedia is null or s.importance < d.importance);
            """)

        cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL')

    conn.commit()
|
||||
|
||||
|
||||
def setup_website(basedir, phplib_dir, config):
|
||||
""" Create the website script stubs.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user