mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
implementation of 'nominatim index'
This commit is contained in:
@@ -11,6 +11,17 @@ from pathlib import Path
|
||||
from .config import Configuration
|
||||
from .admin.exec_utils import run_legacy_script
|
||||
|
||||
from .indexer.indexer import Indexer
|
||||
|
||||
def _num_system_cpus():
    """ Return the number of CPUs this process may use.

        The size of the scheduler affinity set of the current process is
        preferred. When the affinity cannot be queried or yields nothing,
        the result of os.cpu_count() is returned instead, which may be
        None when the count cannot be determined.
    """
    try:
        cpus = len(os.sched_getaffinity(0))
    except (AttributeError, NotImplementedError):
        # os.sched_getaffinity() is only provided on some Unix platforms.
        # Where it is missing, the attribute lookup itself fails with an
        # AttributeError, so that must lead to the fallback as well.
        cpus = None

    return cpus or os.cpu_count()
|
||||
|
||||
|
||||
class CommandlineParser:
|
||||
""" Wraps some of the common functions for parsing the command line
|
||||
and setting up subcommands.
|
||||
@@ -297,11 +308,27 @@ class UpdateIndex:
|
||||
|
||||
@staticmethod
|
||||
def add_args(parser):
|
||||
pass
|
||||
group = parser.add_argument_group('Filter arguments')
|
||||
group.add_argument('--boundaries-only', action='store_true',
|
||||
help="""Index only administrative boundaries.""")
|
||||
group.add_argument('--no-boundaries', action='store_true',
|
||||
help="""Index everything except administrative boundaries.""")
|
||||
group.add_argument('--minrank', '-r', type=int, metavar='RANK', default=0,
|
||||
help='Minimum/starting rank')
|
||||
group.add_argument('--maxrank', '-R', type=int, metavar='RANK', default=30,
|
||||
help='Maximum/finishing rank')
|
||||
|
||||
@staticmethod
|
||||
def run(args):
|
||||
return run_legacy_script('update.php', '--index', nominatim_env=args)
|
||||
indexer = Indexer(args.config.get_libpq_dsn(),
|
||||
args.threads or _num_system_cpus() or 1)
|
||||
|
||||
if not args.no_boundaries:
|
||||
indexer.index_boundaries(args.minrank, args.maxrank)
|
||||
if not args.boundaries_only:
|
||||
indexer.index_by_rank(args.minrank, args.maxrank)
|
||||
|
||||
return 0
|
||||
|
||||
|
||||
class UpdateRefresh:
|
||||
|
||||
@@ -11,26 +11,14 @@ from psycopg2.extras import wait_select
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
def make_connection(options, asynchronous=False):
    """ Open a psycopg2 connection using the settings found in `options`.

        The attributes dbname, user, password, host and port are read from
        `options` and handed to psycopg2.connect() as keyword arguments.
        `asynchronous` is passed on as the `async` connection parameter.
    """
    connect_args = {name: getattr(options, name)
                    for name in ('dbname', 'user', 'password', 'host', 'port')}
    # 'async' is a reserved word in newer Python versions, so it cannot be
    # written as a literal keyword argument and has to go in via the dict.
    connect_args['async'] = asynchronous

    return psycopg2.connect(**connect_args)
|
||||
|
||||
class DBConnection:
|
||||
""" A single non-blocking database connection.
|
||||
"""
|
||||
|
||||
def __init__(self, options):
|
||||
def __init__(self, dsn):
|
||||
self.current_query = None
|
||||
self.current_params = None
|
||||
self.options = options
|
||||
self.dsn = dsn
|
||||
|
||||
self.conn = None
|
||||
self.cursor = None
|
||||
@@ -46,7 +34,9 @@ class DBConnection:
|
||||
self.cursor.close()
|
||||
self.conn.close()
|
||||
|
||||
self.conn = make_connection(self.options, asynchronous=True)
|
||||
# Use a dict to hand in the parameters because async is a reserved
|
||||
# word in Python3.
|
||||
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
|
||||
self.wait()
|
||||
|
||||
self.cursor = self.conn.cursor()
|
||||
|
||||
@@ -5,8 +5,10 @@ Main work horse for indexing (computing addresses) the database.
|
||||
import logging
|
||||
import select
|
||||
|
||||
import psycopg2
|
||||
|
||||
from .progress import ProgressLogger
|
||||
from db.async_connection import DBConnection, make_connection
|
||||
from ..db.async_connection import DBConnection
|
||||
|
||||
LOG = logging.getLogger()
|
||||
|
||||
@@ -94,34 +96,33 @@ class Indexer:
|
||||
""" Main indexing routine.
|
||||
"""
|
||||
|
||||
def __init__(self, opts):
|
||||
self.minrank = max(1, opts.minrank)
|
||||
self.maxrank = min(30, opts.maxrank)
|
||||
self.conn = make_connection(opts)
|
||||
self.threads = [DBConnection(opts) for _ in range(opts.threads)]
|
||||
def __init__(self, dsn, num_threads):
|
||||
self.conn = psycopg2.connect(dsn)
|
||||
self.threads = [DBConnection(dsn) for _ in range(num_threads)]
|
||||
|
||||
def index_boundaries(self):
|
||||
def index_boundaries(self, minrank, maxrank):
|
||||
LOG.warning("Starting indexing boundaries using %s threads",
|
||||
len(self.threads))
|
||||
|
||||
for rank in range(max(self.minrank, 5), min(self.maxrank, 26)):
|
||||
for rank in range(max(minrank, 5), min(maxrank, 26)):
|
||||
self.index(BoundaryRunner(rank))
|
||||
|
||||
def index_by_rank(self):
|
||||
def index_by_rank(self, minrank, maxrank):
|
||||
""" Run classic indexing by rank.
|
||||
"""
|
||||
maxrank = min(maxrank, 30)
|
||||
LOG.warning("Starting indexing rank (%i to %i) using %i threads",
|
||||
self.minrank, self.maxrank, len(self.threads))
|
||||
minrank, maxrank, len(self.threads))
|
||||
|
||||
for rank in range(max(1, self.minrank), self.maxrank):
|
||||
for rank in range(max(1, minrank), maxrank):
|
||||
self.index(RankRunner(rank))
|
||||
|
||||
if self.maxrank == 30:
|
||||
if maxrank == 30:
|
||||
self.index(RankRunner(0))
|
||||
self.index(InterpolationRunner(), 20)
|
||||
self.index(RankRunner(self.maxrank), 20)
|
||||
self.index(RankRunner(30), 20)
|
||||
else:
|
||||
self.index(RankRunner(self.maxrank))
|
||||
self.index(RankRunner(maxrank))
|
||||
|
||||
def index(self, obj, batch=1):
|
||||
""" Index a single rank or table. `obj` describes the SQL to use
|
||||
|
||||
Reference in New Issue
Block a user