index admin boundaries before everything else

Avoids irregularities that might happen because the address
rank of a boundary is changed through linking.
This commit is contained in:
Sarah Hoffmann
2020-08-14 16:13:06 +02:00
parent fc50eb8688
commit a4b30fc649
3 changed files with 61 additions and 7 deletions

View File

@@ -573,12 +573,20 @@ class SetupFunctions
} }
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index administrative boundaries');
$oCmd = (clone $oBaseCmd)->addParams('-b');
$iStatus = $oCmd->run();
if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!');
}
info('Index ranks 5 - 25'); info('Index ranks 5 - 25');
$oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25); $oCmd = (clone $oBaseCmd)->addParams('--minrank', 5, '--maxrank', 25);
$iStatus = $oCmd->run(); $iStatus = $oCmd->run();
if ($iStatus != 0) { if ($iStatus != 0) {
fail('error status ' . $iStatus . ' running nominatim!'); fail('error status ' . $iStatus . ' running nominatim!');
} }
if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE');
info('Index ranks 26 - 30'); info('Index ranks 26 - 30');

View File

@@ -82,6 +82,29 @@ class InterpolationRunner(object):
SET indexed_status = 0 WHERE place_id IN ({})"""\ SET indexed_status = 0 WHERE place_id IN ({})"""\
.format(','.join((str(i) for i in ids))) .format(','.join((str(i) for i in ids)))
class BoundaryRunner(object):
    """ Returns SQL commands for indexing the administrative boundaries
        by partition.

        All queries operate on the placex table and select rows with
        class 'boundary' and type 'administrative' that have
        indexed_status > 0 and rank_search < 26.
    """

    def name(self):
        """ Return the short name of this runner.
        """
        return "boundaries"

    def sql_count_objects(self):
        """ Return the SQL query counting the boundaries selected
            by this runner.
        """
        return """SELECT count(*) FROM placex
                  WHERE indexed_status > 0
                    AND rank_search < 26
                    AND class = 'boundary' and type = 'administrative'"""

    def sql_get_objects(self):
        """ Return the SQL query yielding the place_ids of the selected
            boundaries, ordered by partition and then by admin_level.
        """
        return """SELECT place_id FROM placex
                  WHERE indexed_status > 0 and rank_search < 26
                        and class = 'boundary' and type = 'administrative'
                  ORDER BY partition, admin_level"""

    def sql_index_place(self, ids):
        """ Return the SQL statement that sets indexed_status to 0 for
            the places with the given ids.

            `ids` is an iterable of place ids; each one is converted with
            str() and the results are joined with commas (no spaces) into
            the IN list. The caller is expected to pass at least one id,
            an empty iterable yields an empty `IN ()` list.
        """
        return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\
               .format(','.join(map(str, ids)))
class Indexer(object): class Indexer(object):
""" Main indexing routine. """ Main indexing routine.
@@ -93,8 +116,14 @@ class Indexer(object):
self.conn = make_connection(options) self.conn = make_connection(options)
self.threads = [DBConnection(options) for i in range(options.threads)] self.threads = [DBConnection(options) for i in range(options.threads)]
def run(self): def index_boundaries(self):
""" Run indexing over the entire database. log.warning("Starting indexing boundaries using {} threads".format(
len(self.threads)))
self.index(BoundaryRunner())
def index_by_rank(self):
""" Run classic indexing by rank.
""" """
log.warning("Starting indexing rank ({} to {}) using {} threads".format( log.warning("Starting indexing rank ({} to {}) using {} threads".format(
self.minrank, self.maxrank, len(self.threads))) self.minrank, self.maxrank, len(self.threads)))
@@ -198,6 +227,9 @@ def nominatim_arg_parser():
p.add_argument('-P', '--port', p.add_argument('-P', '--port',
dest='port', action='store', dest='port', action='store',
help='PostgreSQL server port') help='PostgreSQL server port')
p.add_argument('-b', '--boundary-only',
dest='boundary_only', action='store_true',
help='Only index administrative boundaries (ignores min/maxrank).')
p.add_argument('-r', '--minrank', p.add_argument('-r', '--minrank',
dest='minrank', type=int, metavar='RANK', default=0, dest='minrank', type=int, metavar='RANK', default=0,
help='Minimum/starting rank.') help='Minimum/starting rank.')
@@ -225,4 +257,7 @@ if __name__ == '__main__':
password = getpass.getpass("Database password: ") password = getpass.getpass("Database password: ")
options.password = password options.password = password
Indexer(options).run() if options.boundary_only:
Indexer(options).index_boundaries()
else:
Indexer(options).index_by_rank()

View File

@@ -278,9 +278,11 @@ if ($aResult['recompute-word-counts']) {
if ($aResult['index']) { if ($aResult['index']) {
$oCmd = (clone $oIndexCmd) $oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']); ->addParams('--minrank', $aResult['index-rank'], '-b');
$oCmd->run();
// echo $oCmd->escapedCmd()."\n"; $oCmd = (clone $oIndexCmd)
->addParams('--minrank', $aResult['index-rank']);
$oCmd->run(); $oCmd->run();
$oDB->exec('update import_status set indexed = true'); $oDB->exec('update import_status set indexed = true');
@@ -421,9 +423,18 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Index file // Index file
if (!$aResult['no-index']) { if (!$aResult['no-index']) {
$oThisIndexCmd = clone($oIndexCmd);
$fCMDStartTime = time(); $fCMDStartTime = time();
$oThisIndexCmd = clone($oIndexCmd);
$oThisIndexCmd->addParams('-b');
echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) {
echo "Error: $iErrorLevel\n";
exit($iErrorLevel);
}
$oThisIndexCmd = clone($oIndexCmd);
echo $oThisIndexCmd->escapedCmd()."\n"; echo $oThisIndexCmd->escapedCmd()."\n";
$iErrorLevel = $oThisIndexCmd->run(); $iErrorLevel = $oThisIndexCmd->run();
if ($iErrorLevel) { if ($iErrorLevel) {