mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
fix SQL and some other stuff
This commit is contained in:
@@ -96,18 +96,16 @@ class Indexer(object):
|
|||||||
self.conn = make_connection(options)
|
self.conn = make_connection(options)
|
||||||
|
|
||||||
self.threads = []
|
self.threads = []
|
||||||
self.poll = select.poll()
|
|
||||||
for i in range(options.threads):
|
for i in range(options.threads):
|
||||||
t = IndexingThread(i, options)
|
t = IndexingThread(i, options)
|
||||||
self.threads.append(t)
|
self.threads.append(t)
|
||||||
self.poll.register(t, select.EPOLLIN)
|
|
||||||
|
|
||||||
def run(self):
|
def run(self):
|
||||||
log.info("Starting indexing rank ({} to {}) using {} threads".format(
|
log.info("Starting indexing rank ({} to {}) using {} threads".format(
|
||||||
self.options.minrank, self.options.maxrank,
|
self.options.minrank, self.options.maxrank,
|
||||||
self.options.threads))
|
self.options.threads))
|
||||||
|
|
||||||
for rank in range(self.options.minrank, 30):
|
for rank in range(self.options.minrank, min(self.options.maxrank, 30)):
|
||||||
self.index(RankRunner(rank))
|
self.index(RankRunner(rank))
|
||||||
|
|
||||||
if self.options.maxrank >= 30:
|
if self.options.maxrank >= 30:
|
||||||
@@ -117,7 +115,7 @@ class Indexer(object):
|
|||||||
def index(self, obj):
|
def index(self, obj):
|
||||||
log.info("Starting {}".format(obj.name()))
|
log.info("Starting {}".format(obj.name()))
|
||||||
|
|
||||||
cur = self.conn.cursor(name="main")
|
cur = self.conn.cursor(name='main')
|
||||||
cur.execute(obj.sql_index_sectors())
|
cur.execute(obj.sql_index_sectors())
|
||||||
|
|
||||||
total_tuples = 0
|
total_tuples = 0
|
||||||
@@ -130,25 +128,29 @@ class Indexer(object):
|
|||||||
next_thread = self.find_free_thread()
|
next_thread = self.find_free_thread()
|
||||||
done_tuples = 0
|
done_tuples = 0
|
||||||
rank_start_time = datetime.now()
|
rank_start_time = datetime.now()
|
||||||
|
|
||||||
|
sector_sql = obj.sql_sector_places()
|
||||||
|
index_sql = obj.sql_index_place()
|
||||||
|
min_grouped_tuples = total_tuples - len(self.threads) * 1000
|
||||||
for r in cur:
|
for r in cur:
|
||||||
sector = r[0]
|
sector = r[0]
|
||||||
|
|
||||||
# Should we do the remaining ones together?
|
# Should we do the remaining ones together?
|
||||||
do_all = total_tuples - done_tuples < len(self.threads) * 1000
|
do_all = done_tuples > min_grouped_tuples
|
||||||
|
|
||||||
pcur = self.conn.cursor(name='places')
|
pcur = self.conn.cursor(name='places')
|
||||||
|
|
||||||
if do_all:
|
if do_all:
|
||||||
pcur.execute(obj.sql_nosector_places())
|
pcur.execute(obj.sql_nosector_places())
|
||||||
else:
|
else:
|
||||||
pcur.execute(obj.sql_sector_places(), (sector, ))
|
pcur.execute(sector_sql, (sector, ))
|
||||||
|
|
||||||
for place in pcur:
|
for place in pcur:
|
||||||
place_id = place[0]
|
place_id = place[0]
|
||||||
log.debug("Processing place {}".format(place_id))
|
log.debug("Processing place {}".format(place_id))
|
||||||
thread = next(next_thread)
|
thread = next(next_thread)
|
||||||
|
|
||||||
thread.perform(obj.sql_index_place(), (place_id,))
|
thread.perform(index_sql, (place_id,))
|
||||||
done_tuples += 1
|
done_tuples += 1
|
||||||
|
|
||||||
pcur.close()
|
pcur.close()
|
||||||
@@ -164,24 +166,19 @@ class Indexer(object):
|
|||||||
rank_end_time = datetime.now()
|
rank_end_time = datetime.now()
|
||||||
diff_seconds = (rank_end_time-rank_start_time).total_seconds()
|
diff_seconds = (rank_end_time-rank_start_time).total_seconds()
|
||||||
|
|
||||||
log.info("Done {} in {} @ {} per second - FINISHED {}\n".format(
|
log.info("Done {}/{} in {} @ {} per second - FINISHED {}\n".format(
|
||||||
done_tuples, int(diff_seconds),
|
done_tuples, total_tuples, int(diff_seconds),
|
||||||
done_tuples/diff_seconds, obj.name()))
|
done_tuples/diff_seconds, obj.name()))
|
||||||
|
|
||||||
def find_free_thread(self):
|
def find_free_thread(self):
|
||||||
thread_lookup = { t.fileno() : t for t in self.threads}
|
ready = self.threads
|
||||||
|
|
||||||
done_fids = [ t.fileno() for t in self.threads ]
|
|
||||||
|
|
||||||
while True:
|
while True:
|
||||||
for fid in done_fids:
|
for thread in ready:
|
||||||
thread = thread_lookup[fid]
|
|
||||||
if thread.is_done():
|
if thread.is_done():
|
||||||
yield thread
|
yield thread
|
||||||
else:
|
|
||||||
print("not good", fid)
|
|
||||||
|
|
||||||
done_fids = [ x[0] for x in self.poll.poll()]
|
ready, _, _ = select.select(self.threads, [], [])
|
||||||
|
|
||||||
assert(False, "Unreachable code")
|
assert(False, "Unreachable code")
|
||||||
|
|
||||||
@@ -212,8 +209,8 @@ class RankRunner(object):
|
|||||||
|
|
||||||
def sql_sector_places(self):
|
def sql_sector_places(self):
|
||||||
return """SELECT place_id FROM placex
|
return """SELECT place_id FROM placex
|
||||||
WHERE indexed_status > 0 and geometry_sector = %s
|
WHERE indexed_status > 0 and rank_search = {}
|
||||||
ORDER BY geometry_sector"""
|
and geometry_sector = %s""".format(self.rank)
|
||||||
|
|
||||||
def sql_index_place(self):
|
def sql_index_place(self):
|
||||||
return "EXECUTE rnk_index(%s)"
|
return "EXECUTE rnk_index(%s)"
|
||||||
|
|||||||
Reference in New Issue
Block a user