mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 05:14:07 +00:00
fix Python linting errors
This commit is contained in:
@@ -79,7 +79,6 @@ class LegacyICUTokenizer:
|
|||||||
""" Do any required postprocessing to make the tokenizer data ready
|
""" Do any required postprocessing to make the tokenizer data ready
|
||||||
for use.
|
for use.
|
||||||
"""
|
"""
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
def update_sql_functions(self, config):
|
def update_sql_functions(self, config):
|
||||||
@@ -156,25 +155,12 @@ class LegacyICUTokenizer:
|
|||||||
LOG.warning("Precomputing word tokens")
|
LOG.warning("Precomputing word tokens")
|
||||||
|
|
||||||
# get partial words and their frequencies
|
# get partial words and their frequencies
|
||||||
words = Counter()
|
words = self._count_partial_terms(conn)
|
||||||
name_proc = ICUNameProcessor(self.naming_rules)
|
|
||||||
with conn.cursor(name="words") as cur:
|
|
||||||
cur.execute(""" SELECT v, count(*) FROM
|
|
||||||
(SELECT svals(name) as v FROM place)x
|
|
||||||
WHERE length(v) < 75 GROUP BY v""")
|
|
||||||
|
|
||||||
for name, cnt in cur:
|
|
||||||
terms = set()
|
|
||||||
for word in name_proc.get_variants_ascii(name_proc.get_normalized(name)):
|
|
||||||
if ' ' in word:
|
|
||||||
terms.update(word.split())
|
|
||||||
for term in terms:
|
|
||||||
words[term] += cnt
|
|
||||||
|
|
||||||
# copy them back into the word table
|
# copy them back into the word table
|
||||||
with CopyBuffer() as copystr:
|
with CopyBuffer() as copystr:
|
||||||
for k, v in words.items():
|
for term, cnt in words.items():
|
||||||
copystr.add('w', k, json.dumps({'count': v}))
|
copystr.add('w', term, json.dumps({'count': cnt}))
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
copystr.copy_out(cur, 'word',
|
copystr.copy_out(cur, 'word',
|
||||||
@@ -184,6 +170,27 @@ class LegacyICUTokenizer:
|
|||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
def _count_partial_terms(self, conn):
|
||||||
|
""" Count the partial terms from the names in the place table.
|
||||||
|
"""
|
||||||
|
words = Counter()
|
||||||
|
name_proc = ICUNameProcessor(self.naming_rules)
|
||||||
|
|
||||||
|
with conn.cursor(name="words") as cur:
|
||||||
|
cur.execute(""" SELECT v, count(*) FROM
|
||||||
|
(SELECT svals(name) as v FROM place)x
|
||||||
|
WHERE length(v) < 75 GROUP BY v""")
|
||||||
|
|
||||||
|
for name, cnt in cur:
|
||||||
|
terms = set()
|
||||||
|
for word in name_proc.get_variants_ascii(name_proc.get_normalized(name)):
|
||||||
|
if ' ' in word:
|
||||||
|
terms.update(word.split())
|
||||||
|
for term in terms:
|
||||||
|
words[term] += cnt
|
||||||
|
|
||||||
|
return words
|
||||||
|
|
||||||
|
|
||||||
class LegacyICUNameAnalyzer:
|
class LegacyICUNameAnalyzer:
|
||||||
""" The legacy analyzer uses the ICU library for splitting names.
|
""" The legacy analyzer uses the ICU library for splitting names.
|
||||||
|
|||||||
Reference in New Issue
Block a user