mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
reorganize keyword creation for legacy tokenizer
- only save partial words without internal spaces
- consider comma and semicolon a separator of full words
- consider parts before an opening bracket a full word (but not the part after the bracket)

Fixes #244.
This commit is contained in:
@@ -423,8 +423,7 @@ class LegacyICUNameAnalyzer:
|
||||
names = place.get('name')
|
||||
|
||||
if names:
|
||||
full_names = set((self.make_standard_word(name) for name in names.values()))
|
||||
full_names.discard('')
|
||||
full_names = self._compute_full_names(names)
|
||||
|
||||
token_info.add_names(self.conn, full_names)
|
||||
|
||||
@@ -461,6 +460,25 @@ class LegacyICUNameAnalyzer:
|
||||
return token_info.data
|
||||
|
||||
|
||||
def _compute_full_names(self, names):
    """ Collect the full-name words for the given dictionary of names.

        Every value of `names` is cut into individual names at each
        comma and semicolon. Each individual name is passed through
        `make_standard_word()` and the result is kept unless it is
        empty. When a name contains an opening bracket, the text in
        front of the first bracket is processed the same way and kept
        as an additional entry; the text after the bracket is never
        added on its own.

        Returns a set with the collected `make_standard_word()` results.
    """
    collected = set()

    for value in names.values():
        for candidate in re.split('[;,]', value):
            # The complete name is always processed before its prefix.
            standard = self.make_standard_word(candidate)
            if standard:
                collected.add(standard)

            prefix, bracket, _ = candidate.partition('(')
            if not bracket:
                # No opening bracket, so there is no extra variant.
                continue

            standard = self.make_standard_word(prefix)
            if standard:
                collected.add(standard)

    return collected
|
||||
|
||||
|
||||
def _add_postcode(self, postcode):
|
||||
""" Make sure the normalized postcode is present in the word table.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user