mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-09 11:34:07 +00:00
switch special phrases to new word table format
This commit is contained in:
@@ -147,7 +147,9 @@ class Tokenizer
|
|||||||
{
|
{
|
||||||
// Check which tokens we have, get the ID numbers
|
// Check which tokens we have, get the ID numbers
|
||||||
$sSQL = 'SELECT word_id, word_token, type';
|
$sSQL = 'SELECT word_id, word_token, type';
|
||||||
$sSQL .= " info->>'cc' as country, info->>'postcode' as postcode";
|
$sSQL .= " info->>'cc' as country, info->>'postcode' as postcode,";
|
||||||
|
$sSQL .= " info->>'word' as word, info->>'op' as operator,";
|
||||||
|
$sSQL .= " info->>'class' as class, info->>'type' as type";
|
||||||
$sSQL .= ' FROM word WHERE word_token in (';
|
$sSQL .= ' FROM word WHERE word_token in (';
|
||||||
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
|
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
|
||||||
|
|
||||||
@@ -180,7 +182,26 @@ class Tokenizer
|
|||||||
) {
|
) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
$sNormPostcode = $this->normalizeString($aWord['postcode']);
|
||||||
|
if (strpos($sNormQuery, $sNormPostcode) === false) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
$oToken = new Token\Postcode($iId, $aWord['postcode'], null);
|
$oToken = new Token\Postcode($iId, $aWord['postcode'], null);
|
||||||
|
break;
|
||||||
|
'S': // tokens for classification terms (special phrases)
|
||||||
|
if ($aWord['class'] === null || $aWord['type'] === null
|
||||||
|
|| $aWord['word'] === null
|
||||||
|
|| strpos($sNormQuery, $aWord['word']) === false
|
||||||
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$oToken = new Token\SpecialTerm(
|
||||||
|
$iId,
|
||||||
|
$aWord['class'],
|
||||||
|
$aWord['type'],
|
||||||
|
$aWord['op'] ? Operator::NEAR : Operator::NONE
|
||||||
|
);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -299,6 +299,9 @@ class LegacyICUNameAnalyzer:
|
|||||||
|
|
||||||
def update_special_phrases(self, phrases, should_replace):
|
def update_special_phrases(self, phrases, should_replace):
|
||||||
""" Replace the search index for special phrases with the new phrases.
|
""" Replace the search index for special phrases with the new phrases.
|
||||||
|
If `should_replace` is True, then the previous set of phrases will be
|
||||||
|
completely replaced. Otherwise the phrases are added to the
|
||||||
|
already existing ones.
|
||||||
"""
|
"""
|
||||||
norm_phrases = set(((self.name_processor.get_normalized(p[0]), p[1], p[2], p[3])
|
norm_phrases = set(((self.name_processor.get_normalized(p[0]), p[1], p[2], p[3])
|
||||||
for p in phrases))
|
for p in phrases))
|
||||||
@@ -306,11 +309,10 @@ class LegacyICUNameAnalyzer:
|
|||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
# Get the old phrases.
|
# Get the old phrases.
|
||||||
existing_phrases = set()
|
existing_phrases = set()
|
||||||
cur.execute("""SELECT word, class, type, operator FROM word
|
cur.execute("SELECT info FROM word WHERE type = 'S'")
|
||||||
WHERE class != 'place'
|
for (info, ) in cur:
|
||||||
OR (type != 'house' AND type != 'postcode')""")
|
existing_phrases.add((info['word'], info['class'], info['type'],
|
||||||
for label, cls, typ, oper in cur:
|
info.get('op') or '-'))
|
||||||
existing_phrases.add((label, cls, typ, oper or '-'))
|
|
||||||
|
|
||||||
added = self._add_special_phrases(cur, norm_phrases, existing_phrases)
|
added = self._add_special_phrases(cur, norm_phrases, existing_phrases)
|
||||||
if should_replace:
|
if should_replace:
|
||||||
@@ -333,13 +335,13 @@ class LegacyICUNameAnalyzer:
|
|||||||
for word, cls, typ, oper in to_add:
|
for word, cls, typ, oper in to_add:
|
||||||
term = self.name_processor.get_search_normalized(word)
|
term = self.name_processor.get_search_normalized(word)
|
||||||
if term:
|
if term:
|
||||||
copystr.add(word, ' ' + term, cls, typ,
|
copystr.add(term, 'S',
|
||||||
oper if oper in ('in', 'near') else None, 0)
|
{'word': word, 'class': cls, 'type': typ,
|
||||||
|
'op': oper if oper in ('in', 'near') else None})
|
||||||
added += 1
|
added += 1
|
||||||
|
|
||||||
copystr.copy_out(cursor, 'word',
|
copystr.copy_out(cursor, 'word',
|
||||||
columns=['word', 'word_token', 'class', 'type',
|
columns=['word_token', 'type', 'info'])
|
||||||
'operator', 'search_name_count'])
|
|
||||||
|
|
||||||
return added
|
return added
|
||||||
|
|
||||||
@@ -354,9 +356,10 @@ class LegacyICUNameAnalyzer:
|
|||||||
if to_delete:
|
if to_delete:
|
||||||
cursor.execute_values(
|
cursor.execute_values(
|
||||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||||
WHERE word = name and class = in_class and type = in_type
|
WHERE info->>'word' = name
|
||||||
and ((op = '-' and operator is null) or op = operator)""",
|
and info->>'class' = in_class and info->>'type' = in_type
|
||||||
to_delete)
|
and ((op = '-' and info->>'op' is null) or op = info->>'op')
|
||||||
|
""", to_delete)
|
||||||
|
|
||||||
return len(to_delete)
|
return len(to_delete)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user