move tokenization in query into tokenizer

This commit is contained in:
Sarah Hoffmann
2021-04-28 14:08:24 +02:00
parent 3eb4d88057
commit 044bb6afa5
7 changed files with 315 additions and 311 deletions

View File

@@ -95,88 +95,6 @@ class TokenList
return $ids;
}
/**
 * Add token information from the word table in the database.
 *
 * All given tokens are looked up in the word table with a single query.
 * For every row returned, at most one token object is created and
 * registered through addToken() under the row's word_token:
 *  - rows with a class become house number, postcode or special term
 *    tokens,
 *  - rows without a class but with a country code become country tokens,
 *  - all remaining rows become word tokens.
 *
 * Nothing is queried or added when the token list is empty.
 *
 * @param object   $oDB           Nominatim::DB instance.
 * @param string[] $aTokens       List of tokens to look up in the database.
 * @param string[] $aCountryCodes List of country restrictions.
 * @param string   $sNormQuery    Normalized query string.
 * @param object   $oNormalizer   Normalizer function to use on tokens.
 *
 * @return void
 */
public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
{
    // An empty list would yield the invalid SQL 'word_token in ()',
    // so there is nothing to look up in that case.
    if (empty($aTokens)) {
        return;
    }

    // Check which tokens we have, get the ID numbers
    $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
    $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
    $sSQL .= ' FROM word WHERE word_token in (';
    $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';

    Debug::printSQL($sSQL);

    $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');

    foreach ($aDBWords as $aWord) {
        $oToken = null;
        $iId = (int) $aWord['word_id'];

        if ($aWord['class']) {
            // Special terms need to appear in their normalized form.
            if ($aWord['word']) {
                $sNormWord = $aWord['word'];
                if ($oNormalizer != null) {
                    $sNormWord = $oNormalizer->transliterate($aWord['word']);
                }
                // Skip the row when its word is not part of the query.
                if (strpos($sNormQuery, $sNormWord) === false) {
                    continue;
                }
            }

            if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
                $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
            } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
                // Only accept postcodes that SQL escaping leaves unchanged;
                // other postcode rows produce no token at all.
                if ($aWord['word']
                    && pg_escape_string($aWord['word']) == $aWord['word']
                ) {
                    $oToken = new Token\Postcode(
                        $iId,
                        $aWord['word'],
                        $aWord['country_code']
                    );
                }
            } else {
                // near and in operator the same at the moment
                $oToken = new Token\SpecialTerm(
                    $iId,
                    $aWord['class'],
                    $aWord['type'],
                    $aWord['operator'] ? Operator::NEAR : Operator::NONE
                );
            }
        } elseif ($aWord['country_code']) {
            // Filter country tokens that do not match restricted countries.
            if (!$aCountryCodes
                || in_array($aWord['country_code'], $aCountryCodes)
            ) {
                $oToken = new Token\Country($iId, $aWord['country_code']);
            }
        } else {
            // Second argument: true when the token does not start with a
            // space (NOTE(review): presumably marks partial-word tokens -
            // confirm against Token\Word). Last argument: number of spaces
            // contained in the token.
            $oToken = new Token\Word(
                $iId,
                $aWord['word_token'][0] != ' ',
                (int) $aWord['count'],
                substr_count($aWord['word_token'], ' ')
            );
        }

        if ($oToken) {
            $this->addToken($aWord['word_token'], $oToken);
        }
    }
}
/**
* Add a new token for the given word.
*