mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 05:18:00 +00:00
fix warming for ICU tokenizer
Running the warm-up search requests requires querying the most frequent words. This must be done via the tokenizer to honor the different formats of the word table.
This commit is contained in:
@@ -86,8 +86,13 @@ if (!$aResult['reverse-only']) {
|
||||
if ($bVerbose) {
|
||||
echo "\n";
|
||||
}
|
||||
|
||||
$oTokenizer = new \Nominatim\Tokenizer($oDB);
|
||||
|
||||
$aWords = $oTokenizer->mostFrequentWords(1000);
|
||||
|
||||
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
|
||||
foreach ($oDB->getCol($sSQL) as $sWord) {
|
||||
foreach ($aWords as $sWord) {
|
||||
if ($bVerbose) {
|
||||
echo "$sWord = ";
|
||||
}
|
||||
|
||||
@@ -40,6 +40,15 @@ class Tokenizer
|
||||
return $this->oNormalizer->transliterate($sTerm);
|
||||
}
|
||||
|
||||
|
||||
/**
 * Return the words with the highest usage count from the word table.
 *
 * Selects rows of type 'W' and orders them by the 'count' entry of the
 * `info` column, highest first.
 *
 * @param int $iNum Maximum number of words to return.
 *
 * @return mixed Whatever the DB layer's getCol() returns for the `word`
 *               column (callers iterate over it as a list of strings).
 */
public function mostFrequentWords($iNum)
{
    // Every fragment ends in a space so that the concatenated clauses can
    // never run into each other (previously: "type = 'W'ORDER BY").
    $sSQL = "SELECT word FROM word WHERE type = 'W' ";
    // Rows without a 'count' entry yield NULL, which a descending sort puts
    // first by default in PostgreSQL; move them to the end so that they are
    // not mistaken for the most frequent words.
    $sSQL .= "ORDER BY info->'count' DESC NULLS LAST ";
    // The limit is spliced into the statement text, so force it to an integer.
    $sSQL .= 'LIMIT '.(int) $iNum;

    return $this->oDB->getCol($sSQL);
}
|
||||
|
||||
|
||||
private function makeStandardWord($sTerm)
|
||||
{
|
||||
return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));
|
||||
|
||||
@@ -48,6 +48,14 @@ class Tokenizer
|
||||
}
|
||||
|
||||
|
||||
/**
 * Return the words with the highest `search_name_count` from the word table.
 *
 * Only rows with a non-null `word` column are considered; the result is
 * ordered by `search_name_count`, highest first.
 *
 * @param int $iNum Maximum number of words to return.
 *
 * @return mixed Whatever the DB layer's getCol() returns for the `word`
 *               column (callers iterate over it as a list of strings).
 */
public function mostFrequentWords($iNum)
{
    // Clauses are joined with a single space to form the final statement.
    $aClauses = array(
        'SELECT word FROM word WHERE word is not null',
        'ORDER BY search_name_count DESC LIMIT '.$iNum
    );

    return $this->oDB->getCol(implode(' ', $aClauses));
}
|
||||
|
||||
|
||||
public function tokensForSpecialTerm($sTerm)
|
||||
{
|
||||
$aResults = array();
|
||||
|
||||
Reference in New Issue
Block a user