forked from hans/Nominatim
fix warming for ICU tokenizer
Running the warm-up search requests requires querying the most frequent words. This must be done via the tokenizer to honor the different formats of the word table.
This commit is contained in:
@@ -86,8 +86,13 @@ if (!$aResult['reverse-only']) {
|
|||||||
if ($bVerbose) {
|
if ($bVerbose) {
|
||||||
echo "\n";
|
echo "\n";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$oTokenizer = new \Nominatim\Tokenizer($oDB);
|
||||||
|
|
||||||
|
$aWords = $oTokenizer->mostFrequentWords(1000);
|
||||||
|
|
||||||
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
|
$sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
|
||||||
foreach ($oDB->getCol($sSQL) as $sWord) {
|
foreach ($aWords as $sWord) {
|
||||||
if ($bVerbose) {
|
if ($bVerbose) {
|
||||||
echo "$sWord = ";
|
echo "$sWord = ";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,6 +40,15 @@ class Tokenizer
|
|||||||
return $this->oNormalizer->transliterate($sTerm);
|
return $this->oNormalizer->transliterate($sTerm);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Return the most frequent words from the word table.
 *
 * Selects the `word` column of all rows with type 'W', ordered by the
 * 'count' entry of the `info` column, highest first.
 *
 * @param int $iNum Maximum number of words to return. The value is
 *                  appended to the SQL unchanged, so callers must pass
 *                  a trusted integer.
 *
 * @return mixed Whatever getCol() yields for the query; presumably a
 *               list of word strings (verify against the DB class).
 */
public function mostFrequentWords($iNum)
{
    // The first fragment ends with a space so that the concatenated
    // statement reads "... type = 'W' ORDER BY ..." instead of relying
    // on the SQL lexer to split "'W'ORDER".
    $sSQL = "SELECT word FROM word WHERE type = 'W' ";
    $sSQL .= "ORDER BY info->'count' DESC LIMIT ".$iNum;

    return $this->oDB->getCol($sSQL);
}
|
||||||
|
|
||||||
|
|
||||||
private function makeStandardWord($sTerm)
|
private function makeStandardWord($sTerm)
|
||||||
{
|
{
|
||||||
return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));
|
return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));
|
||||||
|
|||||||
@@ -48,6 +48,14 @@ class Tokenizer
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/**
 * Return the most frequent words from the word table.
 *
 * Selects the `word` column of all rows whose word is not null,
 * ordered by `search_name_count`, highest first.
 *
 * @param int $iNum Maximum number of words to return. The value is
 *                  appended to the SQL unchanged.
 *
 * @return mixed Whatever getCol() yields for the query; presumably a
 *               list of word strings (verify against the DB class).
 */
public function mostFrequentWords($iNum)
{
    $sQuery = 'SELECT word FROM word WHERE word is not null '
            .'ORDER BY search_name_count DESC LIMIT '
            .$iNum;

    return $this->oDB->getCol($sQuery);
}
|
||||||
|
|
||||||
|
|
||||||
public function tokensForSpecialTerm($sTerm)
|
public function tokensForSpecialTerm($sTerm)
|
||||||
{
|
{
|
||||||
$aResults = array();
|
$aResults = array();
|
||||||
|
|||||||
Reference in New Issue
Block a user