fix warming for ICU tokenizer

Running the warm-up search requests requires querying
the most frequent words. This must be done via the tokenizer
to honor the different formats of the word table.
This commit is contained in:
Sarah Hoffmann
2021-10-25 13:08:16 +02:00
parent 13e7398566
commit 16cc395f78
3 changed files with 23 additions and 1 deletions

View File

@@ -40,6 +40,15 @@ class Tokenizer
return $this->oNormalizer->transliterate($sTerm);
}
/**
 * Fetch the most frequent words from the word table.
 *
 * Selects the `word` column of all rows with type 'W', ordered by the
 * 'count' entry of the `info` column in descending order.
 *
 * @param int $iNum Maximum number of words to return.
 *
 * @return mixed Whatever getCol() of the database wrapper yields for the
 *               query (presumably a list of words - confirm against the
 *               DB class).
 */
public function mostFrequentWords($iNum)
{
    $sSQL = "SELECT word FROM word WHERE type = 'W'";
    // The leading space keeps the WHERE clause and ORDER BY apart; without
    // it the statement reads "...type = 'W'ORDER BY...".
    // The cast guarantees that only an integer ends up in the query text.
    // NOTE(review): rows without a 'count' entry would sort first under
    // DESC with the PostgreSQL default null ordering - confirm whether
    // NULLS LAST is wanted here.
    $sSQL .= " ORDER BY info->'count' DESC LIMIT ".(int) $iNum;

    return $this->oDB->getCol($sSQL);
}
private function makeStandardWord($sTerm)
{
return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));