remove country restriction from tokenizer

Restricting tokens based on the search context is better done in
the generic search part instead of repeating the same test in
every tokenizer implementation.
This commit is contained in:
Sarah Hoffmann
2021-07-29 21:25:59 +02:00
parent 78d11fe628
commit 0fb8eade13
5 changed files with 21 additions and 26 deletions

View File

@@ -8,7 +8,6 @@ class Tokenizer
private $oNormalizer;
private $oTransliterator;
private $aCountryRestriction;
public function __construct(&$oDB)
{
@@ -30,12 +29,6 @@ class Tokenizer
}
/**
 * Remember a country restriction for later token lookups.
 *
 * The value is stored unchanged in $this->aCountryRestriction. Where
 * country name tokens (type 'C') are collected, a non-empty restriction
 * causes tokens whose word is not contained in it to be skipped.
 *
 * @param mixed $aCountries Presumably an array of country codes, since it
 *                          is used as the haystack of in_array(); a falsy
 *                          value disables the restriction. TODO confirm
 *                          the exact type against the callers.
 */
public function setCountryRestriction($aCountries)
{
$this->aCountryRestriction = $aCountries;
}
public function normalizeString($sTerm)
{
if ($this->oNormalizer === null) {
@@ -162,10 +155,7 @@ class Tokenizer
switch ($aWord['type']) {
case 'C': // country name tokens
if ($aWord['word'] !== null
&& (!$this->aCountryRestriction
|| in_array($aWord['word'], $this->aCountryRestriction))
) {
if ($aWord['word'] !== null) {
$oValidTokens->addToken(
$sTok,
new Token\Country($iId, $aWord['word'])

View File

@@ -7,7 +7,6 @@ class Tokenizer
private $oDB;
private $oNormalizer = null;
private $aCountryRestriction = null;
public function __construct(&$oDB)
{
@@ -37,12 +36,6 @@ class Tokenizer
}
/**
 * Remember a country restriction for later token lookups.
 *
 * The value is stored unchanged in $this->aCountryRestriction (which
 * defaults to null). Where country tokens are created, a non-empty
 * restriction causes rows whose 'country_code' is not contained in it
 * to be skipped.
 *
 * @param mixed $aCountries Presumably an array of country codes, since it
 *                          is used as the haystack of in_array(); a falsy
 *                          value disables the restriction. TODO confirm
 *                          the exact type against the callers.
 */
public function setCountryRestriction($aCountries)
{
$this->aCountryRestriction = $aCountries;
}
public function normalizeString($sTerm)
{
if ($this->oNormalizer === null) {
@@ -206,12 +199,7 @@ class Tokenizer
);
}
} elseif ($aWord['country_code']) {
// Filter country tokens that do not match restricted countries.
if (!$this->aCountryRestriction
|| in_array($aWord['country_code'], $this->aCountryRestriction)
) {
$oToken = new Token\Country($iId, $aWord['country_code']);
}
$oToken = new Token\Country($iId, $aWord['country_code']);
} elseif ($aWord['word_token'][0] == ' ') {
$oToken = new Token\Word(
$iId,