Merge pull request #1033 from lonvia/remove-word-frequency-scores

Replace word frequency hash
This commit is contained in:
Sarah Hoffmann
2018-05-07 20:59:20 +02:00
committed by GitHub
2 changed files with 21 additions and 19 deletions

View File

@@ -658,7 +658,6 @@ class Geocode
$this->oDB->getAll($sSQL), $this->oDB->getAll($sSQL),
'Could not get word tokens.' 'Could not get word tokens.'
); );
$aWordFrequencyScores = array();
foreach ($aDatabaseWords as $aToken) { foreach ($aDatabaseWords as $aToken) {
// Filter country tokens that do not match restricted countries. // Filter country tokens that do not match restricted countries.
if ($this->aCountryCodes if ($this->aCountryCodes
@@ -681,7 +680,6 @@ class Geocode
} else { } else {
$aValidTokens[$aToken['word_token']] = array($aToken); $aValidTokens[$aToken['word_token']] = array($aToken);
} }
$aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1;
} }
// US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
@@ -781,7 +779,6 @@ class Geocode
$aResults += $oSearch->query( $aResults += $oSearch->query(
$this->oDB, $this->oDB,
$aWordFrequencyScores,
$this->iMinAddressRank, $this->iMinAddressRank,
$this->iMaxAddressRank, $this->iMaxAddressRank,
$this->iLimit $this->iLimit

View File

@@ -17,6 +17,8 @@ class SearchDescription
private $sCountryCode = ''; private $sCountryCode = '';
/// List of word ids making up the name of the object. /// List of word ids making up the name of the object.
private $aName = array(); private $aName = array();
/// True if the name is rare enough to force index use on name.
private $bRareName = false;
/// List of word ids making up the address of the object. /// List of word ids making up the address of the object.
private $aAddress = array(); private $aAddress = array();
/// Subset of word ids of full words making up the address. /// Subset of word ids of full words making up the address.
@@ -292,6 +294,11 @@ class SearchDescription
$oSearch = clone $this; $oSearch = clone $this;
$oSearch->iSearchRank++; $oSearch->iSearchRank++;
$oSearch->aName = array($iWordID => $iWordID); $oSearch->aName = array($iWordID => $iWordID);
if (CONST_Search_NameOnlySearchFrequencyThreshold) {
$oSearch->bRareName =
$aSearchTerm['search_name_count'] + 1
< CONST_Search_NameOnlySearchFrequencyThreshold;
}
$aNewSearches[] = $oSearch; $aNewSearches[] = $oSearch;
} }
} }
@@ -368,6 +375,13 @@ class SearchDescription
$oSearch->iSearchRank += 2; $oSearch->iSearchRank += 2;
} }
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
$oSearch->bRareName =
$aSearchTerm['search_name_count'] + 1
< CONST_Search_NameOnlySearchFrequencyThreshold;
} else {
$oSearch->bRareName = false;
}
$oSearch->aName[$iWordID] = $iWordID; $oSearch->aName[$iWordID] = $iWordID;
} else { } else {
$oSearch->aNameNonSearch[$iWordID] = $iWordID; $oSearch->aNameNonSearch[$iWordID] = $iWordID;
@@ -385,20 +399,16 @@ class SearchDescription
/** /**
* Query database for places that match this search. * Query database for places that match this search.
* *
* @param object $oDB Database connection to use. * @param object $oDB Database connection to use.
* @param mixed[] $aWordFrequencyScores Number of times tokens appears * @param integer $iMinRank Minimum address rank to restrict search to.
* overall in a planet database. * @param integer $iMaxRank Maximum address rank to restrict search to.
* @param integer $iMinRank Minimum address rank to restrict * @param integer $iLimit Maximum number of results.
* search to.
* @param integer $iMaxRank Maximum address rank to restrict
* search to.
* @param integer $iLimit Maximum number of results.
* *
* @return mixed[] An array with two fields: IDs contains the list of * @return mixed[] An array with two fields: IDs contains the list of
* matching place IDs and houseNumber the houseNumber * matching place IDs and houseNumber the houseNumber
* if applicable or -1 if not. * if applicable or -1 if not.
*/ */
public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit) public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
{ {
$aResults = array(); $aResults = array();
$iHousenumber = -1; $iHousenumber = -1;
@@ -427,7 +437,6 @@ class SearchDescription
// First search for places according to name and address. // First search for places according to name and address.
$aResults = $this->queryNamedPlace( $aResults = $this->queryNamedPlace(
$oDB, $oDB,
$aWordFrequencyScores,
$iMinRank, $iMinRank,
$iMaxRank, $iMaxRank,
$iLimit $iLimit
@@ -579,7 +588,7 @@ class SearchDescription
return $aResults; return $aResults;
} }
private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
{ {
$aTerms = array(); $aTerms = array();
$aOrder = array(); $aOrder = array();
@@ -615,11 +624,7 @@ class SearchDescription
} }
if (!empty($this->aAddress)) { if (!empty($this->aAddress)) {
// For infrequent name terms disable index usage for address // For infrequent name terms disable index usage for address
if (CONST_Search_NameOnlySearchFrequencyThreshold if ($this->bRareName) {
&& count($this->aName) == 1
&& $aWordFrequencyScores[$this->aName[reset($this->aName)]]
< CONST_Search_NameOnlySearchFrequencyThreshold
) {
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress); $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
} else { } else {
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress); $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);