mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
Merge pull request #1033 from lonvia/remove-word-frequency-scores
Replace word frequency hash
This commit is contained in:
@@ -658,7 +658,6 @@ class Geocode
|
|||||||
$this->oDB->getAll($sSQL),
|
$this->oDB->getAll($sSQL),
|
||||||
'Could not get word tokens.'
|
'Could not get word tokens.'
|
||||||
);
|
);
|
||||||
$aWordFrequencyScores = array();
|
|
||||||
foreach ($aDatabaseWords as $aToken) {
|
foreach ($aDatabaseWords as $aToken) {
|
||||||
// Filter country tokens that do not match restricted countries.
|
// Filter country tokens that do not match restricted countries.
|
||||||
if ($this->aCountryCodes
|
if ($this->aCountryCodes
|
||||||
@@ -681,7 +680,6 @@ class Geocode
|
|||||||
} else {
|
} else {
|
||||||
$aValidTokens[$aToken['word_token']] = array($aToken);
|
$aValidTokens[$aToken['word_token']] = array($aToken);
|
||||||
}
|
}
|
||||||
$aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
|
// US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
|
||||||
@@ -781,7 +779,6 @@ class Geocode
|
|||||||
|
|
||||||
$aResults += $oSearch->query(
|
$aResults += $oSearch->query(
|
||||||
$this->oDB,
|
$this->oDB,
|
||||||
$aWordFrequencyScores,
|
|
||||||
$this->iMinAddressRank,
|
$this->iMinAddressRank,
|
||||||
$this->iMaxAddressRank,
|
$this->iMaxAddressRank,
|
||||||
$this->iLimit
|
$this->iLimit
|
||||||
|
|||||||
@@ -17,6 +17,8 @@ class SearchDescription
|
|||||||
private $sCountryCode = '';
|
private $sCountryCode = '';
|
||||||
/// List of word ids making up the name of the object.
|
/// List of word ids making up the name of the object.
|
||||||
private $aName = array();
|
private $aName = array();
|
||||||
|
/// True if the name is rare enough to force index use on name.
|
||||||
|
private $bRareName = false;
|
||||||
/// List of word ids making up the address of the object.
|
/// List of word ids making up the address of the object.
|
||||||
private $aAddress = array();
|
private $aAddress = array();
|
||||||
/// Subset of word ids of full words making up the address.
|
/// Subset of word ids of full words making up the address.
|
||||||
@@ -292,6 +294,11 @@ class SearchDescription
|
|||||||
$oSearch = clone $this;
|
$oSearch = clone $this;
|
||||||
$oSearch->iSearchRank++;
|
$oSearch->iSearchRank++;
|
||||||
$oSearch->aName = array($iWordID => $iWordID);
|
$oSearch->aName = array($iWordID => $iWordID);
|
||||||
|
if (CONST_Search_NameOnlySearchFrequencyThreshold) {
|
||||||
|
$oSearch->bRareName =
|
||||||
|
$aSearchTerm['search_name_count'] + 1
|
||||||
|
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||||
|
}
|
||||||
$aNewSearches[] = $oSearch;
|
$aNewSearches[] = $oSearch;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -368,6 +375,13 @@ class SearchDescription
|
|||||||
$oSearch->iSearchRank += 2;
|
$oSearch->iSearchRank += 2;
|
||||||
}
|
}
|
||||||
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
|
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
|
||||||
|
if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
|
||||||
|
$oSearch->bRareName =
|
||||||
|
$aSearchTerm['search_name_count'] + 1
|
||||||
|
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||||
|
} else {
|
||||||
|
$oSearch->bRareName = false;
|
||||||
|
}
|
||||||
$oSearch->aName[$iWordID] = $iWordID;
|
$oSearch->aName[$iWordID] = $iWordID;
|
||||||
} else {
|
} else {
|
||||||
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
|
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
|
||||||
@@ -385,20 +399,16 @@ class SearchDescription
|
|||||||
/**
|
/**
|
||||||
* Query database for places that match this search.
|
* Query database for places that match this search.
|
||||||
*
|
*
|
||||||
* @param object $oDB Database connection to use.
|
* @param object $oDB Database connection to use.
|
||||||
* @param mixed[] $aWordFrequencyScores Number of times tokens appears
|
* @param integer $iMinRank Minimum address rank to restrict search to.
|
||||||
* overall in a planet database.
|
* @param integer $iMaxRank Maximum address rank to restrict search to.
|
||||||
* @param integer $iMinRank Minimum address rank to restrict
|
* @param integer $iLimit Maximum number of results.
|
||||||
* search to.
|
|
||||||
* @param integer $iMaxRank Maximum address rank to restrict
|
|
||||||
* search to.
|
|
||||||
* @param integer $iLimit Maximum number of results.
|
|
||||||
*
|
*
|
||||||
* @return mixed[] An array with two fields: IDs contains the list of
|
* @return mixed[] An array with two fields: IDs contains the list of
|
||||||
* matching place IDs and houseNumber the houseNumber
|
* matching place IDs and houseNumber the houseNumber
|
||||||
* if applicable or -1 if not.
|
* if applicable or -1 if not.
|
||||||
*/
|
*/
|
||||||
public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit)
|
public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
|
||||||
{
|
{
|
||||||
$aResults = array();
|
$aResults = array();
|
||||||
$iHousenumber = -1;
|
$iHousenumber = -1;
|
||||||
@@ -427,7 +437,6 @@ class SearchDescription
|
|||||||
// First search for places according to name and address.
|
// First search for places according to name and address.
|
||||||
$aResults = $this->queryNamedPlace(
|
$aResults = $this->queryNamedPlace(
|
||||||
$oDB,
|
$oDB,
|
||||||
$aWordFrequencyScores,
|
|
||||||
$iMinRank,
|
$iMinRank,
|
||||||
$iMaxRank,
|
$iMaxRank,
|
||||||
$iLimit
|
$iLimit
|
||||||
@@ -579,7 +588,7 @@ class SearchDescription
|
|||||||
return $aResults;
|
return $aResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit)
|
private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
|
||||||
{
|
{
|
||||||
$aTerms = array();
|
$aTerms = array();
|
||||||
$aOrder = array();
|
$aOrder = array();
|
||||||
@@ -615,11 +624,7 @@ class SearchDescription
|
|||||||
}
|
}
|
||||||
if (!empty($this->aAddress)) {
|
if (!empty($this->aAddress)) {
|
||||||
// For infrequent name terms disable index usage for address
|
// For infrequent name terms disable index usage for address
|
||||||
if (CONST_Search_NameOnlySearchFrequencyThreshold
|
if ($this->bRareName) {
|
||||||
&& count($this->aName) == 1
|
|
||||||
&& $aWordFrequencyScores[$this->aName[reset($this->aName)]]
|
|
||||||
< CONST_Search_NameOnlySearchFrequencyThreshold
|
|
||||||
) {
|
|
||||||
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
|
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
|
||||||
} else {
|
} else {
|
||||||
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
|
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
|
||||||
|
|||||||
Reference in New Issue
Block a user