mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
fully evaluate reverse order of queries
Factors out computation of search group and calls the code once for forward evaluation and wants with reversed word order.
This commit is contained in:
596
lib/Geocode.php
596
lib/Geocode.php
@@ -441,6 +441,290 @@
|
|||||||
return $aSearchResults;
|
return $aSearchResults;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
Calculate all searches using aValidTokens i.e.
|
||||||
|
'Wodsworth Road, Sheffield' =>
|
||||||
|
|
||||||
|
Phrase Wordset
|
||||||
|
0 0 (wodsworth road)
|
||||||
|
0 1 (wodsworth)(road)
|
||||||
|
1 0 (sheffield)
|
||||||
|
|
||||||
|
Score how good the search is so they can be ordered
|
||||||
|
*/
|
||||||
|
foreach($aPhrases as $iPhrase => $sPhrase)
|
||||||
|
{
|
||||||
|
$aNewPhraseSearches = array();
|
||||||
|
if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
|
||||||
|
else $sPhraseType = '';
|
||||||
|
|
||||||
|
foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset)
|
||||||
|
{
|
||||||
|
// Too many permutations - too expensive
|
||||||
|
if ($iWordSet > 120) break;
|
||||||
|
|
||||||
|
$aWordsetSearches = $aSearches;
|
||||||
|
|
||||||
|
// Add all words from this wordset
|
||||||
|
foreach($aWordset as $iToken => $sToken)
|
||||||
|
{
|
||||||
|
//echo "<br><b>$sToken</b>";
|
||||||
|
$aNewWordsetSearches = array();
|
||||||
|
|
||||||
|
foreach($aWordsetSearches as $aCurrentSearch)
|
||||||
|
{
|
||||||
|
//echo "<i>";
|
||||||
|
//var_dump($aCurrentSearch);
|
||||||
|
//echo "</i>";
|
||||||
|
|
||||||
|
// If the token is valid
|
||||||
|
if (isset($aValidTokens[' '.$sToken]))
|
||||||
|
{
|
||||||
|
foreach($aValidTokens[' '.$sToken] as $aSearchTerm)
|
||||||
|
{
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank']++;
|
||||||
|
if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
|
||||||
|
{
|
||||||
|
if ($aSearch['sCountryCode'] === false)
|
||||||
|
{
|
||||||
|
$aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
|
||||||
|
// Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
|
||||||
|
if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)))
|
||||||
|
{
|
||||||
|
$aSearch['iSearchRank'] += 5;
|
||||||
|
}
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null)
|
||||||
|
{
|
||||||
|
if ($aSearch['fLat'] === '')
|
||||||
|
{
|
||||||
|
$aSearch['fLat'] = $aSearchTerm['lat'];
|
||||||
|
$aSearch['fLon'] = $aSearchTerm['lon'];
|
||||||
|
$aSearch['fRadius'] = $aSearchTerm['radius'];
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elseif ($sPhraseType == 'postalcode')
|
||||||
|
{
|
||||||
|
// We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
|
||||||
|
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
||||||
|
{
|
||||||
|
// If we already have a name try putting the postcode first
|
||||||
|
if (sizeof($aSearch['aName']))
|
||||||
|
{
|
||||||
|
$aNewSearch = $aSearch;
|
||||||
|
$aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
|
||||||
|
$aNewSearch['aName'] = array();
|
||||||
|
$aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (sizeof($aSearch['aName']))
|
||||||
|
{
|
||||||
|
if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
|
||||||
|
{
|
||||||
|
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
$aSearch['iSearchRank'] += 1000; // skip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
//$aSearch['iNamePhrase'] = $iPhrase;
|
||||||
|
}
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house')
|
||||||
|
{
|
||||||
|
if ($aSearch['sHouseNumber'] === '')
|
||||||
|
{
|
||||||
|
$aSearch['sHouseNumber'] = $sToken;
|
||||||
|
// sanity check: if the housenumber is not mainly made
|
||||||
|
// up of numbers, add a penalty
|
||||||
|
if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
/*
|
||||||
|
// Fall back to not searching for this item (better than nothing)
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
*/
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null)
|
||||||
|
{
|
||||||
|
if ($aSearch['sClass'] === '')
|
||||||
|
{
|
||||||
|
$aSearch['sOperator'] = $aSearchTerm['operator'];
|
||||||
|
$aSearch['sClass'] = $aSearchTerm['class'];
|
||||||
|
$aSearch['sType'] = $aSearchTerm['type'];
|
||||||
|
if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
|
||||||
|
else $aSearch['sOperator'] = 'near'; // near = in for the moment
|
||||||
|
if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
|
||||||
|
|
||||||
|
// Do we have a shortcut id?
|
||||||
|
if ($aSearch['sOperator'] == 'name')
|
||||||
|
{
|
||||||
|
$sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')";
|
||||||
|
if ($iAmenityID = $this->oDB->getOne($sSQL))
|
||||||
|
{
|
||||||
|
$aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID);
|
||||||
|
$aSearch['aName'][$iAmenityID] = $iAmenityID;
|
||||||
|
$aSearch['sClass'] = '';
|
||||||
|
$aSearch['sType'] = '';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
||||||
|
{
|
||||||
|
if (sizeof($aSearch['aName']))
|
||||||
|
{
|
||||||
|
if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
|
||||||
|
{
|
||||||
|
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
$aSearch['iSearchRank'] += 1000; // skip;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
//$aSearch['iNamePhrase'] = $iPhrase;
|
||||||
|
}
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (isset($aValidTokens[$sToken]))
|
||||||
|
{
|
||||||
|
// Allow searching for a word - but at extra cost
|
||||||
|
foreach($aValidTokens[$sToken] as $aSearchTerm)
|
||||||
|
{
|
||||||
|
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
||||||
|
{
|
||||||
|
if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false)
|
||||||
|
{
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
||||||
|
{
|
||||||
|
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version?
|
||||||
|
{
|
||||||
|
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
|
||||||
|
{
|
||||||
|
if (empty($aSearchTermToken['country_code'])
|
||||||
|
&& empty($aSearchTermToken['lat'])
|
||||||
|
&& empty($aSearchTermToken['class']))
|
||||||
|
{
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
$aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
|
||||||
|
{
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
|
||||||
|
if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
|
||||||
|
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
||||||
|
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
else
|
||||||
|
$aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
$aSearch['iNamePhrase'] = $iPhrase;
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Allow skipping a word - but at EXTREAM cost
|
||||||
|
//$aSearch = $aCurrentSearch;
|
||||||
|
//$aSearch['iSearchRank']+=100;
|
||||||
|
//$aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// Sort and cut
|
||||||
|
usort($aNewWordsetSearches, 'bySearchRank');
|
||||||
|
$aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
|
||||||
|
}
|
||||||
|
//var_Dump('<hr>',sizeof($aWordsetSearches)); exit;
|
||||||
|
|
||||||
|
$aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
|
||||||
|
usort($aNewPhraseSearches, 'bySearchRank');
|
||||||
|
|
||||||
|
$aSearchHash = array();
|
||||||
|
foreach($aNewPhraseSearches as $iSearch => $aSearch)
|
||||||
|
{
|
||||||
|
$sHash = serialize($aSearch);
|
||||||
|
if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
|
||||||
|
else $aSearchHash[$sHash] = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
$aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Re-group the searches by their score, junk anything over 20 as just not worth trying
|
||||||
|
$aGroupedSearches = array();
|
||||||
|
foreach($aNewPhraseSearches as $aSearch)
|
||||||
|
{
|
||||||
|
if ($aSearch['iSearchRank'] < $this->iMaxRank)
|
||||||
|
{
|
||||||
|
if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
|
||||||
|
$aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ksort($aGroupedSearches);
|
||||||
|
|
||||||
|
$iSearchCount = 0;
|
||||||
|
$aSearches = array();
|
||||||
|
foreach($aGroupedSearches as $iScore => $aNewSearches)
|
||||||
|
{
|
||||||
|
$iSearchCount += sizeof($aNewSearches);
|
||||||
|
$aSearches = array_merge($aSearches, $aNewSearches);
|
||||||
|
if ($iSearchCount > 50) break;
|
||||||
|
}
|
||||||
|
|
||||||
|
//if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
|
||||||
|
|
||||||
|
}
|
||||||
|
return $aGroupedSearches;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
/* Perform the actual query lookup.
|
/* Perform the actual query lookup.
|
||||||
|
|
||||||
Returns an ordered list of results, each with the following fields:
|
Returns an ordered list of results, each with the following fields:
|
||||||
@@ -755,287 +1039,38 @@
|
|||||||
// Start the search process
|
// Start the search process
|
||||||
$aResultPlaceIDs = array();
|
$aResultPlaceIDs = array();
|
||||||
|
|
||||||
/*
|
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases);
|
||||||
Calculate all searches using aValidTokens i.e.
|
|
||||||
'Wodsworth Road, Sheffield' =>
|
|
||||||
|
|
||||||
Phrase Wordset
|
if ($this->bReverseInPlan)
|
||||||
0 0 (wodsworth road)
|
|
||||||
0 1 (wodsworth)(road)
|
|
||||||
1 0 (sheffield)
|
|
||||||
|
|
||||||
Score how good the search is so they can be ordered
|
|
||||||
*/
|
|
||||||
foreach($aPhrases as $iPhrase => $sPhrase)
|
|
||||||
{
|
{
|
||||||
$aNewPhraseSearches = array();
|
// Reverse phrase array and also reverse the order of the wordsets in
|
||||||
if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
|
// the first and final phrase. Don't bother about phrases in the middle
|
||||||
else $sPhraseType = '';
|
// because order in the address doesn't matter.
|
||||||
|
$aPhrases = array_reverse($aPhrases);
|
||||||
foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset)
|
$aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0);
|
||||||
|
if (sizeof($aPhrases) > 1)
|
||||||
{
|
{
|
||||||
// Too many permutations - too expensive
|
$aFinalPhrase = end($aPhrases);
|
||||||
if ($iWordSet > 120) break;
|
$aFinalPhrase['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
|
||||||
|
}
|
||||||
|
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, false);
|
||||||
|
|
||||||
$aWordsetSearches = $aSearches;
|
foreach($aGroupedSearches as $aSearches)
|
||||||
|
{
|
||||||
// Add all words from this wordset
|
foreach($aSearches as $aSearch)
|
||||||
foreach($aWordset as $iToken => $sToken)
|
|
||||||
{
|
{
|
||||||
//echo "<br><b>$sToken</b>";
|
if ($aSearch['iSearchRank'] < $this->iMaxRank)
|
||||||
$aNewWordsetSearches = array();
|
|
||||||
|
|
||||||
foreach($aWordsetSearches as $aCurrentSearch)
|
|
||||||
{
|
{
|
||||||
//echo "<i>";
|
if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array();
|
||||||
//var_dump($aCurrentSearch);
|
$aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
|
||||||
//echo "</i>";
|
|
||||||
|
|
||||||
// If the token is valid
|
|
||||||
if (isset($aValidTokens[' '.$sToken]))
|
|
||||||
{
|
|
||||||
foreach($aValidTokens[' '.$sToken] as $aSearchTerm)
|
|
||||||
{
|
|
||||||
$aSearch = $aCurrentSearch;
|
|
||||||
$aSearch['iSearchRank']++;
|
|
||||||
if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
|
|
||||||
{
|
|
||||||
if ($aSearch['sCountryCode'] === false)
|
|
||||||
{
|
|
||||||
$aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
|
|
||||||
// Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
|
|
||||||
// If reverse order is enabled, it may appear at the beginning as well.
|
|
||||||
if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) &&
|
|
||||||
(!$this->bReverseInPlan || $iToken > 0 || $iPhrase > 0))
|
|
||||||
{
|
|
||||||
$aSearch['iSearchRank'] += 5;
|
|
||||||
}
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null)
|
|
||||||
{
|
|
||||||
if ($aSearch['fLat'] === '')
|
|
||||||
{
|
|
||||||
$aSearch['fLat'] = $aSearchTerm['lat'];
|
|
||||||
$aSearch['fLon'] = $aSearchTerm['lon'];
|
|
||||||
$aSearch['fRadius'] = $aSearchTerm['radius'];
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elseif ($sPhraseType == 'postalcode')
|
|
||||||
{
|
|
||||||
// We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
|
|
||||||
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
|
||||||
{
|
|
||||||
// If we already have a name try putting the postcode first
|
|
||||||
if (sizeof($aSearch['aName']))
|
|
||||||
{
|
|
||||||
$aNewSearch = $aSearch;
|
|
||||||
$aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
|
|
||||||
$aNewSearch['aName'] = array();
|
|
||||||
$aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (sizeof($aSearch['aName']))
|
|
||||||
{
|
|
||||||
if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
|
|
||||||
{
|
|
||||||
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
$aSearch['iSearchRank'] += 1000; // skip;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
//$aSearch['iNamePhrase'] = $iPhrase;
|
|
||||||
}
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house')
|
|
||||||
{
|
|
||||||
if ($aSearch['sHouseNumber'] === '')
|
|
||||||
{
|
|
||||||
$aSearch['sHouseNumber'] = $sToken;
|
|
||||||
// sanity check: if the housenumber is not mainly made
|
|
||||||
// up of numbers, add a penalty
|
|
||||||
if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
/*
|
|
||||||
// Fall back to not searching for this item (better than nothing)
|
|
||||||
$aSearch = $aCurrentSearch;
|
|
||||||
$aSearch['iSearchRank'] += 1;
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
*/
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null)
|
|
||||||
{
|
|
||||||
if ($aSearch['sClass'] === '')
|
|
||||||
{
|
|
||||||
$aSearch['sOperator'] = $aSearchTerm['operator'];
|
|
||||||
$aSearch['sClass'] = $aSearchTerm['class'];
|
|
||||||
$aSearch['sType'] = $aSearchTerm['type'];
|
|
||||||
if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
|
|
||||||
else $aSearch['sOperator'] = 'near'; // near = in for the moment
|
|
||||||
if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
|
|
||||||
|
|
||||||
// Do we have a shortcut id?
|
|
||||||
if ($aSearch['sOperator'] == 'name')
|
|
||||||
{
|
|
||||||
$sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')";
|
|
||||||
if ($iAmenityID = $this->oDB->getOne($sSQL))
|
|
||||||
{
|
|
||||||
$aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID);
|
|
||||||
$aSearch['aName'][$iAmenityID] = $iAmenityID;
|
|
||||||
$aSearch['sClass'] = '';
|
|
||||||
$aSearch['sType'] = '';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
|
||||||
{
|
|
||||||
if (sizeof($aSearch['aName']))
|
|
||||||
{
|
|
||||||
if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
|
|
||||||
{
|
|
||||||
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
$aSearch['iSearchRank'] += 1000; // skip;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
//$aSearch['iNamePhrase'] = $iPhrase;
|
|
||||||
}
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (isset($aValidTokens[$sToken]))
|
|
||||||
{
|
|
||||||
// Allow searching for a word - but at extra cost
|
|
||||||
foreach($aValidTokens[$sToken] as $aSearchTerm)
|
|
||||||
{
|
|
||||||
if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
|
|
||||||
{
|
|
||||||
if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false)
|
|
||||||
{
|
|
||||||
$aSearch = $aCurrentSearch;
|
|
||||||
$aSearch['iSearchRank'] += 1;
|
|
||||||
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
|
||||||
{
|
|
||||||
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version?
|
|
||||||
{
|
|
||||||
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
$aSearch['iSearchRank'] += 1;
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
|
|
||||||
{
|
|
||||||
if (empty($aSearchTermToken['country_code'])
|
|
||||||
&& empty($aSearchTermToken['lat'])
|
|
||||||
&& empty($aSearchTermToken['class']))
|
|
||||||
{
|
|
||||||
$aSearch = $aCurrentSearch;
|
|
||||||
$aSearch['iSearchRank'] += 1;
|
|
||||||
$aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
|
|
||||||
{
|
|
||||||
$aSearch = $aCurrentSearch;
|
|
||||||
$aSearch['iSearchRank'] += 1;
|
|
||||||
if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
|
|
||||||
if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
|
|
||||||
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
|
||||||
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
else
|
|
||||||
$aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
|
||||||
$aSearch['iNamePhrase'] = $iPhrase;
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Allow skipping a word - but at EXTREAM cost
|
|
||||||
//$aSearch = $aCurrentSearch;
|
|
||||||
//$aSearch['iSearchRank']+=100;
|
|
||||||
//$aNewWordsetSearches[] = $aSearch;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
// Sort and cut
|
|
||||||
usort($aNewWordsetSearches, 'bySearchRank');
|
|
||||||
$aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
|
|
||||||
}
|
|
||||||
//var_Dump('<hr>',sizeof($aWordsetSearches)); exit;
|
|
||||||
|
|
||||||
$aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
|
|
||||||
usort($aNewPhraseSearches, 'bySearchRank');
|
|
||||||
|
|
||||||
$aSearchHash = array();
|
|
||||||
foreach($aNewPhraseSearches as $iSearch => $aSearch)
|
|
||||||
{
|
|
||||||
$sHash = serialize($aSearch);
|
|
||||||
if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]);
|
|
||||||
else $aSearchHash[$sHash] = 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
$aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Re-group the searches by their score, junk anything over 20 as just not worth trying
|
|
||||||
$aGroupedSearches = array();
|
|
||||||
foreach($aNewPhraseSearches as $aSearch)
|
|
||||||
{
|
|
||||||
if ($aSearch['iSearchRank'] < $this->iMaxRank)
|
|
||||||
{
|
|
||||||
if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
|
|
||||||
$aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
$aGroupedSearches = $aReverseGroupedSearches;
|
||||||
ksort($aGroupedSearches);
|
ksort($aGroupedSearches);
|
||||||
|
|
||||||
$iSearchCount = 0;
|
|
||||||
$aSearches = array();
|
|
||||||
foreach($aGroupedSearches as $iScore => $aNewSearches)
|
|
||||||
{
|
|
||||||
$iSearchCount += sizeof($aNewSearches);
|
|
||||||
$aSearches = array_merge($aSearches, $aNewSearches);
|
|
||||||
if ($iSearchCount > 50) break;
|
|
||||||
}
|
|
||||||
|
|
||||||
//if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
|
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -1054,29 +1089,6 @@
|
|||||||
|
|
||||||
if (CONST_Debug) var_Dump($aGroupedSearches);
|
if (CONST_Debug) var_Dump($aGroupedSearches);
|
||||||
|
|
||||||
if ($this->bReverseInPlan)
|
|
||||||
{
|
|
||||||
$aCopyGroupedSearches = $aGroupedSearches;
|
|
||||||
foreach($aCopyGroupedSearches as $iGroup => $aSearches)
|
|
||||||
{
|
|
||||||
foreach($aSearches as $iSearch => $aSearch)
|
|
||||||
{
|
|
||||||
if (sizeof($aSearch['aAddress']))
|
|
||||||
{
|
|
||||||
$iReverseItem = array_pop($aSearch['aAddress']);
|
|
||||||
if (isset($aPossibleMainWordIDs[$iReverseItem]))
|
|
||||||
{
|
|
||||||
$aSearch['aAddress'] = array_merge($aSearch['aAddress'], $aSearch['aName']);
|
|
||||||
$aSearch['aName'] = array($iReverseItem);
|
|
||||||
$aGroupedSearches[$iGroup][] = $aSearch;
|
|
||||||
}
|
|
||||||
//$aReverseSearch['aName'][$iReverseItem] = $iReverseItem;
|
|
||||||
//$aGroupedSearches[$iGroup][] = $aReverseSearch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0)
|
if (CONST_Search_TryDroppedAddressTerms && sizeof($aStructuredQuery) > 0)
|
||||||
{
|
{
|
||||||
$aCopyGroupedSearches = $aGroupedSearches;
|
$aCopyGroupedSearches = $aGroupedSearches;
|
||||||
|
|||||||
20
lib/lib.php
20
lib/lib.php
@@ -193,6 +193,26 @@
|
|||||||
return $aResult;
|
return $aResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function getInverseWordSets($aWords, $iDepth)
|
||||||
|
{
|
||||||
|
$aResult = array(array(join(' ',$aWords)));
|
||||||
|
$sFirstToken = '';
|
||||||
|
if ($iDepth < 8)
|
||||||
|
{
|
||||||
|
while(sizeof($aWords) > 1)
|
||||||
|
{
|
||||||
|
$sWord = array_pop($aWords);
|
||||||
|
$sFirstToken = $sWord.($sFirstToken?' ':'').$sFirstToken;
|
||||||
|
$aRest = getInverseWordSets($aWords, $iDepth+1);
|
||||||
|
foreach($aRest as $aSet)
|
||||||
|
{
|
||||||
|
$aResult[] = array_merge(array($sFirstToken),$aSet);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return $aResult;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
function getTokensFromSets($aSets)
|
function getTokensFromSets($aSets)
|
||||||
{
|
{
|
||||||
|
|||||||
Reference in New Issue
Block a user