deduplicate search term graph slightly earlier

This commit is contained in:
Brian Quinion
2011-03-07 16:16:05 +00:00
parent c20103a461
commit 95d50f35d0

View File

@@ -84,7 +84,7 @@
$hLog = logStart($oDB, 'search', $sQuery, $aLangPrefOrder); $hLog = logStart($oDB, 'search', $sQuery, $aLangPrefOrder);
// Hack to make it handle "new york, ny" (and variants) correctly // Hack to make it handle "new york, ny" (and variants) correctly
$sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, new york', $sQuery); $sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $sQuery);
// If we have a view box create the SQL // If we have a view box create the SQL
// Small is the actual view box, Large is double (on each axis) that // Small is the actual view box, Large is double (on each axis) that
@@ -452,7 +452,7 @@
{ {
if (sizeof($aSearch['aName'])) if (sizeof($aSearch['aName']))
{ {
if (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4) if (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)
{ {
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
} }
@@ -470,14 +470,14 @@
} }
} }
} }
if (isset($aValidTokens[$sToken]) && strlen($sToken) >= 4) if (isset($aValidTokens[$sToken]))
{ {
// Allow searching for a word - but at extra cost // Allow searching for a word - but at extra cost
foreach($aValidTokens[$sToken] as $aSearchTerm) foreach($aValidTokens[$sToken] as $aSearchTerm)
{ {
//var_Dump('<hr>',$aSearch['aName']); //var_Dump('<hr>',$aSearch['aName']);
if (sizeof($aCurrentSearch['aName'])) if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
{ {
$aSearch = $aCurrentSearch; $aSearch = $aCurrentSearch;
$aSearch['iSearchRank'] += 1; $aSearch['iSearchRank'] += 1;
@@ -488,7 +488,8 @@
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
{ {
$aSearch = $aCurrentSearch; $aSearch = $aCurrentSearch;
$aSearch['iSearchRank'] += 4; $aSearch['iSearchRank'] += 2;
if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
$aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
$aSearch['iNamePhrase'] = $iPhrase; $aSearch['iNamePhrase'] = $iPhrase;
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
@@ -511,6 +512,21 @@
$aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches);
usort($aNewPhraseSearches, 'bySearchRank'); usort($aNewPhraseSearches, 'bySearchRank');
// Remove duplicate candidate searches from $aNewPhraseSearches.
// Two searches count as duplicates when their serialized forms are identical,
// i.e. every key and value of the search array matches.
// $aSearchHash is used as a set: serialized search => 1 for each one already seen.
$aSearchHash = array();
foreach($aNewPhraseSearches as $iSearch => $aSearch)
{
// serialize() gives a string fingerprint of the whole search array
$sHash = serialize($aSearch);
if (isset($aSearchHash[$sHash]))
{
// Already seen an identical search earlier in the array: drop this copy.
// This leaves a gap in the array keys; the array is not re-indexed here.
unset($aNewPhraseSearches[$iSearch]);
}
else
{
// First occurrence (in the array's current order): remember it and keep it
$aSearchHash[$sHash] = 1;
}
}
$aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50);
} }