mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-06 18:14:16 +00:00
Avoid dropping tokens completely just because they are expensive. Use the ' ' (space-prefixed) version of the token in preference to just dropping it.
This commit is contained in:
20
lib/lib.php
20
lib/lib.php
@@ -646,7 +646,7 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
echo "<table border=\"1\">";
|
echo "<table border=\"1\">";
|
||||||
echo "<tr><th>rank</th><th>Name Tokens</th><th>Address Tokens</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
|
echo "<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th><th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
|
||||||
foreach($aData as $iRank => $aRankedSet)
|
foreach($aData as $iRank => $aRankedSet)
|
||||||
{
|
{
|
||||||
foreach($aRankedSet as $aRow)
|
foreach($aRankedSet as $aRow)
|
||||||
@@ -663,6 +663,15 @@
|
|||||||
}
|
}
|
||||||
echo "</td>";
|
echo "</td>";
|
||||||
|
|
||||||
|
echo "<td>";
|
||||||
|
$sSep = '';
|
||||||
|
foreach($aRow['aNameNonSearch'] as $iWordID)
|
||||||
|
{
|
||||||
|
echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
|
||||||
|
$sSep = ', ';
|
||||||
|
}
|
||||||
|
echo "</td>";
|
||||||
|
|
||||||
echo "<td>";
|
echo "<td>";
|
||||||
$sSep = '';
|
$sSep = '';
|
||||||
foreach($aRow['aAddress'] as $iWordID)
|
foreach($aRow['aAddress'] as $iWordID)
|
||||||
@@ -672,6 +681,15 @@
|
|||||||
}
|
}
|
||||||
echo "</td>";
|
echo "</td>";
|
||||||
|
|
||||||
|
echo "<td>";
|
||||||
|
$sSep = '';
|
||||||
|
foreach($aRow['aAddressNonSearch'] as $iWordID)
|
||||||
|
{
|
||||||
|
echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
|
||||||
|
$sSep = ', ';
|
||||||
|
}
|
||||||
|
echo "</td>";
|
||||||
|
|
||||||
echo "<td>".$aRow['sCountryCode']."</td>";
|
echo "<td>".$aRow['sCountryCode']."</td>";
|
||||||
|
|
||||||
echo "<td>".$aRow['sOperator']."</td>";
|
echo "<td>".$aRow['sOperator']."</td>";
|
||||||
|
|||||||
@@ -638,10 +638,30 @@
|
|||||||
$aSearch = $aCurrentSearch;
|
$aSearch = $aCurrentSearch;
|
||||||
$aSearch['iSearchRank'] += 1;
|
$aSearch['iSearchRank'] += 1;
|
||||||
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
|
||||||
|
{
|
||||||
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
$aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
|
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version?
|
||||||
|
{
|
||||||
|
foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
|
||||||
|
{
|
||||||
|
if (empty($aSearchTermToken['country_code'])
|
||||||
|
&& empty($aSearchTermToken['lat'])
|
||||||
|
&& empty($aSearchTermToken['class']))
|
||||||
|
{
|
||||||
|
$aSearch = $aCurrentSearch;
|
||||||
|
$aSearch['iSearchRank'] += 1;
|
||||||
|
$aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
|
||||||
|
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
|
{
|
||||||
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
$aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
|
||||||
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
|
if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
|
||||||
@@ -900,6 +920,7 @@
|
|||||||
// TODO: filter out the pointless search terms (2 letter name tokens and less)
|
// TODO: filter out the pointless search terms (2 letter name tokens and less)
|
||||||
// they might be right - but they are just too darned expensive to run
|
// they might be right - but they are just too darned expensive to run
|
||||||
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
|
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
|
||||||
|
if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
|
||||||
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
|
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
|
||||||
{
|
{
|
||||||
// For infrequent name terms disable index usage for address
|
// For infrequent name terms disable index usage for address
|
||||||
@@ -907,11 +928,12 @@
|
|||||||
sizeof($aSearch['aName']) == 1 &&
|
sizeof($aSearch['aName']) == 1 &&
|
||||||
$aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
|
$aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
|
||||||
{
|
{
|
||||||
$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
$aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
$aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
|
||||||
|
if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
|
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
|
||||||
|
|||||||
Reference in New Issue
Block a user