mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
@@ -71,6 +71,21 @@ class Debug
|
||||
echo "</table>\n";
|
||||
}
|
||||
|
||||
/**
 * Print a list of searches, grouped by rank, as an HTML table.
 *
 * Writes the table directly to the output. The table cells for each
 * search are produced by the search object itself through its
 * dumpAsHtmlTableRow() method.
 *
 * @param array[] $aSearches Lists of search objects, keyed by rank.
 * @param mixed   $aWordsIDs Passed through unchanged to every
 *                           dumpAsHtmlTableRow() call.
 *
 * @return void
 */
public static function printGroupedSearch($aSearches, $aWordsIDs)
{
    // Opening tag and header row.
    echo '<table border="1">',
         '<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th>',
         '<th>Address Tokens</th><th>Address Not</th>',
         '<th>country</th><th>operator</th>',
         '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';

    // One row per search; the rank key itself is not needed here.
    foreach ($aSearches as $aSearchesOfRank) {
        foreach ($aSearchesOfRank as $oSearch) {
            $oSearch->dumpAsHtmlTableRow($aWordsIDs);
        }
    }

    echo '</table>';
}
|
||||
|
||||
public static function printGroupTable($sHeading, $aVar)
|
||||
{
|
||||
echo '<b>'.$sHeading.":</b>\n";
|
||||
|
||||
138
lib/Geocode.php
138
lib/Geocode.php
@@ -7,6 +7,7 @@ require_once(CONST_BasePath.'/lib/Phrase.php');
|
||||
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
|
||||
require_once(CONST_BasePath.'/lib/SearchDescription.php');
|
||||
require_once(CONST_BasePath.'/lib/SearchContext.php');
|
||||
require_once(CONST_BasePath.'/lib/TokenList.php');
|
||||
|
||||
class Geocode
|
||||
{
|
||||
@@ -332,10 +333,10 @@ class Geocode
|
||||
return false;
|
||||
}
|
||||
|
||||
public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured)
|
||||
public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
|
||||
{
|
||||
/*
|
||||
Calculate all searches using aValidTokens i.e.
|
||||
Calculate all searches using oValidTokens i.e.
|
||||
'Wodsworth Road, Sheffield' =>
|
||||
|
||||
Phrase Wordset
|
||||
@@ -365,38 +366,37 @@ class Geocode
|
||||
//var_dump($oCurrentSearch);
|
||||
//echo "</i>";
|
||||
|
||||
// If the token is valid
|
||||
if (isset($aValidTokens[' '.$sToken])) {
|
||||
foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithFullTerm(
|
||||
$aSearchTerm,
|
||||
isset($aValidTokens[$sToken])
|
||||
&& strpos($sToken, ' ') === false,
|
||||
$sPhraseType,
|
||||
$iToken == 0 && $iPhrase == 0,
|
||||
$iPhrase == 0,
|
||||
$iToken + 1 == count($aWordset)
|
||||
&& $iPhrase + 1 == count($aPhrases)
|
||||
);
|
||||
// Tokens with full name matches.
|
||||
foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithFullTerm(
|
||||
$oSearchTerm,
|
||||
$oValidTokens->contains($sToken)
|
||||
&& strpos($sToken, ' ') === false,
|
||||
$sPhraseType,
|
||||
$iToken == 0 && $iPhrase == 0,
|
||||
$iPhrase == 0,
|
||||
$iToken + 1 == count($aWordset)
|
||||
&& $iPhrase + 1 == count($aPhrases)
|
||||
);
|
||||
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
if ($oSearch->getRank() < $this->iMaxRank) {
|
||||
$aNewWordsetSearches[] = $oSearch;
|
||||
}
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
if ($oSearch->getRank() < $this->iMaxRank) {
|
||||
$aNewWordsetSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
}
|
||||
// Look for partial matches.
|
||||
// Note that there is no point in adding country terms here
|
||||
// because country is omitted in the address.
|
||||
if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') {
|
||||
if ($sPhraseType != 'country') {
|
||||
// Allow searching for a word - but at extra cost
|
||||
foreach ($aValidTokens[$sToken] as $aSearchTerm) {
|
||||
foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
|
||||
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
|
||||
$aSearchTerm,
|
||||
$sToken,
|
||||
$oSearchTerm,
|
||||
$bIsStructured,
|
||||
$iPhrase,
|
||||
isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array()
|
||||
$oValidTokens->get(' '.$sToken)
|
||||
);
|
||||
|
||||
foreach ($aNewSearches as $oSearch) {
|
||||
@@ -645,73 +645,51 @@ class Geocode
|
||||
Debug::printDebugTable('Phrases', $aPhrases);
|
||||
Debug::printVar('Tokens', $aTokens);
|
||||
|
||||
$oValidTokens = new TokenList();
|
||||
|
||||
if (!empty($aTokens)) {
|
||||
// Check which tokens we have, get the ID numbers
|
||||
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
|
||||
$sSQL .= ' FROM word ';
|
||||
$sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
|
||||
|
||||
Debug::printSQL($sSQL);
|
||||
|
||||
$aValidTokens = array();
|
||||
$aDatabaseWords = chksql(
|
||||
$this->oDB->getAll($sSQL),
|
||||
'Could not get word tokens.'
|
||||
$oValidTokens->addTokensFromDB(
|
||||
$this->oDB,
|
||||
$aTokens,
|
||||
$this->aCountryCodes,
|
||||
$sNormQuery,
|
||||
$this->oNormalizer
|
||||
);
|
||||
foreach ($aDatabaseWords as $aToken) {
|
||||
// Filter country tokens that do not match restricted countries.
|
||||
if ($this->aCountryCodes
|
||||
&& $aToken['country_code']
|
||||
&& !in_array($aToken['country_code'], $this->aCountryCodes)
|
||||
) {
|
||||
continue;
|
||||
}
|
||||
|
||||
// Special terms need to appear in their normalized form.
|
||||
if ($aToken['word'] && $aToken['class']) {
|
||||
$sNormWord = $this->normTerm($aToken['word']);
|
||||
if (strpos($sNormQuery, $sNormWord) === false) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (isset($aValidTokens[$aToken['word_token']])) {
|
||||
$aValidTokens[$aToken['word_token']][] = $aToken;
|
||||
} else {
|
||||
$aValidTokens[$aToken['word_token']] = array($aToken);
|
||||
}
|
||||
}
|
||||
|
||||
// US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
|
||||
// Try more interpretations for Tokens that could not be matched.
|
||||
foreach ($aTokens as $sToken) {
|
||||
if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
|
||||
if (isset($aValidTokens[$aData[1]])) {
|
||||
foreach ($aValidTokens[$aData[1]] as $aToken) {
|
||||
if (!$aToken['class']) {
|
||||
if (isset($aValidTokens[$sToken])) {
|
||||
$aValidTokens[$sToken][] = $aToken;
|
||||
} else {
|
||||
$aValidTokens[$sToken] = array($aToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
|
||||
if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
|
||||
// US ZIP+4 codes - merge in the 5-digit ZIP code
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\Postcode(null, $aData[1], 'us')
|
||||
);
|
||||
} elseif (preg_match('/^ [0-9]+$/', $sToken)) {
|
||||
// Unknown single word token with a number.
|
||||
// Assume it is a house number.
|
||||
$oValidTokens->addToken(
|
||||
$sToken,
|
||||
new Token\HouseNumber(null, trim($sToken))
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
foreach ($aTokens as $sToken) {
|
||||
// Unknown single word token with a number - assume it is a house number
|
||||
if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) {
|
||||
$aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken));
|
||||
}
|
||||
}
|
||||
Debug::printGroupTable('Valid Tokens', $aValidTokens);
|
||||
|
||||
// Any words that have failed completely?
|
||||
// TODO: suggestions
|
||||
|
||||
Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
|
||||
|
||||
Debug::newSection('Search candidates');
|
||||
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases);
|
||||
$aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
|
||||
|
||||
if ($this->bReverseInPlan) {
|
||||
// Reverse phrase array and also reverse the order of the wordsets in
|
||||
@@ -722,7 +700,7 @@ class Geocode
|
||||
if (count($aPhrases) > 1) {
|
||||
$aPhrases[count($aPhrases)-1]->invertWordSets();
|
||||
}
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false);
|
||||
$aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
|
||||
|
||||
foreach ($aGroupedSearches as $aSearches) {
|
||||
foreach ($aSearches as $aSearch) {
|
||||
@@ -762,7 +740,10 @@ class Geocode
|
||||
}
|
||||
}
|
||||
|
||||
if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
|
||||
Debug::printGroupedSearch(
|
||||
$aGroupedSearches,
|
||||
$oValidTokens->debugTokenByWordIdList()
|
||||
);
|
||||
|
||||
// Start the search process
|
||||
$iGroupLoop = 0;
|
||||
@@ -772,10 +753,11 @@ class Geocode
|
||||
foreach ($aSearches as $oSearch) {
|
||||
$iQueryLoop++;
|
||||
|
||||
if (CONST_Debug) {
|
||||
echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
|
||||
_debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens);
|
||||
}
|
||||
Debug::newSection("Search Loop, group $iGroupLoop, loop $iQueryLoop");
|
||||
Debug::printGroupedSearch(
|
||||
array($iGroupedRank => array($oSearch)),
|
||||
$oValidTokens->debugTokenByWordIdList()
|
||||
);
|
||||
|
||||
$aResults += $oSearch->query(
|
||||
$this->oDB,
|
||||
|
||||
@@ -166,30 +166,29 @@ class SearchDescription
|
||||
/**
|
||||
* Derive new searches by adding a full term to the existing search.
|
||||
*
|
||||
* @param mixed[] $aSearchTerm Description of the token.
|
||||
* @param bool $bHasPartial True if there are also tokens of partial terms
|
||||
* with the same name.
|
||||
* @param string $sPhraseType Type of phrase the token is contained in.
|
||||
* @param bool $bFirstToken True if the token is at the beginning of the
|
||||
* query.
|
||||
* @param bool $bFirstPhrase True if the token is in the first phrase of
|
||||
* the query.
|
||||
* @param bool $bLastToken True if the token is at the end of the query.
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param bool $bHasPartial True if there are also tokens of partial terms
|
||||
* with the same name.
|
||||
* @param string $sPhraseType Type of phrase the token is contained in.
|
||||
* @param bool $bFirstToken True if the token is at the beginning of the
|
||||
* query.
|
||||
* @param bool $bFirstPhrase True if the token is in the first phrase of
|
||||
* the query.
|
||||
* @param bool $bLastToken True if the token is at the end of the query.
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
*/
|
||||
public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
|
||||
public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
|
||||
{
|
||||
$aNewSearches = array();
|
||||
|
||||
if (($sPhraseType == '' || $sPhraseType == 'country')
|
||||
&& !empty($aSearchTerm['country_code'])
|
||||
&& $aSearchTerm['country_code'] != '0'
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Country')
|
||||
) {
|
||||
if (!$this->sCountryCode) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->sCountryCode = $aSearchTerm['country_code'];
|
||||
$oSearch->sCountryCode = $oSearchTerm->sCountryCode;
|
||||
// Country is almost always at the end of the string
|
||||
// - increase score for finding it anywhere else (optimisation)
|
||||
if (!$bLastToken) {
|
||||
@@ -198,15 +197,12 @@ class SearchDescription
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
|
||||
&& $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode'
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Postcode')
|
||||
) {
|
||||
// We need to try the case where the postal code is the primary element
|
||||
// (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
|
||||
// so try both.
|
||||
if (!$this->sPostcode
|
||||
&& $aSearchTerm['word']
|
||||
&& pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word']
|
||||
) {
|
||||
if (!$this->sPostcode) {
|
||||
// If we have structured search or this is the first term,
|
||||
// make the postcode the primary search element.
|
||||
if ($this->iOperator == Operator::NONE
|
||||
@@ -217,7 +213,7 @@ class SearchDescription
|
||||
$oSearch->iOperator = Operator::POSTCODE;
|
||||
$oSearch->aAddress = array_merge($this->aAddress, $this->aName);
|
||||
$oSearch->aName =
|
||||
array($aSearchTerm['word_id'] => $aSearchTerm['word']);
|
||||
array($oSearchTerm->iId => $oSearchTerm->sPostcode);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
|
||||
@@ -228,23 +224,23 @@ class SearchDescription
|
||||
) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->sPostcode = $aSearchTerm['word'];
|
||||
$oSearch->sPostcode = $oSearchTerm->sPostcode;
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
|
||||
&& $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house'
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
|
||||
) {
|
||||
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->sHouseNumber = trim($aSearchTerm['word_token']);
|
||||
$oSearch->sHouseNumber = $oSearchTerm->sToken;
|
||||
// sanity check: if the housenumber is not mainly made
|
||||
// up of numbers, add a penalty
|
||||
if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
if (!isset($aSearchTerm['word_id'])) {
|
||||
if (empty($oSearchTerm->iId)) {
|
||||
$oSearch->iSearchRank++;
|
||||
}
|
||||
// also must not appear in the middle of the address
|
||||
@@ -256,27 +252,34 @@ class SearchDescription
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif ($sPhraseType == '' && $aSearchTerm['class']) {
|
||||
} elseif ($sPhraseType == ''
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
|
||||
) {
|
||||
if ($this->iOperator == Operator::NONE) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
|
||||
$iOp = Operator::NEAR; // near == in for the moment
|
||||
if ($aSearchTerm['operator'] == '') {
|
||||
$iOp = $oSearchTerm->iOperator;
|
||||
if ($iOp == Operator::NONE) {
|
||||
if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
|
||||
$iOp = Operator::NAME;
|
||||
} else {
|
||||
$iOp = Operator::NEAR;
|
||||
}
|
||||
$oSearch->iSearchRank += 2;
|
||||
}
|
||||
|
||||
$oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']);
|
||||
$oSearch->setPoiSearch(
|
||||
$iOp,
|
||||
$oSearchTerm->sClass,
|
||||
$oSearchTerm->sType
|
||||
);
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
} elseif (isset($aSearchTerm['word_id'])
|
||||
&& $aSearchTerm['word_id']
|
||||
&& $sPhraseType != 'country'
|
||||
} elseif ($sPhraseType != 'country'
|
||||
&& is_a($oSearchTerm, '\Nominatim\Token\Word')
|
||||
) {
|
||||
$iWordID = $aSearchTerm['word_id'];
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
// Full words can only be a name if they appear at the beginning
|
||||
// of the phrase. In structured search the name must necessarily be in
|
||||
// the first phrase. In unstructured search it may be in a later
|
||||
@@ -296,7 +299,7 @@ class SearchDescription
|
||||
$oSearch->aName = array($iWordID => $iWordID);
|
||||
if (CONST_Search_NameOnlySearchFrequencyThreshold) {
|
||||
$oSearch->bRareName =
|
||||
$aSearchTerm['search_name_count'] + 1
|
||||
$oSearchTerm->iSearchNameCount
|
||||
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||
}
|
||||
$aNewSearches[] = $oSearch;
|
||||
@@ -309,7 +312,8 @@ class SearchDescription
|
||||
/**
|
||||
* Derive new searches by adding a partial term to the existing search.
|
||||
*
|
||||
* @param mixed[] $aSearchTerm Description of the token.
|
||||
* @param string $sToken Term for the token.
|
||||
* @param object $oSearchTerm Description of the token.
|
||||
* @param bool $bStructuredPhrases True if the search is structured.
|
||||
* @param integer $iPhrase Number of the phrase the token is in.
|
||||
* @param array[] $aFullTokens List of full term tokens with the
|
||||
@@ -317,21 +321,21 @@ class SearchDescription
|
||||
*
|
||||
* @return SearchDescription[] List of derived search descriptions.
|
||||
*/
|
||||
public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
|
||||
public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
|
||||
{
|
||||
// Only allow name terms.
|
||||
if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) {
|
||||
if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
|
||||
return array();
|
||||
}
|
||||
|
||||
$aNewSearches = array();
|
||||
$iWordID = $aSearchTerm['word_id'];
|
||||
$iWordID = $oSearchTerm->iId;
|
||||
|
||||
if ((!$bStructuredPhrases || $iPhrase > 0)
|
||||
&& (!empty($this->aName))
|
||||
&& strpos($aSearchTerm['word_token'], ' ') === false
|
||||
&& strpos($sToken, ' ') === false
|
||||
) {
|
||||
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
|
||||
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank += 2;
|
||||
$oSearch->aAddress[$iWordID] = $iWordID;
|
||||
@@ -340,7 +344,7 @@ class SearchDescription
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
|
||||
if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
|
||||
if (preg_match('#^[0-9]+$#', $sToken)) {
|
||||
$oSearch->iSearchRank += 2;
|
||||
}
|
||||
if (!empty($aFullTokens)) {
|
||||
@@ -349,14 +353,12 @@ class SearchDescription
|
||||
$aNewSearches[] = $oSearch;
|
||||
|
||||
// revert to the token version?
|
||||
foreach ($aFullTokens as $aSearchTermToken) {
|
||||
if (empty($aSearchTermToken['country_code'])
|
||||
&& empty($aSearchTermToken['lat'])
|
||||
&& empty($aSearchTermToken['class'])
|
||||
) {
|
||||
foreach ($aFullTokens as $oSearchTermToken) {
|
||||
if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
|
||||
$oSearch = clone $this;
|
||||
$oSearch->iSearchRank++;
|
||||
$oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
|
||||
$oSearch->aAddress[$oSearchTermToken->iId]
|
||||
= $oSearchTermToken->iId;
|
||||
$aNewSearches[] = $oSearch;
|
||||
}
|
||||
}
|
||||
@@ -371,13 +373,15 @@ class SearchDescription
|
||||
if (empty($this->aName)) {
|
||||
$oSearch->iSearchRank += 1;
|
||||
}
|
||||
if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
|
||||
if (preg_match('#^[0-9]+$#', $sToken)) {
|
||||
$oSearch->iSearchRank += 2;
|
||||
}
|
||||
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
|
||||
if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
|
||||
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
|
||||
if (empty($this->aName)
|
||||
&& CONST_Search_NameOnlySearchFrequencyThreshold
|
||||
) {
|
||||
$oSearch->bRareName =
|
||||
$aSearchTerm['search_name_count'] + 1
|
||||
$oSearchTerm->iSearchNameCount
|
||||
< CONST_Search_NameOnlySearchFrequencyThreshold;
|
||||
} else {
|
||||
$oSearch->bRareName = false;
|
||||
|
||||
29
lib/TokenCountry.php
Normal file
29
lib/TokenCountry.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
|
||||
* A country token.
|
||||
*/
|
||||
class Country
{
    /// Id of the matching entry in the word table, if available.
    public $iId;
    /// Two-letter country code (lower-cased).
    public $sCountryCode;

    /**
     * Create a country token.
     *
     * @param mixed  $iId          Database word id, if available.
     * @param string $sCountryCode Country code the token stands for.
     */
    public function __construct($iId, $sCountryCode)
    {
        $this->sCountryCode = $sCountryCode;
        $this->iId = $iId;
    }

    /**
     * Describe the token for debug output.
     *
     * @return array Array with the keys 'ID', 'Type' and 'Info'.
     */
    public function debugInfo()
    {
        $aInfo = array();
        $aInfo['ID'] = $this->iId;
        $aInfo['Type'] = 'country';
        $aInfo['Info'] = $this->sCountryCode;

        return $aInfo;
    }
}
|
||||
29
lib/TokenHousenumber.php
Normal file
29
lib/TokenHousenumber.php
Normal file
@@ -0,0 +1,29 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
|
||||
* A house number token.
|
||||
*/
|
||||
class HouseNumber
{
    /// Id of the matching entry in the word table, if available.
    public $iId;
    /// Normalized house number.
    public $sToken;

    /**
     * Create a house number token.
     *
     * @param mixed  $iId    Database word id, if available.
     * @param string $sToken Normalized house number.
     */
    public function __construct($iId, $sToken)
    {
        $this->sToken = $sToken;
        $this->iId = $iId;
    }

    /**
     * Describe the token for debug output.
     *
     * @return array Array with the keys 'ID', 'Type' and 'Info'.
     */
    public function debugInfo()
    {
        $aDetails = array('nr' => $this->sToken);

        $aInfo = array();
        $aInfo['ID'] = $this->iId;
        $aInfo['Type'] = 'house number';
        $aInfo['Info'] = $aDetails;

        return $aInfo;
    }
}
|
||||
177
lib/TokenList.php
Normal file
177
lib/TokenList.php
Normal file
@@ -0,0 +1,177 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
require_once(CONST_BasePath.'/lib/TokenCountry.php');
|
||||
require_once(CONST_BasePath.'/lib/TokenHousenumber.php');
|
||||
require_once(CONST_BasePath.'/lib/TokenPostcode.php');
|
||||
require_once(CONST_BasePath.'/lib/TokenSpecialTerm.php');
|
||||
require_once(CONST_BasePath.'/lib/TokenWord.php');
|
||||
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
|
||||
|
||||
/**
|
||||
* Saves information about the tokens that appear in a search query.
|
||||
*
|
||||
* Tokens are sorted by their normalized form, the token word. There are different
|
||||
* kinds of tokens, represented by different Token* classes. Note that
|
||||
* tokens do not have a common base class. All tokens need to have a field
|
||||
* with the word id that points to an entry in the `word` database table
|
||||
* but otherwise the information saved about a token can be very different.
|
||||
*
|
||||
* There are two different kinds of token words: full words and partial terms.
|
||||
*
|
||||
* Full words start with a space. They represent a complete name of a place.
|
||||
* All special tokens are normally full words.
|
||||
*
|
||||
* Partial terms have no space at the beginning. They may represent a part of
|
||||
* a name of a place (e.g. in the name 'World Trade Center' a partial term
|
||||
* would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
|
||||
*/
|
||||
class TokenList
{
    // List of list of tokens indexed by their word_token.
    private $aTokens = array();

    /**
     * Check if there are tokens for the given token word.
     *
     * @param string $sWord Token word to look for.
     *
     * @return bool True if there is one or more token for the token word.
     */
    public function contains($sWord)
    {
        return isset($this->aTokens[$sWord]);
    }

    /**
     * Get the list of tokens for the given token word.
     *
     * @param string $sWord Token word to look for.
     *
     * @return object[] Array of tokens for the given token word or an
     *                  empty array if no tokens could be found.
     */
    public function get($sWord)
    {
        return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
    }

    /**
     * Add token information from the word table in the database.
     *
     * Every matching row of the word table is converted into one of the
     * Token\* objects and stored under its word_token. Rows are dropped
     * when they describe a special term whose normalized word does not
     * appear in the query, a postcode without a usable word, or a country
     * outside the given country restriction.
     *
     * @param object   $oDB           Database connection.
     * @param string[] $aTokens       List of tokens to look up in the database.
     * @param string[] $aCountryCodes List of country restrictions.
     * @param string   $sNormQuery    Normalized query string.
     * @param object   $oNormalizer   Normalizer function to use on tokens.
     *
     * @return void
     */
    public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
    {
        // Nothing to look up. Also avoids building an invalid "IN ()" clause.
        if (empty($aTokens)) {
            return;
        }

        // Check which tokens we have, get the ID numbers
        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
        $sSQL .= ' FROM word WHERE word_token in (';
        $sSQL .= join(',', array_map('getDBQuoted', $aTokens)).')';

        Debug::printSQL($sSQL);

        $aDBWords = chksql($oDB->getAll($sSQL), 'Could not get word tokens.');

        foreach ($aDBWords as $aWord) {
            $oToken = null;
            $iId = (int) $aWord['word_id'];

            if ($aWord['class']) {
                // Special terms need to appear in their normalized form.
                if ($aWord['word']) {
                    $sNormWord = $aWord['word'];
                    if ($oNormalizer != null) {
                        $sNormWord = $oNormalizer->transliterate($aWord['word']);
                    }
                    if (strpos($sNormQuery, $sNormWord) === false) {
                        continue;
                    }
                }

                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
                    // Only accept postcodes that need no SQL escaping.
                    if ($aWord['word']
                        && pg_escape_string($aWord['word']) == $aWord['word']
                    ) {
                        $oToken = new Token\Postcode(
                            $iId,
                            $aWord['word'],
                            $aWord['country_code']
                        );
                    }
                } else {
                    // near and in operator the same at the moment
                    // A row without operator must yield Operator::NONE so
                    // that the search code can choose the operator itself.
                    $oToken = new Token\SpecialTerm(
                        $iId,
                        $aWord['class'],
                        $aWord['type'],
                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
                    );
                }
            } elseif ($aWord['country_code']) {
                // Filter country tokens that do not match restricted countries.
                if (!$aCountryCodes
                    || in_array($aWord['country_code'], $aCountryCodes)
                ) {
                    $oToken = new Token\Country($iId, $aWord['country_code']);
                }
            } else {
                // Full words are marked by a leading space in the token word
                // (word_token), everything else is a partial term.
                $oToken = new Token\Word(
                    $iId,
                    strpos($aWord['word_token'], ' ') !== 0,
                    (int) $aWord['count']
                );
            }

            if ($oToken) {
                $this->addToken($aWord['word_token'], $oToken);
            }
        }
    }

    /**
     * Add a new token for the given word.
     *
     * @param string $sWord  Word the token describes.
     * @param object $oToken Token object to add.
     *
     * @return void
     */
    public function addToken($sWord, $oToken)
    {
        if (isset($this->aTokens[$sWord])) {
            $this->aTokens[$sWord][] = $oToken;
        } else {
            $this->aTokens[$sWord] = array($oToken);
        }
    }

    /**
     * Build a lookup table from word id to a printable token description.
     *
     * Tokens without a word id (iId is null) are left out. When several
     * tokens share a word id, the one added last wins.
     *
     * @return string[] Strings of the form '#<token word>(<id>)#',
     *                  indexed by word id.
     */
    public function debugTokenByWordIdList()
    {
        $aWordsIDs = array();
        foreach ($this->aTokens as $sToken => $aWords) {
            foreach ($aWords as $aToken) {
                if ($aToken->iId !== null) {
                    $aWordsIDs[$aToken->iId] =
                        '#'.$sToken.'('.$aToken->iId.')#';
                }
            }
        }

        return $aWordsIDs;
    }

    /**
     * Return the internal token table for debug output.
     *
     * @return array[] Lists of token objects indexed by token word.
     */
    public function debugInfo()
    {
        return $this->aTokens;
    }
}
|
||||
32
lib/TokenPostcode.php
Normal file
32
lib/TokenPostcode.php
Normal file
@@ -0,0 +1,32 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
|
||||
* A postcode token.
|
||||
*/
|
||||
class Postcode
{
    /// Id of the matching entry in the word table, if available.
    public $iId;
    /// Full normalized postcode (upper cased).
    public $sPostcode;
    // Optional country code the postcode belongs to (currently unused).
    public $sCountryCode;

    /**
     * Create a postcode token.
     *
     * @param mixed  $iId          Database word id, if available.
     * @param string $sPostcode    Postcode the token stands for.
     * @param string $sCountryCode Country code of the postcode. Any empty
     *                             value is stored as an empty string.
     */
    public function __construct($iId, $sPostcode, $sCountryCode = '')
    {
        $this->iId = $iId;
        $this->sPostcode = $sPostcode;

        // Normalize all "empty" country codes to the empty string.
        if (empty($sCountryCode)) {
            $this->sCountryCode = '';
        } else {
            $this->sCountryCode = $sCountryCode;
        }
    }

    /**
     * Describe the token for debug output.
     *
     * @return array Array with the keys 'ID', 'Type' and 'Info'.
     */
    public function debugInfo()
    {
        $sDescription = $this->sPostcode.'('.$this->sCountryCode.')';

        $aInfo = array();
        $aInfo['ID'] = $this->iId;
        $aInfo['Type'] = 'postcode';
        $aInfo['Info'] = $sDescription;

        return $aInfo;
    }
}
|
||||
41
lib/TokenSpecialTerm.php
Normal file
41
lib/TokenSpecialTerm.php
Normal file
@@ -0,0 +1,41 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
|
||||
|
||||
/**
|
||||
* A word token describing a place type.
|
||||
*/
|
||||
class SpecialTerm
{
    /// Database word id, if applicable.
    public $iId;
    /// Class (or OSM tag key) of the place to look for.
    public $sClass;
    /// Type (or OSM tag value) of the place to look for.
    public $sType;
    /// Relationship of the operator to the object (see Operator class).
    public $iOperator;

    /**
     * Create a special term token.
     *
     * @param mixed   $iID       Database word id, if applicable.
     * @param string  $sClass    Class (or OSM tag key) of the place.
     * @param string  $sType     Type (or OSM tag value) of the place.
     * @param integer $iOperator One of the Operator constants.
     */
    public function __construct($iID, $sClass, $sType, $iOperator)
    {
        $this->iId = $iID;
        $this->sClass = $sClass;
        $this->sType = $sType;
        $this->iOperator = $iOperator;
    }

    /**
     * Describe the token for debug output.
     *
     * @return array Array with the keys 'ID', 'Type' and 'Info'.
     */
    public function debugInfo()
    {
        return array(
                'ID' => $this->iId,
                'Type' => 'special term',
                'Info' => array(
                           'class' => $this->sClass,
                           'type' => $this->sType,
                           // Operator lives in the parent namespace; an
                           // unqualified name would be looked up in
                           // Nominatim\Token and fail.
                           'operator' => \Nominatim\Operator::toString($this->iOperator)
                          )
               );
    }
}
|
||||
35
lib/TokenWord.php
Normal file
35
lib/TokenWord.php
Normal file
@@ -0,0 +1,35 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
|
||||
* A standard word token.
|
||||
*/
|
||||
class Word
{
    /// Id of the matching entry in the word table, if applicable.
    public $iId;
    /// If true, the word may represent only part of a place name.
    public $bPartial;
    /// Number of appearances in the database.
    public $iSearchNameCount;

    /**
     * Create a standard word token.
     *
     * @param mixed   $iId              Database word id, if applicable.
     * @param bool    $bPartial         True if the word may be only a part
     *                                  of a place name.
     * @param integer $iSearchNameCount Number of appearances in the database.
     */
    public function __construct($iId, $bPartial, $iSearchNameCount)
    {
        $this->iSearchNameCount = $iSearchNameCount;
        $this->bPartial = $bPartial;
        $this->iId = $iId;
    }

    /**
     * Describe the token for debug output.
     *
     * @return array Array with the keys 'ID', 'Type' and 'Info'.
     */
    public function debugInfo()
    {
        $aDetails = array();
        $aDetails['partial'] = $this->bPartial;
        $aDetails['count'] = $this->iSearchNameCount;

        $aInfo = array();
        $aInfo['ID'] = $this->iId;
        $aInfo['Type'] = 'word';
        $aInfo['Info'] = $aDetails;

        return $aInfo;
    }
}
|
||||
26
lib/lib.php
26
lib/lib.php
@@ -426,32 +426,6 @@ function javascript_renderData($xVal, $iOptions = 0)
|
||||
}
|
||||
|
||||
|
||||
function _debugDumpGroupedSearches($aData, $aTokens)
|
||||
{
|
||||
$aWordsIDs = array();
|
||||
if ($aTokens) {
|
||||
foreach ($aTokens as $sToken => $aWords) {
|
||||
if ($aWords) {
|
||||
foreach ($aWords as $aToken) {
|
||||
$aWordsIDs[$aToken['word_id']] =
|
||||
'#'.$sToken.'('.$aToken['word_id'].')#';
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
echo '<table border="1">';
|
||||
echo '<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th>';
|
||||
echo '<th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th>';
|
||||
echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
|
||||
foreach ($aData as $iRank => $aRankedSet) {
|
||||
foreach ($aRankedSet as $aRow) {
|
||||
$aRow->dumpAsHtmlTableRow($aWordsIDs);
|
||||
}
|
||||
}
|
||||
echo '</table>';
|
||||
}
|
||||
|
||||
|
||||
function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $housenumber = -1, $bRaw = false)
|
||||
{
|
||||
$sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, $housenumber)";
|
||||
|
||||
@@ -321,12 +321,15 @@ def send_api_query(endpoint, params, fmt, context):
|
||||
|
||||
(outp, err) = proc.communicate()
|
||||
outp = outp.decode('utf-8')
|
||||
err = err.decode("utf-8")
|
||||
|
||||
logger.debug("Result: \n===============================\n"
|
||||
+ outp + "\n===============================\n")
|
||||
|
||||
assert_equals(0, proc.returncode,
|
||||
"%s failed with message: %s\noutput: %s" % (env['SCRIPT_FILENAME'], err, outp))
|
||||
"%s failed with message: %s" % (
|
||||
os.path.basename(env['SCRIPT_FILENAME']),
|
||||
err))
|
||||
|
||||
assert_equals(0, len(err), "Unexpected PHP error: %s" % (err))
|
||||
|
||||
|
||||
Reference in New Issue
Block a user