diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index ec21a0dc..eda6df54 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -355,15 +355,15 @@ class Geocode $aNewWordsetSearches = array(); foreach ($aWordsetSearches as $oCurrentSearch) { - // Tokens with full name matches. - foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { + $aNewSearches = $oCurrentSearch->extendWithSearchTerm( + $sToken, $oSearchTerm, $sPhraseType, $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) + && $iPhrase + 1 == count($aPhrases), + $iPhrase ); foreach ($aNewSearches as $oSearch) { @@ -372,27 +372,6 @@ class Geocode } } } - // Look for partial matches. - // Note that there is no point in adding country terms here - // because country is omitted in the address. - if ($sPhraseType != 'country') { - // Allow searching for a word - but at extra cost - foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $sToken, - $oSearchTerm, - (bool) $sPhraseType, - $iPhrase, - $oValidTokens->get(' '.$sToken) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } - } - } - } } // Sort and cut usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 6091fd61..938beb61 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -152,17 +152,17 @@ class SearchDescription /** * Derive new searches by adding a full term to the existing search. * - * @param object $oSearchTerm Description of the token. - * @param string $sPhraseType Type of phrase the token is contained in. - * @param bool $bFirstToken True if the token is at the beginning of the - * query. - * @param bool $bFirstPhrase True if the token is in the first phrase of - * the query. - * @param bool $bLastToken True if the token is at the end of the query. + * @param string $sToken Term for the token. + * @param object $oSearchTerm Description of the token. + * @param string $sPhraseType Type of phrase the token is contained in. + * @param bool $bFirstToken True if the token is at the beginning of the + * query. + * @param bool $bLastToken True if the token is at the end of the query. + * @param integer $iPhrase Number of the phrase the token is in. * * @return SearchDescription[] List of derived search descriptions. */ - public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) + public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase) { $aNewSearches = array(); @@ -295,8 +295,8 @@ class SearchDescription // of the phrase. In structured search the name must forcably in // the first phrase. In unstructured search it may be in a later // phrase when the first phrase is a house number. - if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { - if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) { + if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) { + if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) { $oSearch = clone $this; $oSearch->iNamePhrase = -1; $oSearch->iSearchRank += 1; @@ -314,6 +314,16 @@ class SearchDescription } $aNewSearches[] = $oSearch; } + } elseif ($sPhraseType != 'country' + && is_a($oSearchTerm, '\Nominatim\Token\Partial') + && strpos($sToken, ' ') === false + ) { + $aNewSearches = $this->extendWithPartialTerm( + $sToken, + $oSearchTerm, + (bool) $sPhraseType, + $iPhrase + ); } return $aNewSearches; @@ -326,20 +336,11 @@ class SearchDescription * @param object $oSearchTerm Description of the token. * @param bool $bStructuredPhrases True if the search is structured. * @param integer $iPhrase Number of the phrase the token is in. - * @param array[] $aFullTokens List of full term tokens with the - * same name. * * @return SearchDescription[] List of derived search descriptions. */ - public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) + private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase) { - // Only allow name terms. - if (!(is_a($oSearchTerm, '\Nominatim\Token\Word')) - || strpos($sToken, ' ') !== false - ) { - return array(); - } - $aNewSearches = array(); $iWordID = $oSearchTerm->iId; @@ -355,9 +356,6 @@ class SearchDescription $oSearch->aAddress[$iWordID] = $iWordID; } else { $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (!empty($aFullTokens)) { - $oSearch->iSearchRank++; - } } $aNewSearches[] = $oSearch; } @@ -385,9 +383,6 @@ class SearchDescription } $oSearch->aName[$iWordID] = $iWordID; } else { - if (!empty($aFullTokens)) { - $oSearch->iSearchRank++; - } $oSearch->aNameNonSearch[$iWordID] = $iWordID; } $oSearch->iNamePhrase = $iPhrase; diff --git a/lib-php/TokenList.php b/lib-php/TokenList.php index f310306d..bc8f9c3f 100644 --- a/lib-php/TokenList.php +++ b/lib-php/TokenList.php @@ -18,15 +18,6 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php'); * tokens do not have a common base class. All tokens need to have a field * with the word id that points to an entry in the `word` database table * but otherwise the information saved about a token can be very different. - * - * There are two different kinds of token words: full words and partial terms. - * - * Full words start with a space. They represent a complete name of a place. - * All special tokens are normally full words. - * - * Partial terms have no space at the beginning. They may represent a part of - * a name of a place (e.g. in the name 'World Trade Center' a partial term - * would be 'Trade' or 'Trade Center'). They are only used in TokenWord. */ class TokenList { @@ -65,7 +56,7 @@ class TokenList */ public function containsAny($sWord) { - return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]); + return isset($this->aTokens[$sWord]); } /** @@ -87,7 +78,7 @@ class TokenList foreach ($this->aTokens as $aTokenList) { foreach ($aTokenList as $oToken) { - if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) { + if (is_a($oToken, '\Nominatim\Token\Word')) { $ids[$oToken->iId] = $oToken->iId; } } diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php index 8cff6f32..96a1d8a6 100644 --- a/lib-php/tokenizer/legacy_icu_tokenizer.php +++ b/lib-php/tokenizer/legacy_icu_tokenizer.php @@ -120,14 +120,14 @@ class Tokenizer // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { - if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { - if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { // US ZIP+4 codes - merge in the 5-digit ZIP code $oValidTokens->addToken( $sToken, new Token\Postcode(null, $aData[1], 'us') ); - } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + } elseif (preg_match('/^[0-9]+$/', $sToken)) { // Unknown single word token with a number. // Assume it is a house number. $oValidTokens->addToken( diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php index ec2d7e68..238fbcf4 100644 --- a/lib-php/tokenizer/legacy_tokenizer.php +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -137,14 +137,14 @@ class Tokenizer // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { - if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { - if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { // US ZIP+4 codes - merge in the 5-digit ZIP code $oValidTokens->addToken( $sToken, new Token\Postcode(null, $aData[1], 'us') ); - } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + } elseif (preg_match('/^[0-9]+$/', $sToken)) { // Unknown single word token with a number. // Assume it is a house number. $oValidTokens->addToken(