From 182f5f5d7b4e3e01fc0ab60a263fa213260bde57 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 15:03:36 +0100 Subject: [PATCH 1/8] give preference to full words in address, too Full word terms are already preferred for the name part. Adding only one-word partials to the address, makes it impossible to give a similar preference for the address part. Each term adds a rank penalty. The problem here is that we interpret the query forwards and backwards. Having different penalty systems for name and address means that the same term ends up with different penalties and that often leads to interpretations of the wrong direction being in the way. --- lib-php/SearchDescription.php | 42 ++++++++++------------------------- 1 file changed, 12 insertions(+), 30 deletions(-) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index ea9d7f58..913a90d2 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -329,51 +329,30 @@ class SearchDescription if ((!$bStructuredPhrases || $iPhrase > 0) && (!empty($this->aName)) - && strpos($sToken, ' ') === false ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + if (preg_match('#^[0-9 ]+$#', $sToken)) { + $oSearch->iSearchRank++; + } if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - $oSearch = clone $this; - $oSearch->iSearchRank += $oSearchTerm->iTermCount + 1; - if (empty($this->aName)) { - $oSearch->iSearchRank++; - } - if (preg_match('#^[0-9]+$#', $sToken)) { - $oSearch->iSearchRank++; - } $oSearch->aAddress[$iWordID] = $iWordID; - $aNewSearches[] = $oSearch; } else { - $oSearch = clone $this; - $oSearch->iSearchRank += $oSearchTerm->iTermCount + 1; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; if (!empty($aFullTokens)) { $oSearch->iSearchRank++; } - $aNewSearches[] = $oSearch; - - // revert to the token version? 
- foreach ($aFullTokens as $oSearchTermToken) { - if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) { - $oSearch = clone $this; - $oSearch->iSearchRank += 3; - $oSearch->aAddress[$oSearchTermToken->iId] - = $oSearchTermToken->iId; - $aNewSearches[] = $oSearch; - } - } } + $aNewSearches[] = $oSearch; } if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) && (empty($this->aName) || $this->iNamePhrase == $iPhrase) ) { $oSearch = clone $this; - $oSearch->iSearchRank += 2; - if (empty($this->aName)) { - $oSearch->iSearchRank += 1; - } - if (preg_match('#^[0-9]+$#', $sToken)) { - $oSearch->iSearchRank += 2; + $oSearch->iSearchRank++; + if (preg_match('#^[0-9 ]+$#', $sToken)) { + $oSearch->iSearchRank++; } if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { if (empty($this->aName) @@ -387,6 +366,9 @@ class SearchDescription } $oSearch->aName[$iWordID] = $iWordID; } else { + if (!empty($aFullTokens)) { + $oSearch->iSearchRank++; + } $oSearch->aNameNonSearch[$iWordID] = $iWordID; } $oSearch->iNamePhrase = $iPhrase; From f498e4020839019770adf4a68bb2ab793ae1e27b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 17:14:46 +0100 Subject: [PATCH 2/8] fix result splitting for last search group When we are in the final iteration of the search groups, it is not possible to further delay the results. Unconditionally use the results with the best rank instead. 
--- lib-php/Geocode.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index b475add2..f638af9a 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -779,7 +779,8 @@ class Geocode $aSplitResults = Result::splitResults($aResults); Debug::printVar('Split results', $aSplitResults); if ($iGroupLoop <= 4 - && reset($aSplitResults['head'])->iResultRank > 0) { + && reset($aSplitResults['head'])->iResultRank > 0 + && $iGroupedRank !== array_key_last($aGroupedSearches)) { // Haven't found an exact match for the query yet. // Therefore add result from the next group level. $aNextResults = $aSplitResults['head']; From 478dfb0639f6bb6d8eb160bb881fddf7b730cc85 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 17:34:23 +0100 Subject: [PATCH 3/8] add one-rank penalty for using partial search Ensures that full matches are preferred over partial ones even when the full word consists of only one term. --- lib-php/SearchDescription.php | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 913a90d2..436398cd 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -347,10 +347,13 @@ class SearchDescription } if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && (empty($this->aName) || $this->iNamePhrase == $iPhrase) + && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase) ) { $oSearch = clone $this; $oSearch->iSearchRank++; + if (empty($this->aName) && empty($this->aNameNonSearch)) { + $oSearch->iSearchRank++; + } if (preg_match('#^[0-9 ]+$#', $sToken)) { $oSearch->iSearchRank++; } From d5e8c5e975b270a0839af1b54ef5308857d92225 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 20:22:54 +0100 Subject: [PATCH 4/8] do not mix partial and full name terms If NameNonSearch already contains a partial term, then a 
full term must not be added to the Name list anymore. --- lib-php/SearchDescription.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 436398cd..8e8e5acb 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -289,7 +289,7 @@ class SearchDescription $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } - } else { + } else if (empty($this->aNameNonSearch)) { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aName = array($iWordID => $iWordID); From 00b05e239495fe147f9a74f0e36eb243c8a32227 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 20:34:21 +0100 Subject: [PATCH 5/8] higher penalty for special searches Adds a general higher penalty for special search terms and an additional one if the term is anywhere but the beginning or the end. Also, housenumbers and special searches together are less likely. --- lib-php/SearchDescription.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 8e8e5acb..94cadda3 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -219,6 +219,9 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->sHouseNumber = $oSearchTerm->sToken; + if ($this->iOperator != Operator::NONE) { + $oSearch->iSearchRank++; + } // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0 @@ -255,7 +258,7 @@ class SearchDescription ) { if ($this->iOperator == Operator::NONE) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 2; $iOp = $oSearchTerm->iOperator; if ($iOp == Operator::NONE) { @@ -265,6 +268,11 @@ class SearchDescription $iOp = Operator::NEAR; } $oSearch->iSearchRank += 2; + } else if (!$bFirstToken && !$bLastToken) { + 
$oSearch->iSearchRank += 2; + } + if ($this->sHouseNumber) { + $oSearch->iSearchRank++; } $oSearch->setPoiSearch( From 3933fc3ad335df5b6aba2f4f06eeff3127f16a0d Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 20:42:37 +0100 Subject: [PATCH 6/8] avoid multi-term partials in names Names are either full words or single-word partial names. Searching for multi-word partials yields exactly the same result as with full words. --- lib-php/SearchDescription.php | 1 + 1 file changed, 1 insertion(+) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 94cadda3..cde21f82 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -356,6 +356,7 @@ class SearchDescription if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase) + && strpos($sToken, ' ') === false ) { $oSearch = clone $this; $oSearch->iSearchRank++; From 3fbe4511f9c03429148d47dcf320445889be569b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 21:14:23 +0100 Subject: [PATCH 7/8] make linter happy --- lib-php/SearchDescription.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index cde21f82..c207b210 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -268,7 +268,7 @@ class SearchDescription $iOp = Operator::NEAR; } $oSearch->iSearchRank += 2; - } else if (!$bFirstToken && !$bLastToken) { + } elseif (!$bFirstToken && !$bLastToken) { $oSearch->iSearchRank += 2; } if ($this->sHouseNumber) { @@ -297,7 +297,7 @@ class SearchDescription $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } - } else if (empty($this->aNameNonSearch)) { + } elseif (empty($this->aNameNonSearch)) { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aName = array($iWordID => $iWordID); From 
6dd2b9c2eca74870107376d87d37589e0ff4694b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 11 Mar 2021 22:44:49 +0100 Subject: [PATCH 8/8] do not mix partial names with other words As soon as a housenumber, postcode, etc. appears, the name term must obviously be closed and no further partial terms can be appended. --- lib-php/SearchDescription.php | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index c207b210..228b0999 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -179,6 +179,7 @@ class SearchDescription // - increase score for finding it anywhere else (optimisation) if (!$bLastToken) { $oSearch->iSearchRank += 5; + $oSearch->iNamePhrase = -1; } $aNewSearches[] = $oSearch; } @@ -205,6 +206,7 @@ class SearchDescription ) { $oSearch = clone $this; $oSearch->iSearchRank++; + $oSearch->iNamePhrase = -1; if (strlen($oSearchTerm->sPostcode) < 4) { $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); } @@ -218,6 +220,7 @@ class SearchDescription if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { $oSearch = clone $this; $oSearch->iSearchRank++; + $oSearch->iNamePhrase = -1; $oSearch->sHouseNumber = $oSearchTerm->sToken; if ($this->iOperator != Operator::NONE) { $oSearch->iSearchRank++; @@ -259,6 +262,7 @@ class SearchDescription if ($this->iOperator == Operator::NONE) { $oSearch = clone $this; $oSearch->iSearchRank += 2; + $oSearch->iNamePhrase = -1; $iOp = $oSearchTerm->iOperator; if ($iOp == Operator::NONE) { @@ -293,6 +297,7 @@ class SearchDescription if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) { $oSearch = clone $this; + $oSearch->iNamePhrase = -1; $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch;