adapt search algorithm to new postcode format in word

This commit is contained in:
Sarah Hoffmann
2022-06-22 09:54:47 +02:00
parent 612d34930b
commit 6eb9044353
4 changed files with 111 additions and 25 deletions

View File

@@ -25,7 +25,12 @@ class Postcode
public function __construct($iId, $sPostcode, $sCountryCode = '') public function __construct($iId, $sPostcode, $sCountryCode = '')
{ {
$this->iId = $iId; $this->iId = $iId;
$this->sPostcode = $sPostcode; $iSplitPos = strpos($sPostcode, '@');
if ($iSplitPos === false) {
$this->sPostcode = $sPostcode;
} else {
$this->sPostcode = substr($sPostcode, 0, $iSplitPos);
}
$this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode; $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
} }

View File

@@ -190,13 +190,17 @@ class Tokenizer
if ($aWord['word'] !== null if ($aWord['word'] !== null
&& pg_escape_string($aWord['word']) == $aWord['word'] && pg_escape_string($aWord['word']) == $aWord['word']
) { ) {
$sNormPostcode = $this->normalizeString($aWord['word']); $iSplitPos = strpos($aWord['word'], '@');
if (strpos($sNormQuery, $sNormPostcode) !== false) { if ($iSplitPos === false) {
$oValidTokens->addToken( $sPostcode = $aWord['word'];
$sTok, } else {
new Token\Postcode($iId, $aWord['word'], null) $sPostcode = substr($aWord['word'], 0, $iSplitPos);
);
} }
$oValidTokens->addToken(
$sTok,
new Token\Postcode($iId, $sPostcode, null)
);
} }
break; break;
case 'S': // tokens for classification terms (special phrases) case 'S': // tokens for classification terms (special phrases)

View File

@@ -163,24 +163,6 @@ Feature: Import of postcodes
| de | 01982 | country:de | | de | 01982 | country:de |
And there are word tokens for postcodes 01982 And there are word tokens for postcodes 01982
Scenario: Different postcodes with the same normalization can both be found
Given the places
| osm | class | type | addr+postcode | addr+housenumber | geometry |
| N34 | place | house | EH4 7EA | 111 | country:gb |
| N35 | place | house | E4 7EA | 111 | country:gb |
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
| gb | EH4 7EA | country:gb |
| gb | E4 7EA | country:gb |
When sending search query "EH4 7EA"
Then results contain
| type | display_name |
| postcode | EH4 7EA |
When sending search query "E4 7EA"
Then results contain
| type | display_name |
| postcode | E4 7EA |
@Fail @Fail
Scenario: search and address ranks for GB post codes correctly assigned Scenario: search and address ranks for GB post codes correctly assigned

View File

@@ -0,0 +1,95 @@
@DB
@fail-legacy
Feature: Querying of postcode variants
# Imports a single way placed on a grid with origin SG and tagged with the
# 6-digit postcode 399174, then searches for exactly that postcode.
# The first result (ID 0) is expected to be of type postcode and to be
# displayed as 399174.
Scenario: Postcodes in Singapore (6-digit postcode)
Given the grid with origin SG
| 10 | | | | 11 |
And the places
| osm | class | type | name | addr+postcode | geometry |
| W1 | highway | path | Lorang | 399174 | 10,11 |
When importing
When sending search query "399174"
Then results contain
| ID | type | display_name |
| 0 | postcode | 399174 |
# Runs once per Examples row. The way is imported with the postcode spelled
# as <postcode> (with space, without space, lower case). Whatever the
# spelling in the source data, searching for either "3993 DX" or "3993dx"
# is expected to return a postcode result displayed as "3993 DX".
Scenario Outline: Postcodes in the Netherlands (mixed postcode with spaces)
Given the grid with origin NL
| 10 | | | | 11 |
And the places
| osm | class | type | name | addr+postcode | geometry |
| W1 | highway | path | De Weide | <postcode> | 10,11 |
When importing
When sending search query "3993 DX"
Then results contain
| ID | type | display_name |
| 0 | postcode | 3993 DX |
When sending search query "3993dx"
Then results contain
| ID | type | display_name |
| 0 | postcode | 3993 DX |
# Input spellings substituted into the addr+postcode column above.
Examples:
| postcode |
| 3993 DX |
| 3993DX |
| 3993 dx |
# Imports a single way placed on a grid with origin SG and tagged with the
# 6-digit postcode 399174, then expects a search for that postcode to
# return it as the first result (ID 0) of type postcode.
# NOTE(review): this scenario is identical, name included, to the Singapore
# scenario that appears earlier in this feature - confirm whether the
# repetition is intentional.
Scenario: Postcodes in Singapore (6-digit postcode)
Given the grid with origin SG
| 10 | | | | 11 |
And the places
| osm | class | type | name | addr+postcode | geometry |
| W1 | highway | path | Lorang | 399174 | 10,11 |
When importing
When sending search query "399174"
Then results contain
| ID | type | display_name |
| 0 | postcode | 399174 |
# Runs once per Examples row. The way is imported with the postcode written
# as <postcode> (bare number, country prefix with space, country prefix
# without space). In every case, searching for "675" as well as for "AD675"
# is expected to return a postcode result displayed as "AD675".
Scenario Outline: Postcodes in Andorra (with country code)
Given the grid with origin AD
| 10 | | | | 11 |
And the places
| osm | class | type | name | addr+postcode | geometry |
| W1 | highway | path | Lorang | <postcode> | 10,11 |
When importing
# Query without the country prefix.
When sending search query "675"
Then results contain
| ID | type | display_name |
| 0 | postcode | AD675 |
# Query with the country prefix.
When sending search query "AD675"
Then results contain
| ID | type | display_name |
| 0 | postcode | AD675 |
# Input spellings substituted into the addr+postcode column above.
Examples:
| postcode |
| 675 |
| AD 675 |
| AD675 |
# Imports two GB houses whose postcodes are "EH4 7EA" and "E4 7EA", checks
# that location_postcode ends up with exactly these two entries, and then
# verifies that searching for each postcode returns a postcode result with
# the matching display name.
# NOTE(review): per the title these two postcodes are presumed to share a
# normalized form; the normalization itself is not visible here - confirm.
Scenario: Different postcodes with the same normalization can both be found
Given the places
| osm | class | type | addr+postcode | addr+housenumber | geometry |
| N34 | place | house | EH4 7EA | 111 | country:gb |
| N35 | place | house | E4 7EA | 111 | country:gb |
When importing
Then location_postcode contains exactly
| country | postcode | geometry |
| gb | EH4 7EA | country:gb |
| gb | E4 7EA | country:gb |
When sending search query "EH4 7EA"
Then results contain
| type | display_name |
| postcode | EH4 7EA |
When sending search query "E4 7EA"
Then results contain
| type | display_name |
| postcode | E4 7EA |