mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-07 18:44:07 +00:00
move special hack for US states to legacy tokenizer
The hack for IL, AL and LA is only needed because the legacy tokenizer removes these abbreviations as stop words. There is no need to keep the hack for future tokenizers, so move it into the legacy tokenizer's token extraction function.
This commit is contained in:
@@ -506,13 +506,6 @@ class Geocode
|
|||||||
userError('Query string is not UTF-8 encoded.');
|
userError('Query string is not UTF-8 encoded.');
|
||||||
}
|
}
|
||||||
|
|
||||||
// Conflicts between US state abreviations and various words for 'the' in different languages
|
|
||||||
if (isset($this->aLangPrefOrder['name:en'])) {
|
|
||||||
$sQuery = preg_replace('/(^|,)\s*il\s*(,|$)/i', '\1illinois\2', $sQuery);
|
|
||||||
$sQuery = preg_replace('/(^|,)\s*al\s*(,|$)/i', '\1alabama\2', $sQuery);
|
|
||||||
$sQuery = preg_replace('/(^|,)\s*la\s*(,|$)/i', '\1louisiana\2', $sQuery);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Do we have anything that looks like a lat/lon pair?
|
// Do we have anything that looks like a lat/lon pair?
|
||||||
$sQuery = $oCtx->setNearPointFromQuery($sQuery);
|
$sQuery = $oCtx->setNearPointFromQuery($sQuery);
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,8 @@ namespace Nominatim;
|
|||||||
*/
|
*/
|
||||||
class Phrase
|
class Phrase
|
||||||
{
|
{
|
||||||
// Complete phrase as a string.
|
// Complete phrase as a string (guaranteed to have no leading or trailing
|
||||||
|
// spaces).
|
||||||
private $sPhrase;
|
private $sPhrase;
|
||||||
// Element type for structured searches.
|
// Element type for structured searches.
|
||||||
private $sPhraseType;
|
private $sPhraseType;
|
||||||
|
|||||||
@@ -87,6 +87,23 @@ class Tokenizer
|
|||||||
$sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
|
$sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
|
||||||
$sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
|
$sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
|
||||||
$aParams[':'.$iPhrase] = $oPhrase->getPhrase();
|
$aParams[':'.$iPhrase] = $oPhrase->getPhrase();
|
||||||
|
|
||||||
|
// Conflicts between US state abbreviations and various words
|
||||||
|
// for 'the' in different languages
|
||||||
|
switch (strtolower($oPhrase->getPhrase())) {
|
||||||
|
case 'il':
|
||||||
|
$aParams[':'.$iPhrase] = 'illinois';
|
||||||
|
break;
|
||||||
|
case 'al':
|
||||||
|
$aParams[':'.$iPhrase] = 'alabama';
|
||||||
|
break;
|
||||||
|
case 'la':
|
||||||
|
$aParams[':'.$iPhrase] = 'louisiana';
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
$aParams[':'.$iPhrase] = $oPhrase->getPhrase();
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
$sSQL = substr($sSQL, 0, -1);
|
$sSQL = substr($sSQL, 0, -1);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user