mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-14 18:37:58 +00:00
introduce a separate token type for partials
This means that the leading space can be removed as a partial word indicator.
This commit is contained in:
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php');
|
||||
require_once(CONST_LibDir.'/TokenPostcode.php');
|
||||
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
|
||||
require_once(CONST_LibDir.'/TokenWord.php');
|
||||
require_once(CONST_LibDir.'/TokenPartial.php');
|
||||
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
||||
|
||||
/**
|
||||
|
||||
31
lib-php/TokenPartial.php
Normal file
31
lib-php/TokenPartial.php
Normal file
@@ -0,0 +1,31 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim\Token;
|
||||
|
||||
/**
 * A partial word token.
 *
 * Used for terms that may represent only part of a place name, as opposed
 * to the full-word tokens handled by the Word token class.
 */
class Partial
{
    /// Database word id, if applicable.
    public $iId;
    /// Number of appearances in the database.
    public $iSearchNameCount;

    /**
     * Create a new partial token.
     *
     * Both arguments are stored unchanged on the object.
     *
     * @param mixed $iId               Database word id, if applicable.
     * @param mixed $iSearchNameCount  Number of appearances in the database.
     */
    public function __construct($iId, $iSearchNameCount)
    {
        $this->iId = $iId;
        $this->iSearchNameCount = $iSearchNameCount;
    }

    /**
     * Describe the token for debugging output.
     *
     * @return array Map with the keys 'ID' (the word id), 'Type' (always
     *               the string 'partial') and 'Info' (an array holding the
     *               search name count under the key 'count').
     */
    public function debugInfo()
    {
        return array(
                'ID' => $this->iId,
                'Type' => 'partial',
                'Info' => array(
                           'count' => $this->iSearchNameCount
                          )
               );
    }
}
|
||||
@@ -9,17 +9,14 @@ class Word
|
||||
{
|
||||
/// Database word id, if applicable.
|
||||
public $iId;
|
||||
/// If true, the word may represent only part of a place name.
|
||||
public $bPartial;
|
||||
/// Number of appearances in the database.
|
||||
public $iSearchNameCount;
|
||||
/// Number of terms in the word.
|
||||
public $iTermCount;
|
||||
|
||||
public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
|
||||
public function __construct($iId, $iSearchNameCount, $iTermCount)
|
||||
{
|
||||
$this->iId = $iId;
|
||||
$this->bPartial = $bPartial;
|
||||
$this->iSearchNameCount = $iSearchNameCount;
|
||||
$this->iTermCount = $iTermCount;
|
||||
}
|
||||
@@ -30,8 +27,8 @@ class Word
|
||||
'ID' => $this->iId,
|
||||
'Type' => 'word',
|
||||
'Info' => array(
|
||||
'partial' => $this->bPartial,
|
||||
'count' => $this->iSearchNameCount
|
||||
'count' => $this->iSearchNameCount,
|
||||
'terms' => $this->iTermCount
|
||||
)
|
||||
);
|
||||
}
|
||||
|
||||
@@ -195,17 +195,27 @@ class Tokenizer
|
||||
) {
|
||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||
}
|
||||
} elseif ($aWord['word_token'][0] == ' ') {
|
||||
$oToken = new Token\Word(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
);
|
||||
} else {
|
||||
$oToken = new Token\Word(
|
||||
$oToken = new Token\Partial(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
(int) $aWord['count']
|
||||
);
|
||||
}
|
||||
|
||||
if ($oToken) {
|
||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||
// remove any leading spaces
|
||||
if ($aWord['word_token'][0] == ' ') {
|
||||
$oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
|
||||
} else {
|
||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -212,17 +212,26 @@ class Tokenizer
|
||||
) {
|
||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||
}
|
||||
} else {
|
||||
} elseif ($aWord['word_token'][0] == ' ') {
|
||||
$oToken = new Token\Word(
|
||||
$iId,
|
||||
$aWord['word_token'][0] != ' ',
|
||||
(int) $aWord['count'],
|
||||
substr_count($aWord['word_token'], ' ')
|
||||
);
|
||||
} else {
|
||||
$oToken = new Token\Partial(
|
||||
$iId,
|
||||
(int) $aWord['count']
|
||||
);
|
||||
}
|
||||
|
||||
if ($oToken) {
|
||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||
// remove any leading spaces
|
||||
if ($aWord['word_token'][0] == ' ') {
|
||||
$oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
|
||||
} else {
|
||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user