mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
introduce a separate token type for partials
This means that the leading space can be removed as a partial word indicator.
This commit is contained in:
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php');
|
|||||||
require_once(CONST_LibDir.'/TokenPostcode.php');
|
require_once(CONST_LibDir.'/TokenPostcode.php');
|
||||||
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
|
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
|
||||||
require_once(CONST_LibDir.'/TokenWord.php');
|
require_once(CONST_LibDir.'/TokenWord.php');
|
||||||
|
require_once(CONST_LibDir.'/TokenPartial.php');
|
||||||
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|||||||
31
lib-php/TokenPartial.php
Normal file
31
lib-php/TokenPartial.php
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Nominatim\Token;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A partial word token, i.e. a word that may represent only part of a place name.
|
||||||
|
*/
|
||||||
|
class Partial
|
||||||
|
{
|
||||||
|
/// Database word id, if applicable.
|
||||||
|
public $iId;
|
||||||
|
/// Number of appearances in the database.
|
||||||
|
public $iSearchNameCount;
|
||||||
|
|
||||||
|
/**
* Create a partial token from a word id and its appearance count.
*
* Both arguments are stored unchanged in the public properties of the
* same name.
*
* @param $iId              Value stored in $this->iId.
* @param $iSearchNameCount Value stored in $this->iSearchNameCount.
*/
public function __construct($iId, $iSearchNameCount)
|
||||||
|
{
|
||||||
|
$this->iId = $iId;
|
||||||
|
$this->iSearchNameCount = $iSearchNameCount;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
* Describe this token as a nested array for debugging output.
*
* @return array Array with the keys 'ID' (the stored word id), 'Type'
*               (always the string 'partial') and 'Info' (an array
*               holding the stored count under 'count').
*/
public function debugInfo()
|
||||||
|
{
|
||||||
|
return array(
|
||||||
|
'ID' => $this->iId,
|
||||||
|
// Fixed label that identifies this token type in the output.
'Type' => 'partial',
|
||||||
|
'Info' => array(
|
||||||
|
'count' => $this->iSearchNameCount
|
||||||
|
)
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -9,17 +9,14 @@ class Word
|
|||||||
{
|
{
|
||||||
/// Database word id, if applicable.
|
/// Database word id, if applicable.
|
||||||
public $iId;
|
public $iId;
|
||||||
/// If true, the word may represent only part of a place name.
|
|
||||||
public $bPartial;
|
|
||||||
/// Number of appearances in the database.
|
/// Number of appearances in the database.
|
||||||
public $iSearchNameCount;
|
public $iSearchNameCount;
|
||||||
/// Number of terms in the word.
|
/// Number of terms in the word.
|
||||||
public $iTermCount;
|
public $iTermCount;
|
||||||
|
|
||||||
public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
|
public function __construct($iId, $iSearchNameCount, $iTermCount)
|
||||||
{
|
{
|
||||||
$this->iId = $iId;
|
$this->iId = $iId;
|
||||||
$this->bPartial = $bPartial;
|
|
||||||
$this->iSearchNameCount = $iSearchNameCount;
|
$this->iSearchNameCount = $iSearchNameCount;
|
||||||
$this->iTermCount = $iTermCount;
|
$this->iTermCount = $iTermCount;
|
||||||
}
|
}
|
||||||
@@ -30,8 +27,8 @@ class Word
|
|||||||
'ID' => $this->iId,
|
'ID' => $this->iId,
|
||||||
'Type' => 'word',
|
'Type' => 'word',
|
||||||
'Info' => array(
|
'Info' => array(
|
||||||
'partial' => $this->bPartial,
|
'count' => $this->iSearchNameCount,
|
||||||
'count' => $this->iSearchNameCount
|
'terms' => $this->iTermCount
|
||||||
)
|
)
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -195,17 +195,27 @@ class Tokenizer
|
|||||||
) {
|
) {
|
||||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||||
}
|
}
|
||||||
|
} elseif ($aWord['word_token'][0] == ' ') {
|
||||||
|
$oToken = new Token\Word(
|
||||||
|
$iId,
|
||||||
|
$aWord['word_token'][0] != ' ',
|
||||||
|
(int) $aWord['count'],
|
||||||
|
substr_count($aWord['word_token'], ' ')
|
||||||
|
);
|
||||||
} else {
|
} else {
|
||||||
$oToken = new Token\Word(
|
$oToken = new Token\Partial(
|
||||||
$iId,
|
$iId,
|
||||||
$aWord['word_token'][0] != ' ',
|
(int) $aWord['count']
|
||||||
(int) $aWord['count'],
|
|
||||||
substr_count($aWord['word_token'], ' ')
|
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($oToken) {
|
if ($oToken) {
|
||||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
// remove any leading spaces
|
||||||
|
if ($aWord['word_token'][0] == ' ') {
|
||||||
|
$oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
|
||||||
|
} else {
|
||||||
|
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -212,17 +212,26 @@ class Tokenizer
|
|||||||
) {
|
) {
|
||||||
$oToken = new Token\Country($iId, $aWord['country_code']);
|
$oToken = new Token\Country($iId, $aWord['country_code']);
|
||||||
}
|
}
|
||||||
} else {
|
} elseif ($aWord['word_token'][0] == ' ') {
|
||||||
$oToken = new Token\Word(
|
$oToken = new Token\Word(
|
||||||
$iId,
|
$iId,
|
||||||
$aWord['word_token'][0] != ' ',
|
|
||||||
(int) $aWord['count'],
|
(int) $aWord['count'],
|
||||||
substr_count($aWord['word_token'], ' ')
|
substr_count($aWord['word_token'], ' ')
|
||||||
);
|
);
|
||||||
|
} else {
|
||||||
|
$oToken = new Token\Partial(
|
||||||
|
$iId,
|
||||||
|
(int) $aWord['count']
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
if ($oToken) {
|
if ($oToken) {
|
||||||
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
// remove any leading spaces
|
||||||
|
if ($aWord['word_token'][0] == ' ') {
|
||||||
|
$oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
|
||||||
|
} else {
|
||||||
|
$oValidTokens->addToken($aWord['word_token'], $oToken);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user