forked from hans/Nominatim
move tokenization in query into tokenizer
This commit is contained in:
@@ -44,19 +44,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testEmptyPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array(), new TokensFullSet());
|
||||
|
||||
$this->assertEquals(
|
||||
array(array('')),
|
||||
$oPhrase->getWordSets()
|
||||
);
|
||||
$this->assertNull($oPhrase->getWordSets());
|
||||
}
|
||||
|
||||
|
||||
public function testSingleWordPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('a', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a'), new TokensFullSet());
|
||||
|
||||
$this->assertEquals(
|
||||
'(a)',
|
||||
@@ -68,21 +65,21 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testMultiWordPhrase()
|
||||
{
|
||||
$oPhrase = new Phrase('a b', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b),(a|b)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
);
|
||||
|
||||
$oPhrase = new Phrase('a b c', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b c),(a|b c),(a b|c),(a|b|c)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
);
|
||||
|
||||
$oPhrase = new Phrase('a b c d', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensFullSet());
|
||||
$this->assertEquals(
|
||||
'(a b c d),(a b c|d),(a b|c d),(a|b c d),(a b|c|d),(a|b c|d),(a|b|c d),(a|b|c|d)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
@@ -93,7 +90,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testInverseWordSets()
|
||||
{
|
||||
$oPhrase = new Phrase('a b c', '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c'), new TokensFullSet());
|
||||
$oPhrase->invertWordSets();
|
||||
|
||||
$this->assertEquals(
|
||||
@@ -105,14 +102,16 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
|
||||
public function testMaxWordSets()
|
||||
{
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 4, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$aWords = array_fill(0, 4, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensFullSet());
|
||||
$this->assertEquals(8, count($oPhrase->getWordSets()));
|
||||
$oPhrase->invertWordSets();
|
||||
$this->assertEquals(8, count($oPhrase->getWordSets()));
|
||||
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensFullSet());
|
||||
$aWords = array_fill(0, 18, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensFullSet());
|
||||
$this->assertEquals(100, count($oPhrase->getWordSets()));
|
||||
$oPhrase->invertWordSets();
|
||||
$this->assertEquals(100, count($oPhrase->getWordSets()));
|
||||
@@ -122,7 +121,7 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
public function testPartialTokensShortTerm()
|
||||
{
|
||||
$oPhrase = new Phrase('a b c d', '');
|
||||
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
|
||||
$oPhrase->computeWordSets(array('a', 'b', 'c', 'd'), new TokensPartialSet(array('a', 'b', 'd', 'b c', 'b c d')));
|
||||
$this->assertEquals(
|
||||
'(a|b c d),(a|b c|d)',
|
||||
$this->serializeSets($oPhrase->getWordSets())
|
||||
@@ -132,8 +131,9 @@ class PhraseTest extends \PHPUnit\Framework\TestCase
|
||||
|
||||
public function testPartialTokensLongTerm()
|
||||
{
|
||||
$oPhrase = new Phrase(join(' ', array_fill(0, 18, 'a')), '');
|
||||
$oPhrase->computeWordSets(new TokensPartialSet(array('a', 'a a a a a')));
|
||||
$aWords = array_fill(0, 18, 'a');
|
||||
$oPhrase = new Phrase(join(' ', $aWords), '');
|
||||
$oPhrase->computeWordSets($aWords, new TokensPartialSet(array('a', 'a a a a a')));
|
||||
$this->assertEquals(80, count($oPhrase->getWordSets()));
|
||||
}
|
||||
}
|
||||
|
||||
@@ -49,88 +49,4 @@ class TokenTest extends \PHPUnit\Framework\TestCase
|
||||
$this->assertFalse($TL->contains('unknownword'));
|
||||
$this->assertEquals(array(), $TL->get('unknownword'));
|
||||
}
|
||||
|
||||
public function testAddress()
|
||||
{
|
||||
$this->expectOutputRegex('/<p><tt>/');
|
||||
|
||||
$oDbStub = $this->getMockBuilder(Nominatim\DB::class)
|
||||
->setMethods(array('getAll', 'getDBQuotedList'))
|
||||
->getMock();
|
||||
|
||||
$oDbStub->method('getDBQuotedList')
|
||||
->will($this->returnCallback(function ($aVals) {
|
||||
return array_map(function ($sVal) {
|
||||
return "'".$sVal."'";
|
||||
}, $aVals);
|
||||
}));
|
||||
|
||||
|
||||
$oDbStub->method('getAll')
|
||||
->will($this->returnCallback(function ($sql) {
|
||||
$aResults = array();
|
||||
if (preg_match('/1051/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => '1051',
|
||||
'class' => 'place',
|
||||
'type' => 'house'
|
||||
));
|
||||
}
|
||||
if (preg_match('/hauptstr/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'hauptstr',
|
||||
'class' => 'place',
|
||||
'type' => 'street',
|
||||
'operator' => true
|
||||
));
|
||||
}
|
||||
if (preg_match('/64286/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => '64286',
|
||||
'word' => '64286',
|
||||
'class' => 'place',
|
||||
'type' => 'postcode'
|
||||
));
|
||||
}
|
||||
if (preg_match('/darmstadt/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'darmstadt',
|
||||
'count' => 533
|
||||
));
|
||||
}
|
||||
if (preg_match('/alemagne/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'alemagne',
|
||||
'country_code' => 'de',
|
||||
));
|
||||
}
|
||||
if (preg_match('/mexico/', $sql)) {
|
||||
$aResults[] = $this->wordResult(array(
|
||||
'word_id' => 999,
|
||||
'word_token' => 'mexico',
|
||||
'country_code' => 'mx',
|
||||
));
|
||||
}
|
||||
return $aResults;
|
||||
}));
|
||||
|
||||
$aCountryCodes = array('de', 'fr');
|
||||
$sNormQuery = '1051 hauptstr 64286 darmstadt alemagne mexico';
|
||||
$aTokens = explode(' ', $sNormQuery);
|
||||
|
||||
$TL = new TokenList;
|
||||
$TL->addTokensFromDB($oDbStub, $aTokens, $aCountryCodes, $sNormQuery, $this->oNormalizer);
|
||||
$this->assertEquals(5, $TL->count());
|
||||
|
||||
$this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
|
||||
$this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
|
||||
$this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
|
||||
$this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
|
||||
$this->assertEquals(array(new Token\SpecialTerm(999, 'place', 'street', true)), $TL->get('hauptstr'));
|
||||
}
|
||||
}
|
||||
|
||||
17
test/php/Nominatim/tokenizer.php
Normal file
17
test/php/Nominatim/tokenizer.php
Normal file
@@ -0,0 +1,17 @@
|
||||
<?php
|
||||
|
||||
namespace Nominatim;
|
||||
|
||||
class Tokenizer
|
||||
{
|
||||
private $oDB;
|
||||
|
||||
public function __construct(&$oDB)
|
||||
{
|
||||
$this->oDB =& $oDB;
|
||||
}
|
||||
|
||||
public function checkStatus()
|
||||
{
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user