forked from hans/Nominatim
drop category tokens when they make up a full phrase
@@ -28,12 +28,12 @@ def mktoken(tid: int):
                                          ('COUNTRY', 'COUNTRY'),
                                          ('POSTCODE', 'POSTCODE')])
 def test_phrase_compatible(ptype, ttype):
-    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype])
+    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype], False)


 @pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
 def test_phrase_incompatible(ptype):
-    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL)
+    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL, True)


 def test_query_node_empty():
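The two assertions above now pass a second positional argument to compatible_with(). A minimal, self-contained sketch of the call shape this implies; the flag name is_full_phrase and the rule shown are assumptions drawn from these tests, not the implementation from this commit:

import enum


class TokenType(enum.Enum):
    PARTIAL = 1
    COUNTRY = 2
    POSTCODE = 3


class PhraseType(enum.Enum):
    COUNTRY = 'COUNTRY'
    POSTCODE = 'POSTCODE'

    def compatible_with(self, ttype: TokenType, is_full_phrase: bool) -> bool:
        # COUNTRY/POSTCODE phrases only ever accept their matching token
        # type; the flag does not change that, which is why the tests can
        # pass either False or True for these phrase types.
        return ttype.name == self.name


assert PhraseType.COUNTRY.compatible_with(TokenType.COUNTRY, False)
assert not PhraseType.POSTCODE.compatible_with(TokenType.PARTIAL, True)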
@@ -99,3 +99,36 @@ def test_query_struct_incompatible_token():

     assert q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == []
     assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1
+
+
+def test_query_struct_amenity_single_word():
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.CATEGORY, mktoken(2))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0
+
+
+def test_query_struct_amenity_two_words():
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
+    q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY)
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    for trange in [(0, 1), (1, 2)]:
+        q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
+        q.add_token(query.TokenRange(*trange), query.TokenType.CATEGORY, mktoken(2))
+        q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.CATEGORY)) == 0
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1
+
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.CATEGORY)) == 0
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1
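The two new tests above show the behaviour: in an AMENITY phrase, a CATEGORY token is only kept for a range that covers the whole phrase ('bar'), while a QUALIFIER token is only kept for part of a longer phrase ('foo bar'). A self-contained sketch of the boundary check they exercise; it mirrors the idea but is not copied from this commit, and the START break used for node 0 is an assumption:

import enum
from typing import NamedTuple, Sequence


class BreakType(enum.Enum):
    START = '<'
    WORD = ' '
    END = '>'


class Node(NamedTuple):
    btype: BreakType


def spans_full_phrase(nodes: Sequence[Node], start: int, end: int) -> bool:
    # A range is the full phrase when it begins at the phrase start and
    # ends at the phrase end.
    return nodes[start].btype is BreakType.START and nodes[end].btype is BreakType.END


# 'bar': two nodes (start, end); the single word covers the whole phrase,
# so its CATEGORY token survives and its QUALIFIER token is dropped.
one_word = [Node(BreakType.START), Node(BreakType.END)]
assert spans_full_phrase(one_word, 0, 1)

# 'foo bar': a WORD break sits between the two words, so neither sub-range
# is the full phrase; CATEGORY is dropped and QUALIFIER is kept instead.
two_words = [Node(BreakType.START), Node(BreakType.WORD), Node(BreakType.END)]
assert not spans_full_phrase(two_words, 0, 1)
assert not spans_full_phrase(two_words, 1, 2)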
@@ -21,21 +21,18 @@ class MyToken(Token):


 def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(PhraseType.NONE, '')])

-    for tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(PhraseType.NONE, '')])
-        else:
-            q.add_node(BreakType.WORD, PhraseType.NONE)
+    for _ in range(max(inner[0] for tlist in args for inner in tlist)):
+        q.add_node(BreakType.WORD, PhraseType.NONE)
+    q.add_node(BreakType.END, PhraseType.NONE)

-        start = len(q.nodes) - 1
+    for start, tlist in enumerate(args):
         for end, ttype, tinfo in tlist:
             for tid, word in tinfo:
                 q.add_token(TokenRange(start, end), ttype,
                             MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True))

-    q.add_node(BreakType.END, PhraseType.NONE)

     return q
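The test helper now builds all break nodes up front (as many WORD breaks as the highest end index used by the arguments, plus a trailing END break) and attaches tokens by position afterwards, instead of growing the query while iterating. An illustrative call with hypothetical data, assuming the test module's existing imports; each positional argument describes one word and each entry is (end_node, token_type, [(token_id, word), ...]):

q = make_query([(1, TokenType.PARTIAL, [(1, 'foo')])],
               [(2, TokenType.PARTIAL, [(2, 'bar')])])
# Nodes 1 and 2 exist before any token is added, so the ranges (0, 1) and
# (1, 2) can be referenced directly.
assert len(q.get_tokens(TokenRange(0, 1), TokenType.PARTIAL)) == 1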
@@ -18,21 +18,17 @@ class MyToken(Token):


 def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(args[0][1], '')])
     dummy = MyToken(3.0, 45, 1, 'foo', True)

-    for btype, ptype, tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(ptype, '')])
-        else:
-            q.add_node(btype, ptype)
-
-        start = len(q.nodes) - 1
-        for end, ttype in tlist:
-            q.add_token(TokenRange(start, end), ttype, dummy)
-
+    for btype, ptype, _ in args[1:]:
+        q.add_node(btype, ptype)
     q.add_node(BreakType.END, PhraseType.NONE)

+    for start, t in enumerate(args):
+        for end, ttype in t[2]:
+            q.add_token(TokenRange(start, end), ttype, dummy)
+
     return q
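The second make_query variant gets the same reshaping: the first argument's phrase type seeds the QueryStruct, the remaining (break, phrase) pairs become nodes, and tokens are attached by position in a second pass. An illustrative call with hypothetical data, assuming the module's imports; each argument is (break_type, phrase_type, [(end_node, token_type), ...]), and the first tuple's break type is ignored because node 0 comes from the QueryStruct constructor:

q = make_query((BreakType.WORD, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
               (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
# Node 0 from the constructor, one WORD break for the second tuple, then the
# END break; one PARTIAL token is attached to each of (0, 1) and (1, 2).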