drop category tokens when they make up a full phrase

This commit is contained in:
Sarah Hoffmann
2023-11-26 20:58:50 +01:00
parent a8b023e57e
commit a7f5c6c8f5
4 changed files with 56 additions and 26 deletions

View File

@@ -18,21 +18,17 @@ class MyToken(Token):
def make_query(*args):
    """Build a QueryStruct test fixture from a sequence of node descriptions.

    Each positional argument is a triple ``(break_type, phrase_type, tokens)``
    where ``tokens`` is an iterable of ``(end, token_type)`` pairs.

    The phrase type of the first triple seeds the initial phrase of the
    query; the break type of that first triple is not used. Every further
    triple appends one node with its break and phrase type, and a closing
    node with ``BreakType.END`` / ``PhraseType.NONE`` is appended last.

    For the triple at position ``start``, each ``(end, token_type)`` pair
    registers a token on ``TokenRange(start, end)``. All tokens share one
    dummy ``MyToken`` instance.

    At least one argument is required because the first triple is indexed
    directly. Returns the populated QueryStruct.
    """
    q = QueryStruct([Phrase(args[0][1], '')])
    dummy = MyToken(3.0, 45, 1, 'foo', True)

    # Create all nodes first so that token ranges may end at any later
    # node, including the closing END node.
    for btype, ptype, _ in args[1:]:
        q.add_node(btype, ptype)
    q.add_node(BreakType.END, PhraseType.NONE)

    # The index of a triple in args is the start node of its tokens.
    for start, t in enumerate(args):
        for end, ttype in t[2]:
            q.add_token(TokenRange(start, end), ttype, dummy)

    return q