replace BreakType enum with simple char constants

This commit is contained in:
Sarah Hoffmann
2025-02-21 09:57:48 +01:00
parent 9bf1428d81
commit 4577669213
8 changed files with 150 additions and 147 deletions

View File

@@ -429,11 +429,11 @@ class SearchBuilder:
PENALTY_WORDCHANGE = { PENALTY_WORDCHANGE = {
qmod.BreakType.START: 0.0, qmod.BREAK_START: 0.0,
qmod.BreakType.END: 0.0, qmod.BREAK_END: 0.0,
qmod.BreakType.PHRASE: 0.0, qmod.BREAK_PHRASE: 0.0,
qmod.BreakType.SOFT_PHRASE: 0.0, qmod.BREAK_SOFT_PHRASE: 0.0,
qmod.BreakType.WORD: 0.1, qmod.BREAK_WORD: 0.1,
qmod.BreakType.PART: 0.2, qmod.BREAK_PART: 0.2,
qmod.BreakType.TOKEN: 0.4 qmod.BREAK_TOKEN: 0.4
} }

View File

@@ -37,13 +37,13 @@ DB_TO_TOKEN_TYPE = {
} }
PENALTY_IN_TOKEN_BREAK = { PENALTY_IN_TOKEN_BREAK = {
qmod.BreakType.START: 0.5, qmod.BREAK_START: 0.5,
qmod.BreakType.END: 0.5, qmod.BREAK_END: 0.5,
qmod.BreakType.PHRASE: 0.5, qmod.BREAK_PHRASE: 0.5,
qmod.BreakType.SOFT_PHRASE: 0.5, qmod.BREAK_SOFT_PHRASE: 0.5,
qmod.BreakType.WORD: 0.1, qmod.BREAK_WORD: 0.1,
qmod.BreakType.PART: 0.0, qmod.BREAK_PART: 0.0,
qmod.BreakType.TOKEN: 0.0 qmod.BREAK_TOKEN: 0.0
} }
@@ -72,7 +72,7 @@ def extract_words(terms: List[QueryPart], start: int, words: WordDict) -> None:
given position to the word list. given position to the word list.
""" """
total = len(terms) total = len(terms)
base_penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType.WORD] base_penalty = PENALTY_IN_TOKEN_BREAK[qmod.BREAK_WORD]
for first in range(start, total): for first in range(start, total):
word = terms[first].token word = terms[first].token
penalty = base_penalty penalty = base_penalty
@@ -273,15 +273,15 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
for term in trans.split(' '): for term in trans.split(' '):
if term: if term:
parts.append(QueryPart(term, word, parts.append(QueryPart(term, word,
PENALTY_IN_TOKEN_BREAK[qmod.BreakType.TOKEN])) PENALTY_IN_TOKEN_BREAK[qmod.BREAK_TOKEN]))
query.add_node(qmod.BreakType.TOKEN, phrase.ptype) query.add_node(qmod.BREAK_TOKEN, phrase.ptype)
query.nodes[-1].btype = qmod.BreakType(breakchar) query.nodes[-1].btype = breakchar
parts[-1].penalty = PENALTY_IN_TOKEN_BREAK[qmod.BreakType(breakchar)] parts[-1].penalty = PENALTY_IN_TOKEN_BREAK[breakchar]
extract_words(parts, phrase_start, words) extract_words(parts, phrase_start, words)
phrase_start = len(parts) phrase_start = len(parts)
query.nodes[-1].btype = qmod.BreakType.END query.nodes[-1].btype = qmod.BREAK_END
return parts, words return parts, words
@@ -322,16 +322,16 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL): elif tlist.ttype not in (qmod.TokenType.COUNTRY, qmod.TokenType.PARTIAL):
norm = parts[i].normalized norm = parts[i].normalized
for j in range(i + 1, tlist.end): for j in range(i + 1, tlist.end):
if node.btype != qmod.BreakType.TOKEN: if node.btype != qmod.BREAK_TOKEN:
norm += ' ' + parts[j].normalized norm += ' ' + parts[j].normalized
for token in tlist.tokens: for token in tlist.tokens:
cast(ICUToken, token).rematch(norm) cast(ICUToken, token).rematch(norm)
def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str: def _dump_transliterated(query: qmod.QueryStruct, parts: QueryParts) -> str:
out = query.nodes[0].btype.value out = query.nodes[0].btype
for node, part in zip(query.nodes[1:], parts): for node, part in zip(query.nodes[1:], parts):
out += part.token + node.btype.value out += part.token + node.btype
return out return out

View File

@@ -13,29 +13,29 @@ import dataclasses
import enum import enum
class BreakType(enum.Enum): BreakType = str
""" Type of break between tokens. """ Type of break between tokens.
""" """
START = '<' BREAK_START = '<'
""" Begin of the query. """ """ Begin of the query. """
END = '>' BREAK_END = '>'
""" End of the query. """ """ End of the query. """
PHRASE = ',' BREAK_PHRASE = ','
""" Hard break between two phrases. Address parts cannot cross hard """ Hard break between two phrases. Address parts cannot cross hard
phrase boundaries.""" phrase boundaries."""
SOFT_PHRASE = ':' BREAK_SOFT_PHRASE = ':'
""" Likely break between two phrases. Address parts should not cross soft """ Likely break between two phrases. Address parts should not cross soft
phrase boundaries. Soft breaks can be inserted by a preprocessor phrase boundaries. Soft breaks can be inserted by a preprocessor
that is analysing the input string. that is analysing the input string.
""" """
WORD = ' ' BREAK_WORD = ' '
""" Break between words. """ """ Break between words. """
PART = '-' BREAK_PART = '-'
""" Break inside a word, for example a hyphen or apostrophe. """ """ Break inside a word, for example a hyphen or apostrophe. """
TOKEN = '`' BREAK_TOKEN = '`'
""" Break created as a result of tokenization. """ Break created as a result of tokenization.
This may happen in languages without spaces between words. This may happen in languages without spaces between words.
""" """
class TokenType(enum.Enum): class TokenType(enum.Enum):
@@ -218,7 +218,7 @@ class QueryStruct:
def __init__(self, source: List[Phrase]) -> None: def __init__(self, source: List[Phrase]) -> None:
self.source = source self.source = source
self.nodes: List[QueryNode] = \ self.nodes: List[QueryNode] = \
[QueryNode(BreakType.START, source[0].ptype if source else PhraseType.NONE)] [QueryNode(BREAK_START, source[0].ptype if source else PhraseType.NONE)]
def num_token_slots(self) -> int: def num_token_slots(self) -> int:
""" Return the length of the query in vertice steps. """ Return the length of the query in vertice steps.
@@ -243,8 +243,8 @@ class QueryStruct:
be added to, then the token is silently dropped. be added to, then the token is silently dropped.
""" """
snode = self.nodes[trange.start] snode = self.nodes[trange.start]
full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\ full_phrase = snode.btype in (BREAK_START, BREAK_PHRASE)\
and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END) and self.nodes[trange.end].btype in (BREAK_PHRASE, BREAK_END)
if snode.ptype.compatible_with(ttype, full_phrase): if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype) tlist = snode.get_tokens(trange.end, ttype)
if tlist is None: if tlist is None:

View File

@@ -24,13 +24,13 @@ class TypedRange:
PENALTY_TOKENCHANGE = { PENALTY_TOKENCHANGE = {
qmod.BreakType.START: 0.0, qmod.BREAK_START: 0.0,
qmod.BreakType.END: 0.0, qmod.BREAK_END: 0.0,
qmod.BreakType.PHRASE: 0.0, qmod.BREAK_PHRASE: 0.0,
qmod.BreakType.SOFT_PHRASE: 0.0, qmod.BREAK_SOFT_PHRASE: 0.0,
qmod.BreakType.WORD: 0.1, qmod.BREAK_WORD: 0.1,
qmod.BreakType.PART: 0.2, qmod.BREAK_PART: 0.2,
qmod.BreakType.TOKEN: 0.4 qmod.BREAK_TOKEN: 0.4
} }
TypedRangeSeq = List[TypedRange] TypedRangeSeq = List[TypedRange]
@@ -205,7 +205,7 @@ class _TokenSequence:
new_penalty = 0.0 new_penalty = 0.0
else: else:
last = self.seq[-1] last = self.seq[-1]
if btype != qmod.BreakType.PHRASE and last.ttype == ttype: if btype != qmod.BREAK_PHRASE and last.ttype == ttype:
# extend the existing range # extend the existing range
newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))] newseq = self.seq[:-1] + [TypedRange(ttype, last.trange.replace_end(end_pos))]
new_penalty = 0.0 new_penalty = 0.0

View File

@@ -38,14 +38,14 @@ def test_phrase_incompatible(ptype):
def test_query_node_empty(): def test_query_node_empty():
qn = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE) qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE)
assert not qn.has_tokens(3, query.TokenType.PARTIAL) assert not qn.has_tokens(3, query.TokenType.PARTIAL)
assert qn.get_tokens(3, query.TokenType.WORD) is None assert qn.get_tokens(3, query.TokenType.WORD) is None
def test_query_node_with_content(): def test_query_node_with_content():
qn = query.QueryNode(query.BreakType.PHRASE, query.PhraseType.NONE) qn = query.QueryNode(query.BREAK_PHRASE, query.PhraseType.NONE)
qn.starting.append(query.TokenList(2, query.TokenType.PARTIAL, [mktoken(100), mktoken(101)])) qn.starting.append(query.TokenList(2, query.TokenType.PARTIAL, [mktoken(100), mktoken(101)]))
qn.starting.append(query.TokenList(2, query.TokenType.WORD, [mktoken(1000)])) qn.starting.append(query.TokenList(2, query.TokenType.WORD, [mktoken(1000)]))
@@ -68,8 +68,8 @@ def test_query_struct_empty():
def test_query_struct_with_tokens(): def test_query_struct_with_tokens():
q = query.QueryStruct([query.Phrase(query.PhraseType.NONE, 'foo bar')]) q = query.QueryStruct([query.Phrase(query.PhraseType.NONE, 'foo bar')])
q.add_node(query.BreakType.WORD, query.PhraseType.NONE) q.add_node(query.BREAK_WORD, query.PhraseType.NONE)
q.add_node(query.BreakType.END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
assert q.num_token_slots() == 2 assert q.num_token_slots() == 2
@@ -92,8 +92,8 @@ def test_query_struct_with_tokens():
def test_query_struct_incompatible_token(): def test_query_struct_incompatible_token():
q = query.QueryStruct([query.Phrase(query.PhraseType.COUNTRY, 'foo bar')]) q = query.QueryStruct([query.Phrase(query.PhraseType.COUNTRY, 'foo bar')])
q.add_node(query.BreakType.WORD, query.PhraseType.COUNTRY) q.add_node(query.BREAK_WORD, query.PhraseType.COUNTRY)
q.add_node(query.BreakType.END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(1, 2), query.TokenType.COUNTRY, mktoken(100)) q.add_token(query.TokenRange(1, 2), query.TokenType.COUNTRY, mktoken(100))
@@ -104,7 +104,7 @@ def test_query_struct_incompatible_token():
def test_query_struct_amenity_single_word(): def test_query_struct_amenity_single_word():
q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')]) q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
q.add_node(query.BreakType.END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2)) q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2))
@@ -117,8 +117,8 @@ def test_query_struct_amenity_single_word():
def test_query_struct_amenity_two_words(): def test_query_struct_amenity_two_words():
q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')]) q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY) q.add_node(query.BREAK_WORD, query.PhraseType.AMENITY)
q.add_node(query.BreakType.END, query.PhraseType.NONE) q.add_node(query.BREAK_END, query.PhraseType.NONE)
for trange in [(0, 1), (1, 2)]: for trange in [(0, 1), (1, 2)]:
q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1)) q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))

View File

@@ -9,7 +9,8 @@ Tests for creating abstract searches from token assignments.
""" """
import pytest import pytest
from nominatim_api.search.query import Token, TokenRange, BreakType, PhraseType, TokenType, QueryStruct, Phrase from nominatim_api.search.query import Token, TokenRange, PhraseType, TokenType, QueryStruct, Phrase
import nominatim_api.search.query as qmod
from nominatim_api.search.db_search_builder import SearchBuilder from nominatim_api.search.db_search_builder import SearchBuilder
from nominatim_api.search.token_assignment import TokenAssignment from nominatim_api.search.token_assignment import TokenAssignment
from nominatim_api.types import SearchDetails from nominatim_api.types import SearchDetails
@@ -24,8 +25,8 @@ def make_query(*args):
q = QueryStruct([Phrase(PhraseType.NONE, '')]) q = QueryStruct([Phrase(PhraseType.NONE, '')])
for _ in range(max(inner[0] for tlist in args for inner in tlist)): for _ in range(max(inner[0] for tlist in args for inner in tlist)):
q.add_node(BreakType.WORD, PhraseType.NONE) q.add_node(qmod.BREAK_WORD, PhraseType.NONE)
q.add_node(BreakType.END, PhraseType.NONE) q.add_node(qmod.BREAK_END, PhraseType.NONE)
for start, tlist in enumerate(args): for start, tlist in enumerate(args):
for end, ttype, tinfo in tlist: for end, ttype, tinfo in tlist:
@@ -393,8 +394,8 @@ def make_counted_searches(name_part, name_full, address_part, address_full,
num_address_parts=1): num_address_parts=1):
q = QueryStruct([Phrase(PhraseType.NONE, '')]) q = QueryStruct([Phrase(PhraseType.NONE, '')])
for i in range(1 + num_address_parts): for i in range(1 + num_address_parts):
q.add_node(BreakType.WORD, PhraseType.NONE) q.add_node(qmod.BREAK_WORD, PhraseType.NONE)
q.add_node(BreakType.END, PhraseType.NONE) q.add_node(qmod.BREAK_END, PhraseType.NONE)
q.add_token(TokenRange(0, 1), TokenType.PARTIAL, q.add_token(TokenRange(0, 1), TokenType.PARTIAL,
MyToken(0.5, 1, name_part, 1, 'name_part')) MyToken(0.5, 1, name_part, 1, 'name_part'))

View File

@@ -11,7 +11,8 @@ import pytest
import pytest_asyncio import pytest_asyncio
from nominatim_api import NominatimAPIAsync from nominatim_api import NominatimAPIAsync
from nominatim_api.search.query import Phrase, PhraseType, TokenType, BreakType from nominatim_api.search.query import Phrase, PhraseType, TokenType
import nominatim_api.search.query as qmod
import nominatim_api.search.icu_tokenizer as tok import nominatim_api.search.icu_tokenizer as tok
from nominatim_api.logging import set_log_output, get_and_disable from nominatim_api.logging import set_log_output, get_and_disable
@@ -96,7 +97,7 @@ async def test_splitting_in_transliteration(conn):
assert query.num_token_slots() == 2 assert query.num_token_slots() == 2
assert query.nodes[0].starting assert query.nodes[0].starting
assert query.nodes[1].starting assert query.nodes[1].starting
assert query.nodes[1].btype == BreakType.TOKEN assert query.nodes[1].btype == qmod.BREAK_TOKEN
@pytest.mark.asyncio @pytest.mark.asyncio

View File

@@ -9,7 +9,8 @@ Test for creation of token assignments from tokenized queries.
""" """
import pytest import pytest
from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, BreakType, TokenType, TokenRange, Token from nominatim_api.search.query import QueryStruct, Phrase, PhraseType, TokenType, TokenRange, Token
import nominatim_api.search.query as qmod
from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE from nominatim_api.search.token_assignment import yield_token_assignments, TokenAssignment, PENALTY_TOKENCHANGE
class MyToken(Token): class MyToken(Token):
@@ -24,7 +25,7 @@ def make_query(*args):
for btype, ptype, _ in args[1:]: for btype, ptype, _ in args[1:]:
q.add_node(btype, ptype) q.add_node(btype, ptype)
q.add_node(BreakType.END, PhraseType.NONE) q.add_node(qmod.BREAK_END, PhraseType.NONE)
for start, t in enumerate(args): for start, t in enumerate(args):
for end, ttype in t[2]: for end, ttype in t[2]:
@@ -44,13 +45,13 @@ def check_assignments(actual, *expected):
def test_query_with_missing_tokens(): def test_query_with_missing_tokens():
q = QueryStruct([Phrase(PhraseType.NONE, '')]) q = QueryStruct([Phrase(PhraseType.NONE, '')])
q.add_node(BreakType.END, PhraseType.NONE) q.add_node(qmod.BREAK_END, PhraseType.NONE)
assert list(yield_token_assignments(q)) == [] assert list(yield_token_assignments(q)) == []
def test_one_word_query(): def test_one_word_query():
q = make_query((BreakType.START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.PARTIAL), [(1, TokenType.PARTIAL),
(1, TokenType.WORD), (1, TokenType.WORD),
(1, TokenType.HOUSENUMBER)])) (1, TokenType.HOUSENUMBER)]))
@@ -60,7 +61,7 @@ def test_one_word_query():
def test_single_postcode(): def test_single_postcode():
q = make_query((BreakType.START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.POSTCODE)])) [(1, TokenType.POSTCODE)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
@@ -68,7 +69,7 @@ def test_single_postcode():
def test_single_country_name(): def test_single_country_name():
q = make_query((BreakType.START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.COUNTRY)])) [(1, TokenType.COUNTRY)]))
res = list(yield_token_assignments(q)) res = list(yield_token_assignments(q))
@@ -76,7 +77,7 @@ def test_single_country_name():
def test_single_word_poi_search(): def test_single_word_poi_search():
q = make_query((BreakType.START, PhraseType.NONE, q = make_query((qmod.BREAK_START, PhraseType.NONE,
[(1, TokenType.NEAR_ITEM), [(1, TokenType.NEAR_ITEM),
(1, TokenType.QUALIFIER)])) (1, TokenType.QUALIFIER)]))
@@ -84,9 +85,9 @@ def test_single_word_poi_search():
assert res == [TokenAssignment(near_item=TokenRange(0, 1))] assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN]) @pytest.mark.parametrize('btype', [qmod.BREAK_WORD, qmod.BREAK_PART, qmod.BREAK_TOKEN])
def test_multiple_simple_words(btype): def test_multiple_simple_words(btype):
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (btype, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(btype, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (btype, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
@@ -106,8 +107,8 @@ def test_multiple_simple_words(btype):
def test_multiple_words_respect_phrase_break(): def test_multiple_words_respect_phrase_break():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -117,8 +118,8 @@ def test_multiple_words_respect_phrase_break():
def test_housenumber_and_street(): def test_housenumber_and_street():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(1, 2), TokenAssignment(name=TokenRange(1, 2),
@@ -128,8 +129,8 @@ def test_housenumber_and_street():
def test_housenumber_and_street_backwards(): def test_housenumber_and_street_backwards():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -139,10 +140,10 @@ def test_housenumber_and_street_backwards():
def test_housenumber_and_postcode(): def test_housenumber_and_postcode():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=pytest.approx(0.3), TokenAssignment(penalty=pytest.approx(0.3),
@@ -156,10 +157,10 @@ def test_housenumber_and_postcode():
postcode=TokenRange(3, 4))) postcode=TokenRange(3, 4)))
def test_postcode_and_housenumber(): def test_postcode_and_housenumber():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.POSTCODE)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=pytest.approx(0.3), TokenAssignment(penalty=pytest.approx(0.3),
@@ -174,10 +175,10 @@ def test_postcode_and_housenumber():
def test_country_housenumber_postcode(): def test_country_housenumber_postcode():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)])) (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.POSTCODE)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
@@ -185,27 +186,27 @@ def test_country_housenumber_postcode():
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY, @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
TokenType.NEAR_ITEM, TokenType.QUALIFIER]) TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype): def test_housenumber_with_only_special_terms(ttype):
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(2, ttype)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, ttype)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY]) @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.HOUSENUMBER, TokenType.COUNTRY])
def test_multiple_special_tokens(ttype): def test_multiple_special_tokens(ttype):
q = make_query((BreakType.START, PhraseType.NONE, [(1, ttype)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, ttype)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(3, ttype)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, ttype)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_housenumber_many_phrases(): def test_housenumber_many_phrases():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(3, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(4, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, TokenAssignment(penalty=0.1,
@@ -220,8 +221,8 @@ def test_housenumber_many_phrases():
def test_country_at_beginning(): def test_country_at_beginning():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.COUNTRY)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.COUNTRY)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -229,8 +230,8 @@ def test_country_at_beginning():
def test_country_at_end(): def test_country_at_end():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1), TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
@@ -238,16 +239,16 @@ def test_country_at_end():
def test_country_in_middle(): def test_country_in_middle():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.COUNTRY)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_postcode_with_designation(): def test_postcode_with_designation():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.POSTCODE)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.POSTCODE)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -257,8 +258,8 @@ def test_postcode_with_designation():
def test_postcode_with_designation_backwards(): def test_postcode_with_designation_backwards():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.POSTCODE)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(name=TokenRange(0, 1), TokenAssignment(name=TokenRange(0, 1),
@@ -268,8 +269,8 @@ def test_postcode_with_designation_backwards():
def test_near_item_at_beginning(): def test_near_item_at_beginning():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2), TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
@@ -277,8 +278,8 @@ def test_near_item_at_beginning():
def test_near_item_at_end(): def test_near_item_at_end():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)])) (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1), TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
@@ -286,17 +287,17 @@ def test_near_item_at_end():
def test_near_item_in_middle(): def test_near_item_in_middle():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_at_beginning(): def test_qualifier_at_beginning():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
@@ -308,11 +309,11 @@ def test_qualifier_at_beginning():
def test_qualifier_after_name(): def test_qualifier_after_name():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
(BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(5, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q), check_assignments(yield_token_assignments(q),
@@ -325,27 +326,27 @@ def test_qualifier_after_name():
def test_qualifier_before_housenumber(): def test_qualifier_before_housenumber():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.QUALIFIER)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_after_housenumber(): def test_qualifier_after_housenumber():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(2, TokenType.QUALIFIER)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)])) (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))
def test_qualifier_in_middle_of_phrase(): def test_qualifier_in_middle_of_phrase():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]), q = make_query((qmod.BREAK_START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]), (qmod.BREAK_PHRASE, PhraseType.NONE, [(2, TokenType.PARTIAL)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]), (qmod.BREAK_WORD, PhraseType.NONE, [(3, TokenType.QUALIFIER)]),
(BreakType.WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]), (qmod.BREAK_WORD, PhraseType.NONE, [(4, TokenType.PARTIAL)]),
(BreakType.PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)])) (qmod.BREAK_PHRASE, PhraseType.NONE, [(5, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q)) check_assignments(yield_token_assignments(q))