Merge pull request #3290 from lonvia/near-vs-quaifier-words

Do not run near queries on qualifier words
This commit is contained in:
Sarah Hoffmann
2024-01-07 15:23:00 +01:00
committed by GitHub
2 changed files with 2 additions and 7 deletions

View File

@@ -8,7 +8,6 @@
Implementation of query analysis for the ICU tokenizer. Implementation of query analysis for the ICU tokenizer.
""" """
from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
from copy import copy
from collections import defaultdict from collections import defaultdict
import dataclasses import dataclasses
import difflib import difflib
@@ -188,10 +187,6 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token) query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else: else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token) query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else: else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token) query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)

View File

@@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5 assert query.num_token_slots() == 5
assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER}
@pytest.mark.asyncio @pytest.mark.asyncio