Compare commits


23 Commits

Author SHA1 Message Date
Sarah Hoffmann
96d04e3a2e Merge pull request #3894 from lonvia/country-names-with-word-lookup
Add normalized form of country names to country tokens in word table
2025-12-01 14:54:24 +01:00
Sarah Hoffmann
23db1ab981 avoid most recent psycopg 3.3 release 2025-12-01 14:23:36 +01:00
Sarah Hoffmann
cd1b1736a9 add migration for changed country token format 2025-12-01 13:10:18 +01:00
Sarah Hoffmann
9447c90b09 adapt tests to new country token format 2025-12-01 13:10:18 +01:00
Sarah Hoffmann
81c6cb72e6 add normalised country name to word table
Country tokens now follow the usual convention of having the
normalized version in the word column and the extra info about the
country code in the info column.
2025-12-01 13:10:18 +01:00
Sarah Hoffmann
f2a122c5c0 Merge pull request #3893 from lonvia/nature-reserve
Prefer leisure=nature_reserve as main tag over boundary=protected_area
2025-12-01 11:36:17 +01:00
Sarah Hoffmann
57ef0e1f98 prefer leisure=nature_reserve as main tag 2025-12-01 09:47:55 +01:00
Sarah Hoffmann
922667b650 Merge pull request #3892 from daishu0000/master
Add success message to setup.log: related to #3891
2025-11-30 14:13:51 +01:00
Sarah Hoffmann
fba803167c fix imprecise import 2025-11-30 11:50:55 +01:00
daishu0000
782df52ea0 Add success message to db log 2025-11-30 01:53:40 +08:00
Sarah Hoffmann
c36da68a48 Merge pull request #3890 from mtmail/remove-nat-name
Skip nat_name in default import
2025-11-28 14:13:30 +01:00
marc tobias
716de13bc9 Skip nat_name in default import 2025-11-28 11:35:35 +01:00
Sarah Hoffmann
1df56d7548 Merge pull request #3889 from lonvia/improve-linkage-code
Small improvements to place linking code
2025-11-26 22:11:11 +01:00
Sarah Hoffmann
9cfef7a31a prefer wikidata over name match when linking 2025-11-26 17:44:47 +01:00
Sarah Hoffmann
139678f367 fix linkage removal when nothing has changed 2025-11-26 17:03:19 +01:00
Sarah Hoffmann
e578c60ff4 Merge pull request #3874 from vytas7/falcon-4.2-typing
Adapt type annotations to Falcon App type changes
2025-11-16 16:12:35 +01:00
Vytautas Liuolia
7b4a3c8500 Add from __future__ import annotations to delay evaluation 2025-11-16 14:41:25 +01:00
Vytautas Liuolia
7751f9a6b6 Adapt type annotations to Falcon App type changes
See also: https://falcon.readthedocs.io/en/latest/api/typing.html#generic-app-types
2025-11-10 20:09:17 +01:00
Sarah Hoffmann
303ac42b47 Merge pull request #3862 from mtmail/skip-all-zero-postcodes
Postcode sanitizer now skips values which are only zeros
2025-10-31 10:36:05 +01:00
Sarah Hoffmann
6a2d2daad5 Merge pull request #3863 from lonvia/improve-bdd-test-names
Add custom pytest collector for BDD feature files
2025-10-31 10:19:56 +01:00
Sarah Hoffmann
a51c771107 disable improved BDD test naming for pytest < 8
Needs the improved test collector introduced in pytest 8.0.
2025-10-30 20:50:00 +01:00
Sarah Hoffmann
55547723bf add custom pytest collector for BDD feature files 2025-10-30 17:56:23 +01:00
marc tobias
362088775f postcode sanitizer skips postcodes which are only zeros 2025-10-30 13:45:29 +01:00
26 changed files with 234 additions and 69 deletions

View File

@@ -52,6 +52,15 @@ To run the functional tests, do
pytest test/bdd
You can run a single feature file using expression matching:
pytest test/bdd -k osm2pgsql/import/entrances.feature
This even works for running single tests by adding the line number of the
scenario header, like this:
pytest test/bdd -k 'osm2pgsql/import/entrances.feature and L4'
The BDD tests create their own databases. You can set the names of these databases
through configuration variables in your `pytest.ini`:
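A hypothetical sketch of such a `pytest.ini` (the option names below are illustrative stand-ins, not confirmed configuration keys; check the BDD conftest for the actual names):

```ini
[pytest]
# Hypothetical option names for the databases created by the BDD tests.
nominatim_test_db = test_nominatim
nominatim_api_test_db = test_api_nominatim
```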

View File

@@ -117,6 +117,7 @@ module.MAIN_TAGS.all_boundaries = {
boundary = {'named',
place = 'delete',
land_area = 'delete',
protected_area = 'fallback',
postal_code = 'always'},
landuse = 'fallback',
place = 'always'
@@ -198,7 +199,7 @@ module.MAIN_TAGS_POIS = function (group)
no = group},
landuse = {cemetery = 'always'},
leisure = {'always',
nature_reserve = 'fallback',
nature_reserve = 'named',
swimming_pool = 'named',
garden = 'named',
common = 'named',
@@ -321,7 +322,6 @@ module.NAME_TAGS = {}
module.NAME_TAGS.core = {main = {'name', 'name:*',
'int_name', 'int_name:*',
'nat_name', 'nat_name:*',
'reg_name', 'reg_name:*',
'loc_name', 'loc_name:*',
'old_name', 'old_name:*',

View File

@@ -341,6 +341,22 @@ BEGIN
END IF;
END IF;
IF bnd.extratags ? 'wikidata' THEN
FOR linked_placex IN
SELECT * FROM placex
WHERE placex.class = 'place' AND placex.osm_type = 'N'
AND placex.extratags ? 'wikidata' -- needed to select right index
AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
AND placex.rank_search < 26
AND _st_covers(bnd.geometry, placex.geometry)
ORDER BY lower(name->'name') = bnd_name desc
LOOP
{% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
END LOOP;
END IF;
-- If extratags has a place tag, look for linked nodes by their place type.
-- Area and node still have to have the same name.
IF bnd.extratags ? 'place' and bnd.extratags->'place' != 'postcode'
@@ -361,22 +377,6 @@ BEGIN
END LOOP;
END IF;
IF bnd.extratags ? 'wikidata' THEN
FOR linked_placex IN
SELECT * FROM placex
WHERE placex.class = 'place' AND placex.osm_type = 'N'
AND placex.extratags ? 'wikidata' -- needed to select right index
AND placex.extratags->'wikidata' = bnd.extratags->'wikidata'
AND (placex.linked_place_id is null or placex.linked_place_id = bnd.place_id)
AND placex.rank_search < 26
AND _st_covers(bnd.geometry, placex.geometry)
ORDER BY lower(name->'name') = bnd_name desc
LOOP
{% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
END LOOP;
END IF;
-- Name searches can be done for ways as well as relations
IF bnd_name is not null THEN
{% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}
@@ -874,7 +874,7 @@ BEGIN
-- Remove linkage, if we have computed a different new linkee.
UPDATE placex SET linked_place_id = null, indexed_status = 2
WHERE linked_place_id = NEW.place_id
and (linked_place is null or linked_place_id != linked_place);
and (linked_place is null or place_id != linked_place);
-- update not necessary for osmline, because linked_place_id does not exist
-- Postcodes are just here to compute the centroids. They are not searchable

View File

@@ -15,7 +15,7 @@ classifiers = [
"Operating System :: OS Independent",
]
dependencies = [
"psycopg",
"psycopg<3.3",
"python-dotenv",
"jinja2",
"pyYAML>=5.1",

View File

@@ -10,7 +10,7 @@ Helper classes and functions for formatting results into API responses.
from typing import Type, TypeVar, Dict, List, Callable, Any, Mapping, Optional, cast
from collections import defaultdict
from pathlib import Path
import importlib
import importlib.util
from .server.content_types import CONTENT_JSON

View File

@@ -374,7 +374,7 @@ class SearchBuilder:
tokens = self.get_country_tokens(assignment.country)
if not tokens:
return None
sdata.set_strings('countries', tokens)
sdata.set_countries(tokens)
sdata.penalty += self.query.get_in_word_penalty(assignment.country)
elif self.details.countries:
sdata.countries = dbf.WeightedStrings(self.details.countries,

View File

@@ -244,6 +244,21 @@ class SearchData:
setattr(self, field, wstrs)
def set_countries(self, tokens: List[Token]) -> None:
""" Set the WeightedStrings properties for countries. Multiple
entries for the same country are deduplicated and the minimum
penalty is used. Adapts the global penalty, so that the
minimum penalty is 0.
"""
if tokens:
min_penalty = min(t.penalty for t in tokens)
self.penalty += min_penalty
countries: dict[str, float] = {}
for t in tokens:
cc = t.get_country()
countries[cc] = min(t.penalty - min_penalty, countries.get(cc, 10000))
self.countries = WeightedStrings(list(countries.keys()), list(countries.values()))
def set_qualifiers(self, tokens: List[Token]) -> None:
""" Set the qulaifier field from the given tokens.
"""

View File

@@ -59,12 +59,16 @@ class ICUToken(qmod.Token):
assert self.info
return self.info.get('class', ''), self.info.get('type', '')
def rematch(self, norm: str) -> None:
def get_country(self) -> str:
assert self.info
return cast(str, self.info.get('cc', ''))
def match_penalty(self, norm: str) -> float:
""" Check how well the token matches the given normalized string
and add a penalty, if necessary.
"""
if not self.lookup_word:
return
return 0.0
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
@@ -75,7 +79,7 @@ class ICUToken(qmod.Token):
distance += max((ato-afrom), (bto-bfrom))
elif tag != 'equal':
distance += abs((ato-afrom) - (bto-bfrom))
self.penalty += (distance/len(self.lookup_word))
return (distance/len(self.lookup_word))
@staticmethod
def from_db_row(row: SaRow) -> 'ICUToken':
@@ -330,9 +334,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
norm = ''.join(f"{n.term_normalized}{'' if n.btype == qmod.BREAK_TOKEN else ' '}"
for n in query.nodes[start + 1:end + 1]).strip()
for ttype, tokens in tlist.items():
if ttype != qmod.TOKEN_COUNTRY:
for token in tokens:
cast(ICUToken, token).rematch(norm)
for token in tokens:
itok = cast(ICUToken, token)
itok.penalty += itok.match_penalty(norm) * \
(1 if ttype in (qmod.TOKEN_WORD, qmod.TOKEN_PARTIAL) else 2)
def compute_break_penalties(self, query: qmod.QueryStruct) -> None:
""" Set the break penalties for the nodes in the query.

View File

@@ -127,6 +127,12 @@ class Token(ABC):
category objects.
"""
@abstractmethod
def get_country(self) -> str:
""" Return the country code this tojen is associated with
(currently for country tokens only).
"""
@dataclasses.dataclass
class TokenRange:

View File

@@ -7,6 +7,8 @@
"""
Server implementation using the falcon webserver framework.
"""
from __future__ import annotations
from typing import Optional, Mapping, Any, List, cast
from pathlib import Path
import asyncio
@@ -161,7 +163,7 @@ class APIMiddleware:
def __init__(self, project_dir: Path, environ: Optional[Mapping[str, str]]) -> None:
self.api = NominatimAPIAsync(project_dir, environ)
self.app: Optional[App] = None
self.app: Optional[App[Request, Response]] = None
@property
def config(self) -> Configuration:
@@ -169,7 +171,7 @@ class APIMiddleware:
"""
return self.api.config
def set_app(self, app: App) -> None:
def set_app(self, app: App[Request, Response]) -> None:
""" Set the Falcon application this middleware is connected to.
"""
self.app = app
@@ -193,7 +195,7 @@ class APIMiddleware:
def get_application(project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> App:
environ: Optional[Mapping[str, str]] = None) -> App[Request, Response]:
""" Create a Nominatim Falcon ASGI application.
"""
apimw = APIMiddleware(project_dir, environ)
@@ -215,7 +217,7 @@ def get_application(project_dir: Path,
return app
def run_wsgi() -> App:
def run_wsgi() -> App[Request, Response]:
""" Entry point for uvicorn.
Make sure uvicorn is run from the project directory.
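For reference, a minimal sketch of the generic annotation style this adopts (assuming Falcon 4.x generic app types as described in the linked documentation; the `__future__` import keeps the subscripted annotation unevaluated at runtime):

```python
from __future__ import annotations

from falcon.asgi import App, Request, Response


def make_app() -> App[Request, Response]:
    # The type parameters only matter to static checkers; at runtime
    # this is a plain ASGI application.
    return App()
```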

View File

@@ -23,6 +23,7 @@ from ..tokenizer.base import AbstractTokenizer
from ..version import NOMINATIM_VERSION
from .args import NominatimArgs
import time
LOG = logging.getLogger()
@@ -86,6 +87,8 @@ class SetupAll:
from ..tools import database_import, postcodes, freeze
from ..indexer.indexer import Indexer
start_time = time.time()
num_threads = args.threads or psutil.cpu_count() or 1
country_info.setup_country_config(args.config)
@@ -138,6 +141,10 @@ class SetupAll:
LOG.warning('Recompute word counts')
tokenizer.update_statistics(args.config, threads=num_threads)
end_time = time.time()
elapsed = end_time - start_time
LOG.warning(f'Import completed successfully in {elapsed:.2f} seconds.')
self._finalize_database(args.config.get_libpq_dsn(), args.offline)
return 0

View File

@@ -29,6 +29,9 @@ class CountryPostcodeMatcher:
self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
self.pattern = re.compile(pc_pattern)
# We want to exclude 0000, 00-000, 000 00 etc
self.zero_pattern = re.compile(r'^[0\- ]+$')
self.output = config.get('output', r'\g<0>')
def match(self, postcode: str) -> Optional[Match[str]]:
@@ -40,7 +43,10 @@ class CountryPostcodeMatcher:
normalized = self.norm_pattern.fullmatch(postcode.upper())
if normalized:
return self.pattern.fullmatch(normalized.group(1))
match = self.pattern.fullmatch(normalized.group(1))
if match and self.zero_pattern.match(match.string):
return None
return match
return None
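The effect of the new `zero_pattern` can be checked in isolation (standalone sketch; the sanitizer applies it to the already pattern-matched postcode):

```python
import re

zero_pattern = re.compile(r'^[0\- ]+$')

# All-zero postcodes match the exclusion pattern and are dropped;
# anything with a non-zero digit survives.
for pc in ('00000', '00-000', '000 00', '01234'):
    print(pc, bool(zero_pattern.match(pc)))
# -> True, True, True, False
```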

View File

@@ -475,20 +475,23 @@ class ICUNameAnalyzer(AbstractAnalyzer):
assert self.conn is not None
word_tokens = set()
for name in names:
norm_name = self._search_normalized(name.name)
if norm_name:
word_tokens.add(norm_name)
norm_name = self._normalized(name.name)
token_name = self._search_normalized(name.name)
if norm_name and token_name:
word_tokens.add((token_name, norm_name))
with self.conn.cursor() as cur:
# Get existing names
cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
cur.execute("""SELECT word_token,
word as lookup,
coalesce(info ? 'internal', false) as is_internal
FROM word
WHERE type = 'C' and word = %s""",
WHERE type = 'C' and info->>'cc' = %s""",
(country_code, ))
# internal/external names
existing_tokens: Dict[bool, Set[str]] = {True: set(), False: set()}
existing_tokens: Dict[bool, Set[Tuple[str, str]]] = {True: set(), False: set()}
for word in cur:
existing_tokens[word[1]].add(word[0])
existing_tokens[word[2]].add((word[0], word[1]))
# Delete names that no longer exist.
gone_tokens = existing_tokens[internal] - word_tokens
@@ -496,10 +499,10 @@ class ICUNameAnalyzer(AbstractAnalyzer):
gone_tokens.update(existing_tokens[False] & word_tokens)
if gone_tokens:
cur.execute("""DELETE FROM word
USING unnest(%s::text[]) as token
WHERE type = 'C' and word = %s
and word_token = token""",
(list(gone_tokens), country_code))
USING jsonb_array_elements(%s) as data
WHERE type = 'C' and info->>'cc' = %s
and word_token = data->>0 and word = data->>1""",
(Jsonb(list(gone_tokens)), country_code))
# Only add those names that are not yet in the list.
new_tokens = word_tokens - existing_tokens[True]
@@ -508,15 +511,17 @@ class ICUNameAnalyzer(AbstractAnalyzer):
if new_tokens:
if internal:
sql = """INSERT INTO word (word_token, type, word, info)
(SELECT token, 'C', %s, '{"internal": "yes"}'
FROM unnest(%s::text[]) as token)
(SELECT data->>0, 'C', data->>1,
jsonb_build_object('internal', 'yes', 'cc', %s::text)
FROM jsonb_array_elements(%s) as data)
"""
else:
sql = """INSERT INTO word (word_token, type, word)
(SELECT token, 'C', %s
FROM unnest(%s::text[]) as token)
sql = """INSERT INTO word (word_token, type, word, info)
(SELECT data->>0, 'C', data->>1,
jsonb_build_object('cc', %s::text)
FROM jsonb_array_elements(%s) as data)
"""
cur.execute(sql, (country_code, list(new_tokens)))
cur.execute(sql, (country_code, Jsonb(list(new_tokens))))
def process_place(self, place: PlaceInfo) -> Mapping[str, Any]:
""" Determine tokenizer information about the given place.

View File

@@ -2,7 +2,7 @@
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2024 by the Nominatim developer community.
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for database migration to newer software versions.
@@ -18,6 +18,7 @@ from ..db.connection import connect, Connection, \
from ..db.sql_preprocessor import SQLPreprocessor
from ..version import NominatimVersion, NOMINATIM_VERSION, parse_version
from ..tokenizer import factory as tokenizer_factory
from ..data.country_info import create_country_names, setup_country_config
from . import refresh
LOG = logging.getLogger()
@@ -156,3 +157,25 @@ def create_place_entrance_table(conn: Connection, config: Configuration, **_: An
CREATE UNIQUE INDEX place_entrance_osm_id_idx ON place_entrance
USING BTREE (osm_id);
""")
@_migration(5, 2, 99, 1)
def convert_country_tokens(conn: Connection, config: Configuration, **_: Any) -> None:
""" Convert country word tokens
Country tokens now save the country in the info field instead of the
word. This migration removes all country tokens from the word table
and reimports the default country names. This means that custom names
are lost. If you need them back, invalidate the OSM objects containing
the names by setting indexed_status to 2 and then reindex the database.
"""
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
# There is only one tokenizer at the time of migration, so we make
# some assumptions here about the structure of the database. This will
# fail if somebody has written a custom tokenizer.
with conn.cursor() as cur:
cur.execute("DELETE FROM word WHERE type = 'C'")
conn.commit()
setup_country_config(config)
create_country_names(conn, tokenizer, config.get_str_list('LANGUAGES'))
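If custom country names need restoring after this migration, the docstring's hint amounts to something like the following sketch (the WHERE clause is a hypothetical selection of country-level objects, not taken from the code; reindex afterwards with `nominatim index`):

```python
import psycopg

# Hypothetical sketch: flag country-level objects for reindexing so
# their name tokens are recreated. The selection is illustrative only.
with psycopg.connect("dbname=nominatim") as conn:
    with conn.cursor() as cur:
        cur.execute("""UPDATE placex SET indexed_status = 2
                       WHERE class = 'boundary' and type = 'administrative'
                             and admin_level = 2""")
    conn.commit()
```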

View File

@@ -55,7 +55,7 @@ def parse_version(version: str) -> NominatimVersion:
return NominatimVersion(*[int(x) for x in parts[:2] + parts[2].split('-')])
NOMINATIM_VERSION = parse_version('5.2.0-0')
NOMINATIM_VERSION = parse_version('5.2.99-0')
POSTGRESQL_REQUIRED_VERSION = (12, 0)
POSTGIS_REQUIRED_VERSION = (3, 0)

View File

@@ -9,6 +9,7 @@ Fixtures for BDD test steps
"""
import sys
import json
import re
from pathlib import Path
import psycopg
@@ -20,7 +21,8 @@ sys.path.insert(0, str(SRC_DIR / 'src'))
import pytest
from pytest_bdd.parsers import re as step_parse
from pytest_bdd import given, when, then
from pytest_bdd import given, when, then, scenario
from pytest_bdd.feature import get_features
pytest.register_assert_rewrite('utils')
@@ -373,3 +375,57 @@ def check_place_missing_lines(db_conn, table, osm_type, osm_id, osm_class):
with db_conn.cursor() as cur:
assert cur.execute(sql, params).fetchone()[0] == 0
if pytest.version_tuple >= (8, 0, 0):
def pytest_pycollect_makemodule(module_path, parent):
return BddTestCollector.from_parent(parent, path=module_path)
class BddTestCollector(pytest.Module):
def __init__(self, **kwargs):
super().__init__(**kwargs)
def collect(self):
for item in super().collect():
yield item
if hasattr(self.obj, 'PYTEST_BDD_SCENARIOS'):
for path in self.obj.PYTEST_BDD_SCENARIOS:
for feature in get_features([str(Path(self.path.parent, path).resolve())]):
yield FeatureFile.from_parent(self,
name=str(Path(path, feature.rel_filename)),
path=Path(feature.filename),
feature=feature)
# borrowed from pytest-bdd: src/pytest_bdd/scenario.py
def make_python_name(string: str) -> str:
"""Make python attribute name out of a given string."""
string = re.sub(r"\W", "", string.replace(" ", "_"))
return re.sub(r"^\d+_*", "", string).lower()
class FeatureFile(pytest.File):
class obj:
pass
def __init__(self, feature, **kwargs):
self.feature = feature
super().__init__(**kwargs)
def collect(self):
for sname, sobject in self.feature.scenarios.items():
class_name = f"L{sobject.line_number}"
test_name = "test_" + make_python_name(sname)
@scenario(self.feature.filename, sname)
def _test():
pass
tclass = type(class_name, (),
{test_name: staticmethod(_test)})
setattr(self.obj, class_name, tclass)
yield pytest.Class.from_parent(self, name=class_name, obj=tclass)
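Naming each generated class `L<line number>` is what makes the line-based selection from the documentation hunk above work: `pytest test/bdd -k 'osm2pgsql/import/entrances.feature and L4'` matches the class created for the scenario starting at line 4 of that feature file.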

View File

@@ -15,7 +15,7 @@ import xml.etree.ElementTree as ET
import pytest
from pytest_bdd.parsers import re as step_parse
from pytest_bdd import scenarios, when, given, then
from pytest_bdd import when, given, then
from nominatim_db import cli
from nominatim_db.config import Configuration
@@ -150,4 +150,8 @@ def parse_api_json_response(api_response, fmt, num):
return result
scenarios('features/api')
if pytest.version_tuple >= (8, 0, 0):
PYTEST_BDD_SCENARIOS = ['features/api']
else:
from pytest_bdd import scenarios
scenarios('features/api')

View File

@@ -15,7 +15,7 @@ import re
import psycopg
import pytest
from pytest_bdd import scenarios, when, then, given
from pytest_bdd import when, then, given
from pytest_bdd.parsers import re as step_parse
from utils.place_inserter import PlaceColumn
@@ -276,4 +276,8 @@ def then_check_interpolation_table_negative(db_conn, oid):
assert cur.fetchone()[0] == 0
scenarios('features/db')
if pytest.version_tuple >= (8, 0, 0):
PYTEST_BDD_SCENARIOS = ['features/db']
else:
from pytest_bdd import scenarios
scenarios('features/db')

View File

@@ -11,7 +11,7 @@ import asyncio
import random
import pytest
from pytest_bdd import scenarios, when, then, given
from pytest_bdd import when, then, given
from pytest_bdd.parsers import re as step_parse
from nominatim_db import cli
@@ -106,4 +106,8 @@ def check_place_content(db_conn, datatable, node_grid, table, exact):
check_table_content(db_conn, table, datatable, grid=node_grid, exact=bool(exact))
scenarios('features/osm2pgsql')
if pytest.version_tuple >= (8, 0, 0):
PYTEST_BDD_SCENARIOS = ['features/osm2pgsql']
else:
from pytest_bdd import scenarios
scenarios('features/osm2pgsql')

View File

@@ -17,6 +17,9 @@ class MyToken(query.Token):
def get_category(self):
return 'this', 'that'
def get_country(self):
return 'cc'
def mktoken(tid: int):
return MyToken(penalty=3.0, token=tid, count=1, addr_count=1,

View File

@@ -21,6 +21,9 @@ class MyToken(Token):
def get_category(self):
return 'this', 'that'
def get_country(self):
return self.lookup_word
def make_query(*args):
q = QueryStruct([Phrase(qmod.PHRASE_ANY, '')])

View File

@@ -20,6 +20,9 @@ class MyToken(Token):
def get_category(self):
return 'this', 'that'
def get_country(self):
return 'cc'
def make_query(*args):
q = QueryStruct([Phrase(args[0][1], '')])

View File

@@ -99,7 +99,7 @@ def test_address_simple_places(apiobj, frontend, atype, address, search):
def test_address_country(apiobj, frontend):
apiobj.add_word_table([(None, 'ro', 'C', 'ro', None)])
apiobj.add_word_table([(None, 'ro', 'C', 'ro', {'cc': 'ro'})])
apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
apiobj.add_country_name('ro', {'name': 'România'})

View File

@@ -10,6 +10,8 @@ of the table.
"""
from nominatim_db.db.connection import execute_scalar
from psycopg.types.json import Jsonb
class MockIcuWordTable:
""" A word table for testing using legacy word table structure.
@@ -42,11 +44,11 @@ class MockIcuWordTable:
""", (word_token, word, cls, typ, oper))
self.conn.commit()
def add_country(self, country_code, word_token):
def add_country(self, country_code, word_token, lookup):
with self.conn.cursor() as cur:
cur.execute("""INSERT INTO word (word_token, type, word)
VALUES(%s, 'C', %s)""",
(word_token, country_code))
cur.execute("""INSERT INTO word (word_token, type, word, info)
VALUES(%s, 'C', %s, %s)""",
(word_token, lookup, Jsonb({'cc': country_code})))
self.conn.commit()
def add_postcode(self, word_token, postcode):
@@ -93,7 +95,7 @@ class MockIcuWordTable:
def get_country(self):
with self.conn.cursor() as cur:
cur.execute("SELECT word, word_token FROM word WHERE type = 'C'")
cur.execute("SELECT info->>'cc', word_token, word FROM word WHERE type = 'C'")
result = set((tuple(row) for row in cur))
assert len(result) == cur.rowcount, "Word table has duplicates."
return result

View File

@@ -237,3 +237,9 @@ def test_postcode_default_pattern_pass(sanitize, postcode):
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_fail(sanitize, postcode):
assert sanitize(country='an', postcode=postcode) == []
@pytest.mark.parametrize("postcode", ('00000', '00-000', 'PL-00000', 'PL 00-000'))
@pytest.mark.sanitizer_params(convert_to_address=False)
def test_postcode_zeros(sanitize, postcode):
assert sanitize(country='pl', postcode=postcode) == []

View File

@@ -343,16 +343,18 @@ def test_add_country_names_new(analyzer, word_table):
with analyzer() as anl:
anl.add_country_names('es', {'name': 'Espagña', 'name:en': 'Spain'})
assert word_table.get_country() == {('es', 'ESPAGÑA'), ('es', 'SPAIN')}
assert word_table.get_country() == {('es', 'ESPAGÑA', 'Espagña'),
('es', 'SPAIN', 'Spain')}
def test_add_country_names_extend(analyzer, word_table):
word_table.add_country('ch', 'SCHWEIZ')
word_table.add_country('ch', 'SCHWEIZ', 'Schweiz')
with analyzer() as anl:
anl.add_country_names('ch', {'name': 'Schweiz', 'name:fr': 'Suisse'})
assert word_table.get_country() == {('ch', 'SCHWEIZ'), ('ch', 'SUISSE')}
assert word_table.get_country() == {('ch', 'SCHWEIZ', 'Schweiz'),
('ch', 'SUISSE', 'Suisse')}
class TestPlaceNames:
@@ -403,7 +405,7 @@ class TestPlaceNames:
info = self.analyzer.process_place(place)
self.expect_name_terms(info, '#norge', 'norge')
assert word_table.get_country() == {('no', 'NORGE')}
assert word_table.get_country() == {('no', 'NORGE', 'Norge')}
class TestPlaceAddress: