Made the limit configurable with an optional argument and updated the tests to reflect this. The default is now 0, meaning that it will return every class/type combination that occurs at least once. Removed the mock database test and got rid of fetchall() in favour of iterating over the cursor directly. Rebased all tests to monkeypatch.

This commit is contained in:
anqixxx
2025-04-14 10:21:07 -07:00
parent 59a947c5f5
commit 3f51cb3fd1
2 changed files with 41 additions and 60 deletions

View File

@@ -64,23 +64,25 @@ class SPImporter():
# special phrases class/type on the wiki. # special phrases class/type on the wiki.
self.table_phrases_to_delete: Set[str] = set() self.table_phrases_to_delete: Set[str] = set()
def get_classtype_pairs(self) -> Set[Tuple[str, str]]: def get_classtype_pairs(self, min: int = 0) -> Set[Tuple[str, str]]:
""" """
Returns list of allowed special phrases from the database, Returns list of allowed special phrases from the database,
restricting to a list of combinations of classes and types restricting to a list of combinations of classes and types
which occur more than 100 times which occur more than a specified amount of times.
If not specified, the minimum defaults to 0, so every combination that occurs at least once is returned.
""" """
db_combinations = set() db_combinations = set()
query = """ query = f"""
SELECT class AS CLS, type AS typ SELECT class AS CLS, type AS typ
FROM placex FROM placex
GROUP BY class, type GROUP BY class, type
HAVING COUNT(*) > 100 HAVING COUNT(*) > {min}
""" """
with self.db_connection.cursor() as db_cursor: with self.db_connection.cursor() as db_cursor:
db_cursor.execute(SQL(query)) db_cursor.execute(SQL(query))
for row in db_cursor.fetchall(): for row in db_cursor:
db_combinations.add((row[0], row[1])) db_combinations.add((row[0], row[1]))
return db_combinations return db_combinations

View File

@@ -1,60 +1,20 @@
import pytest
import tempfile
import os
from nominatim_db.tools.special_phrases.sp_importer import SPImporter from nominatim_db.tools.special_phrases.sp_importer import SPImporter
# Testing Database Class Pair Retrieval using Mock Database
def test_get_classtype_pairs(monkeypatch):
class Config:
def load_sub_configuration(self, path, section=None):
return {"blackList": {}, "whiteList": {}}
class Cursor:
def execute(self, query): pass
def fetchall(self):
return [
("highway", "motorway"),
("historic", "castle")
]
def __enter__(self): return self
def __exit__(self, exc_type, exc_val, exc_tb): pass
class Connection:
def cursor(self): return Cursor()
config = Config()
conn = Connection()
importer = SPImporter(config=config, conn=conn, sp_loader=None)
result = importer.get_classtype_pairs()
expected = {
("highway", "motorway"),
("historic", "castle")
}
assert result == expected
# Testing Database Class Pair Retrieval using Conftest.py and placex # Testing Database Class Pair Retrieval using Conftest.py and placex
def test_get_classtype_pair_data(placex_table, temp_db_conn): def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
class Config:
def load_sub_configuration(self, *_):
return {'blackList': {}, 'whiteList': {}}
for _ in range(101): for _ in range(101):
placex_table.add(cls='highway', typ='motorway') # edge case 101 placex_table.add(cls='highway', typ='motorway') # edge case 101
for _ in range(99): for _ in range(99):
placex_table.add(cls='amenity', typ='prison') # edge case 99 placex_table.add(cls='amenity', typ='prison') # edge case 99
for _ in range(150): for _ in range(150):
placex_table.add(cls='tourism', typ='hotel') placex_table.add(cls='tourism', typ='hotel')
config = Config() importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
importer = SPImporter(config=config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs() result = importer.get_classtype_pairs(min=100)
expected = { expected = {
("highway", "motorway"), ("highway", "motorway"),
@@ -63,13 +23,10 @@ def test_get_classtype_pair_data(placex_table, temp_db_conn):
assert result == expected, f"Expected {expected}, got {result}" assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_more(placex_table, temp_db_conn):
class Config:
def load_sub_configuration(self, *_):
return {'blackList': {}, 'whiteList': {}}
def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
for _ in range(100): for _ in range(100):
placex_table.add(cls='emergency', typ='firehydrant') # edge case 100, not included placex_table.add(cls='emergency', typ='firehydrant') # edge case 100, not included
for _ in range(199): for _ in range(199):
placex_table.add(cls='amenity', typ='prison') placex_table.add(cls='amenity', typ='prison')
@@ -77,10 +34,9 @@ def test_get_classtype_pair_data_more(placex_table, temp_db_conn):
for _ in range(3478): for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel') placex_table.add(cls='tourism', typ='hotel')
config = Config() importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
importer = SPImporter(config=config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs() result = importer.get_classtype_pairs(min=100)
expected = { expected = {
("amenity", "prison"), ("amenity", "prison"),
@@ -88,3 +44,26 @@ def test_get_classtype_pair_data_more(placex_table, temp_db_conn):
} }
assert result == expected, f"Expected {expected}, got {result}" assert result == expected, f"Expected {expected}, got {result}"
def test_get_classtype_pair_data_default(placex_table, def_config, temp_db_conn):
for _ in range(1):
placex_table.add(cls='emergency', typ='firehydrant')
for _ in range(199):
placex_table.add(cls='amenity', typ='prison')
for _ in range(3478):
placex_table.add(cls='tourism', typ='hotel')
importer = SPImporter(config=def_config, conn=temp_db_conn, sp_loader=None)
result = importer.get_classtype_pairs()
expected = {
("amenity", "prison"),
("tourism", "hotel"),
("emergency", "firehydrant")
}
assert result == expected, f"Expected {expected}, got {result}"