forked from hans/Nominatim
Merge pull request #3747 from anqixxx/fix-special-phrases-filtering
Special Phrases Filtering: Add Command Line Functionality
This commit is contained in:
@@ -136,6 +136,7 @@ class NominatimArgs:
|
|||||||
import_from_wiki: bool
|
import_from_wiki: bool
|
||||||
import_from_csv: Optional[str]
|
import_from_csv: Optional[str]
|
||||||
no_replace: bool
|
no_replace: bool
|
||||||
|
min: int
|
||||||
|
|
||||||
# Arguments to all query functions
|
# Arguments to all query functions
|
||||||
format: str
|
format: str
|
||||||
|
|||||||
@@ -58,6 +58,8 @@ class ImportSpecialPhrases:
|
|||||||
help='Import special phrases from a CSV file')
|
help='Import special phrases from a CSV file')
|
||||||
group.add_argument('--no-replace', action='store_true',
|
group.add_argument('--no-replace', action='store_true',
|
||||||
help='Keep the old phrases and only add the new ones')
|
help='Keep the old phrases and only add the new ones')
|
||||||
|
group.add_argument('--min', type=int, default=0,
|
||||||
|
help='Restrict special phrases by minimum occurance')
|
||||||
|
|
||||||
def run(self, args: NominatimArgs) -> int:
|
def run(self, args: NominatimArgs) -> int:
|
||||||
|
|
||||||
@@ -82,7 +84,9 @@ class ImportSpecialPhrases:
|
|||||||
|
|
||||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||||
should_replace = not args.no_replace
|
should_replace = not args.no_replace
|
||||||
|
min = args.min
|
||||||
|
|
||||||
with connect(args.config.get_libpq_dsn()) as db_connection:
|
with connect(args.config.get_libpq_dsn()) as db_connection:
|
||||||
SPImporter(
|
SPImporter(
|
||||||
args.config, db_connection, loader
|
args.config, db_connection, loader
|
||||||
).import_phrases(tokenizer, should_replace)
|
).import_phrases(tokenizer, should_replace, min)
|
||||||
|
|||||||
@@ -68,16 +68,17 @@ class SPImporter():
|
|||||||
"""
|
"""
|
||||||
Returns list of allowed special phrases from the database,
|
Returns list of allowed special phrases from the database,
|
||||||
restricting to a list of combinations of classes and types
|
restricting to a list of combinations of classes and types
|
||||||
which occur more than a specified amount of times.
|
which occur equal to or more than a specified amount of times.
|
||||||
|
|
||||||
Default value for this, if not specified, is at least once.
|
Default value for this is 0, which allows everything in database.
|
||||||
"""
|
"""
|
||||||
db_combinations = set()
|
db_combinations = set()
|
||||||
|
|
||||||
query = f"""
|
query = f"""
|
||||||
SELECT class AS CLS, type AS typ
|
SELECT class AS CLS, type AS typ
|
||||||
FROM placex
|
FROM placex
|
||||||
GROUP BY class, type
|
GROUP BY class, type
|
||||||
HAVING COUNT(*) > {min}
|
HAVING COUNT(*) >= {min}
|
||||||
"""
|
"""
|
||||||
|
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
@@ -87,7 +88,8 @@ class SPImporter():
|
|||||||
|
|
||||||
return db_combinations
|
return db_combinations
|
||||||
|
|
||||||
def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
|
def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool,
|
||||||
|
min: int = 0) -> None:
|
||||||
"""
|
"""
|
||||||
Iterate through all SpecialPhrases extracted from the
|
Iterate through all SpecialPhrases extracted from the
|
||||||
loader and import them into the database.
|
loader and import them into the database.
|
||||||
@@ -107,7 +109,7 @@ class SPImporter():
|
|||||||
if result:
|
if result:
|
||||||
class_type_pairs.add(result)
|
class_type_pairs.add(result)
|
||||||
|
|
||||||
self._create_classtype_table_and_indexes(class_type_pairs)
|
self._create_classtype_table_and_indexes(class_type_pairs, min)
|
||||||
if should_replace:
|
if should_replace:
|
||||||
self._remove_non_existent_tables_from_db()
|
self._remove_non_existent_tables_from_db()
|
||||||
|
|
||||||
@@ -186,7 +188,8 @@ class SPImporter():
|
|||||||
return (phrase.p_class, phrase.p_type)
|
return (phrase.p_class, phrase.p_type)
|
||||||
|
|
||||||
def _create_classtype_table_and_indexes(self,
|
def _create_classtype_table_and_indexes(self,
|
||||||
class_type_pairs: Iterable[Tuple[str, str]]) -> None:
|
class_type_pairs: Iterable[Tuple[str, str]],
|
||||||
|
min: int = 0) -> None:
|
||||||
"""
|
"""
|
||||||
Create table place_classtype for each given pair.
|
Create table place_classtype for each given pair.
|
||||||
Also create indexes on place_id and centroid.
|
Also create indexes on place_id and centroid.
|
||||||
@@ -200,13 +203,15 @@ class SPImporter():
|
|||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
||||||
|
|
||||||
allowed_special_phrases = self.get_classtype_pairs()
|
if min:
|
||||||
|
allowed_special_phrases = self.get_classtype_pairs(min)
|
||||||
|
|
||||||
for pair in class_type_pairs:
|
for pair in class_type_pairs:
|
||||||
phrase_class = pair[0]
|
phrase_class = pair[0]
|
||||||
phrase_type = pair[1]
|
phrase_type = pair[1]
|
||||||
|
|
||||||
if (phrase_class, phrase_type) not in allowed_special_phrases:
|
# Will only filter if min is not 0
|
||||||
|
if min and (phrase_class, phrase_type) not in allowed_special_phrases:
|
||||||
LOG.warning("Skipping phrase %s=%s: not in allowed special phrases",
|
LOG.warning("Skipping phrase %s=%s: not in allowed special phrases",
|
||||||
phrase_class, phrase_type)
|
phrase_class, phrase_type)
|
||||||
continue
|
continue
|
||||||
|
|||||||
@@ -3,8 +3,8 @@ from nominatim_db.tools.special_phrases.sp_importer import SPImporter
|
|||||||
|
|
||||||
# Testing Database Class Pair Retrival using Conftest.py and placex
|
# Testing Database Class Pair Retrival using Conftest.py and placex
|
||||||
def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
|
def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
|
||||||
for _ in range(101):
|
for _ in range(100):
|
||||||
placex_table.add(cls='highway', typ='motorway') # edge case 101
|
placex_table.add(cls='highway', typ='motorway') # edge case 100
|
||||||
|
|
||||||
for _ in range(99):
|
for _ in range(99):
|
||||||
placex_table.add(cls='amenity', typ='prison') # edge case 99
|
placex_table.add(cls='amenity', typ='prison') # edge case 99
|
||||||
@@ -25,8 +25,8 @@ def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
|
|||||||
|
|
||||||
|
|
||||||
def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
|
def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
|
||||||
for _ in range(100):
|
for _ in range(99):
|
||||||
placex_table.add(cls='emergency', typ='firehydrant') # edge case 100, not included
|
placex_table.add(cls='emergency', typ='firehydrant') # edge case 99, not included
|
||||||
|
|
||||||
for _ in range(199):
|
for _ in range(199):
|
||||||
placex_table.add(cls='amenity', typ='prison')
|
placex_table.add(cls='amenity', typ='prison')
|
||||||
|
|||||||
Reference in New Issue
Block a user