From 40d5b78eb80c1e7479a51d62969a35e8c31ca98a Mon Sep 17 00:00:00 2001
From: anqixxx <anqix2002@gmail.com>
Date: Thu, 29 May 2025 09:25:08 -0700
Subject: [PATCH 1/4] Added --min command line argument (default 0) for minimum
 occurrence filtering, updated args.py to reflect this

---
 src/nominatim_db/clicmd/args.py                       | 1 +
 src/nominatim_db/clicmd/special_phrases.py            | 6 +++++-
 src/nominatim_db/tools/special_phrases/sp_importer.py | 9 +++++----
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/nominatim_db/clicmd/args.py b/src/nominatim_db/clicmd/args.py
index 45df9b7c..5c6a806a 100644
--- a/src/nominatim_db/clicmd/args.py
+++ b/src/nominatim_db/clicmd/args.py
@@ -136,6 +136,7 @@ class NominatimArgs:
     import_from_wiki: bool
     import_from_csv: Optional[str]
     no_replace: bool
+    min: int
 
     # Arguments to all query functions
     format: str
diff --git a/src/nominatim_db/clicmd/special_phrases.py b/src/nominatim_db/clicmd/special_phrases.py
index 9ba751a0..90560fb7 100644
--- a/src/nominatim_db/clicmd/special_phrases.py
+++ b/src/nominatim_db/clicmd/special_phrases.py
@@ -58,6 +58,8 @@ class ImportSpecialPhrases:
                            help='Import special phrases from a CSV file')
         group.add_argument('--no-replace', action='store_true',
                            help='Keep the old phrases and only add the new ones')
+        group.add_argument('--min', type=int, default=0,
+                           help='Restrict special phrases by minimum occurance')
 
     def run(self, args: NominatimArgs) -> int:
 
@@ -82,7 +84,9 @@ class ImportSpecialPhrases:
 
         tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
         should_replace = not args.no_replace
+        min = args.min
+
         with connect(args.config.get_libpq_dsn()) as db_connection:
             SPImporter(
                 args.config, db_connection, loader
-            ).import_phrases(tokenizer, should_replace)
+            ).import_phrases(tokenizer, should_replace, min)
diff --git a/src/nominatim_db/tools/special_phrases/sp_importer.py b/src/nominatim_db/tools/special_phrases/sp_importer.py
index ac50377f..6bd3c287 100644
--- a/src/nominatim_db/tools/special_phrases/sp_importer.py
+++ b/src/nominatim_db/tools/special_phrases/sp_importer.py
@@ -87,7 +87,7 @@ class SPImporter():
 
         return db_combinations
 
-    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
+    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool, min: int) -> None:
         """
             Iterate through all SpecialPhrases extracted from the
             loader and import them into the database.
@@ -107,7 +107,7 @@ class SPImporter():
             if result:
                 class_type_pairs.add(result)
 
-        self._create_classtype_table_and_indexes(class_type_pairs)
+        self._create_classtype_table_and_indexes(class_type_pairs, min)
         if should_replace:
             self._remove_non_existent_tables_from_db()
 
@@ -186,7 +186,8 @@ class SPImporter():
         return (phrase.p_class, phrase.p_type)
 
     def _create_classtype_table_and_indexes(self,
-                                            class_type_pairs: Iterable[Tuple[str, str]]) -> None:
+                                            class_type_pairs: Iterable[Tuple[str, str]],
+                                            min: int) -> None:
         """
             Create table place_classtype for each given pair.
             Also create indexes on place_id and centroid.
@@ -200,7 +201,7 @@ class SPImporter():
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
 
-        allowed_special_phrases = self.get_classtype_pairs()
+        allowed_special_phrases = self.get_classtype_pairs(min)
 
         for pair in class_type_pairs:
             phrase_class = pair[0]

From 20cf4b56b9c0fd8a6c8f91d341143e8d1c815e4e Mon Sep 17 00:00:00 2001
From: anqixxx <anqix2002@gmail.com>
Date: Sat, 31 May 2025 09:41:36 -0700
Subject: [PATCH 2/4] Changed min filter and associated tests to use
 greater-than-or-equal logic, so that min=0 means no filtering

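The HAVING clause now uses COUNT(*) >= min instead of COUNT(*) > min, and the
allowed-phrase check is skipped when min is 0.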
---
 src/nominatim_db/tools/special_phrases/sp_importer.py | 10 ++++++----
 test/python/tools/test_sp_importer.py                 |  8 ++++----
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/src/nominatim_db/tools/special_phrases/sp_importer.py b/src/nominatim_db/tools/special_phrases/sp_importer.py
index 6bd3c287..890cf2fc 100644
--- a/src/nominatim_db/tools/special_phrases/sp_importer.py
+++ b/src/nominatim_db/tools/special_phrases/sp_importer.py
@@ -68,16 +68,17 @@ class SPImporter():
         """
             Returns list of allowed special phrases from the database,
             restricting to a list of combinations of classes and types
-            which occur more than a specified amount of times.
+            which occur at least a specified number of times.
 
-            Default value for this, if not specified, is at least once.
+            Default value for this is 0, which allows everything in database.
         """
         db_combinations = set()
+
         query = f"""
         SELECT class AS CLS, type AS typ
         FROM placex
         GROUP BY class, type
-        HAVING COUNT(*) > {min}
+        HAVING COUNT(*) >= {min}
         """
 
         with self.db_connection.cursor() as db_cursor:
@@ -207,7 +208,8 @@ class SPImporter():
             phrase_class = pair[0]
             phrase_type = pair[1]
 
-            if (phrase_class, phrase_type) not in allowed_special_phrases:
+            # Will only filter if min is not 0
+            if min and (phrase_class, phrase_type) not in allowed_special_phrases:
                 LOG.warning("Skipping phrase %s=%s: not in allowed special phrases",
                             phrase_class, phrase_type)
                 continue
diff --git a/test/python/tools/test_sp_importer.py b/test/python/tools/test_sp_importer.py
index dda02f11..c64c2b7d 100644
--- a/test/python/tools/test_sp_importer.py
+++ b/test/python/tools/test_sp_importer.py
@@ -3,8 +3,8 @@ from nominatim_db.tools.special_phrases.sp_importer import SPImporter
 
 # Testing Database Class Pair Retrival using Conftest.py and placex
 def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
-    for _ in range(101):
-        placex_table.add(cls='highway', typ='motorway')  # edge case 101
+    for _ in range(100):
+        placex_table.add(cls='highway', typ='motorway')  # edge case 100
 
     for _ in range(99):
         placex_table.add(cls='amenity', typ='prison')  # edge case 99
@@ -25,8 +25,8 @@ def test_get_classtype_pair_data(placex_table, def_config, temp_db_conn):
 
 
 def test_get_classtype_pair_data_more(placex_table, def_config, temp_db_conn):
-    for _ in range(100):
-        placex_table.add(cls='emergency', typ='firehydrant')  # edge case 100, not included
+    for _ in range(99):
+        placex_table.add(cls='emergency', typ='firehydrant')  # edge case 99, not included
 
     for _ in range(199):
         placex_table.add(cls='amenity', typ='prison')

From 7dc3924a3c640ff3e7e2a3c91c7436576fb57b0c Mon Sep 17 00:00:00 2001
From: anqixxx <anqix2002@gmail.com>
Date: Wed, 4 Jun 2025 01:10:14 -0700
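Subject: [PATCH 3/4] Added default min=0 for import_phrases() and
 _create_classtype_table_and_indexes()

The min parameter is now optional on both methods, so callers that omit it
keep working.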
---
 src/nominatim_db/tools/special_phrases/sp_importer.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/nominatim_db/tools/special_phrases/sp_importer.py b/src/nominatim_db/tools/special_phrases/sp_importer.py
index 890cf2fc..4989ef73 100644
--- a/src/nominatim_db/tools/special_phrases/sp_importer.py
+++ b/src/nominatim_db/tools/special_phrases/sp_importer.py
@@ -88,7 +88,8 @@ class SPImporter():
 
         return db_combinations
 
-    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool, min: int) -> None:
+    def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool,
+                       min: int = 0) -> None:
         """
             Iterate through all SpecialPhrases extracted from the
             loader and import them into the database.
@@ -188,7 +189,7 @@ class SPImporter():
 
     def _create_classtype_table_and_indexes(self,
                                             class_type_pairs: Iterable[Tuple[str, str]],
-                                            min: int) -> None:
+                                            min: int = 0) -> None:
         """
             Create table place_classtype for each given pair.
             Also create indexes on place_id and centroid.

From cf9b946eba5067790d49d5dc62290f0d9517dd07 Mon Sep 17 00:00:00 2001
From: anqixxx <anqix2002@gmail.com>
Date: Thu, 5 Jun 2025 09:25:14 +0800
Subject: [PATCH 4/4] Skip get_classtype_pairs() query when min=0

---
 src/nominatim_db/tools/special_phrases/sp_importer.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/nominatim_db/tools/special_phrases/sp_importer.py b/src/nominatim_db/tools/special_phrases/sp_importer.py
index 4989ef73..12e695b6 100644
--- a/src/nominatim_db/tools/special_phrases/sp_importer.py
+++ b/src/nominatim_db/tools/special_phrases/sp_importer.py
@@ -203,7 +203,8 @@ class SPImporter():
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
 
-        allowed_special_phrases = self.get_classtype_pairs(min)
+        if min:
+            allowed_special_phrases = self.get_classtype_pairs(min)
 
         for pair in class_type_pairs:
             phrase_class = pair[0]