bdd tests: do not query word table directly

The BDD tests can no longer make assumptions about the structure of the word table because it depends on the tokenizer. Instead, use more abstract step descriptions that ask for specific kinds of tokens.
2026-02-16 15:47:58 +00:00 · 2021-07-24 12:12:31 +02:00
parent e42878eeda
commit 324b1b5575
4 changed files with 74 additions and 64 deletions
--- a/test/bdd/db/import/postcodes.feature
+++ b/test/bdd/db/import/postcodes.feature
@@ -134,9 +134,7 @@ Feature: Import of postcodes
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | de      | 01982    | country:de |
-        And word contains
-           | word  | class | type |
-           | 01982 | place | postcode |
+        And there are word tokens for postcodes 01982

    Scenario: Different postcodes with the same normalization can both be found
        Given the places
--- a/test/bdd/db/update/postcode.feature
+++ b/test/bdd/db/update/postcode.feature
@@ -18,10 +18,7 @@ Feature: Update of postcode
           | country | postcode | geometry |
           | de      | 01982    | country:de |
           | ch      | 4567     | country:ch |
-        And word contains
-           | word  | class | type |
-           | 01982 | place | postcode |
-           | 4567  | place | postcode |
+        And there are word tokens for postcodes 01982,4567

     Scenario: When the last postcode is deleted, it is deleted from postcode and word
        Given the places
@@ -34,12 +31,8 @@ Feature: Update of postcode
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | ch      | 4567     | country:ch |
-        And word contains not
-           | word  | class | type |
-           | 01982 | place | postcode |
-        And word contains
-           | word  | class | type |
-           | 4567  | place | postcode |
+        And there are word tokens for postcodes 4567
+        And there are no word tokens for postcodes 01982

     Scenario: A postcode is not deleted from postcode and word when it exist in another country
        Given the places
@@ -52,9 +45,7 @@ Feature: Update of postcode
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | ch      | 01982    | country:ch |
-        And word contains
-           | word  | class | type |
-           | 01982 | place | postcode |
+        And there are word tokens for postcodes 01982

     Scenario: Updating a postcode is reflected in postcode table
        Given the places
@@ -68,9 +59,7 @@ Feature: Update of postcode
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | de      | 20453    | country:de |
-        And word contains
-           | word  | class | type |
-           | 20453 | place | postcode |
+        And there are word tokens for postcodes 20453

     Scenario: When changing from a postcode type, the entry appears in placex
        When importing
@@ -91,9 +80,7 @@ Feature: Update of postcode
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | de      | 20453    | country:de |
-        And word contains
-           | word  | class | type |
-           | 20453 | place | postcode |
+        And there are word tokens for postcodes 20453

     Scenario: When changing to a postcode type, the entry disappears from placex
        When importing
@@ -114,6 +101,4 @@ Feature: Update of postcode
        Then location_postcode contains exactly
           | country | postcode | geometry |
           | de      | 01982    | country:de |
-        And word contains
-           | word  | class | type |
-           | 01982 | place | postcode |
+        And there are word tokens for postcodes 01982
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -281,6 +281,39 @@ def check_word_table(context, exclude):
            else:
                assert cur.rowcount > 0, "Row not in word table: %s" % '/'.join(values)

+
+@then("there are(?P<exclude> no)? word tokens for postcodes (?P<postcodes>.*)")
+def check_word_table_for_postcodes(context, exclude, postcodes):
+    """ Check that the word table has exactly one postcode token for each of
+        the given postcodes, or none at all when `exclude` is set. `postcodes`
+        is a comma-separated list; whitespace matters, entries are not stripped.
+    """
+    nctx = context.nominatim
+    tokenizer = tokenizer_factory.get_tokenizer_for_db(nctx.get_test_config())
+    with tokenizer.name_analyzer() as ana:
+        plist = [ana.normalize_postcode(p) for p in postcodes.split(',')]  # compare normalized forms
+
+    plist.sort()  # only fixes the order of the array sent to the query; the checks below use sets
+
+    with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
+        if nctx.tokenizer == 'legacy_icu':  # postcode is read from info->>'postcode' of type 'P' rows
+            cur.execute("""SELECT info->>'postcode' FROM word
+                           WHERE type = 'P' and info->>'postcode' = any(%s)""",
+                        (plist,))
+        else:  # any other tokenizer: match on the word column with class 'place', type 'postcode'
+            cur.execute("""SELECT word FROM word WHERE word = any(%s)
+                             and class = 'place' and type = 'postcode'""",
+                        (plist,))
+
+        found = [row[0] for row in cur]  # the single selected column of every matching row
+        assert len(found) == len(set(found)), f"Duplicate rows for postcodes: {found}"
+
+    if exclude:  # step was phrased "there are no word tokens ..."
+        assert len(found) == 0, f"Unexpected postcodes: {found}"
+    else:  # every requested (normalized) postcode must have been found
+        assert set(found) == set(plist), \
+        f"Missing postcodes {set(plist) - set(found)}. Found: {found}"
+
@then("place_addressline contains")
 def check_place_addressline(context):
    """ Check the contents of the place_addressline table. Each row represents