Files
Nominatim/test/python/mock_icu_word_table.py
Sarah Hoffmann 81c6cb72e6 add normalised country name to word table
Country tokens now follow the usual convention of having the
normalized version in the word column and the extra info about the
country code in the info column.
2025-12-01 13:10:18 +01:00

112 lines
4.6 KiB
Python

# SPDX-License-Identifier: GPL-3.0-or-later
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2025 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
ICU word table for testing with functions to prefill and test contents
of the table.
"""
from nominatim_db.db.connection import execute_scalar
from psycopg.types.json import Jsonb
class MockIcuWordTable:
    """ A `word` table for testing code that works with the ICU word table.

        Creating an instance creates the table on the given connection.
        The `add_*` functions insert rows of a single token type and
        commit them, the `count*` and `get_*` functions read the table
        contents back for assertions.
    """

    def __init__(self, conn):
        """ Create the `word` table on connection `conn` and commit.
        """
        self.conn = conn
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE word (word_id INTEGER,
                                              word_token text NOT NULL,
                                              type text NOT NULL,
                                              word text,
                                              info jsonb)""")
        conn.commit()

    def add_full_word(self, word_id, word, word_token=None):
        """ Insert a row of type 'W' with an empty info object.

            When `word_token` is not given, `word` is used as the token.
        """
        # An explicitly given token must take precedence. Testing `word`
        # first would ignore `word_token` for every non-empty word.
        token = word if word_token is None else word_token
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
                           VALUES(%s, %s, 'W', %s, '{}'::jsonb)""",
                        (word_id, token, word))
        self.conn.commit()

    def add_special(self, word_token, word, cls, typ, oper):
        """ Insert a row of type 'S' whose info object holds `cls`, `typ`
            and `oper` under the keys 'class', 'type' and 'op'.
        """
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO word (word_token, type, word, info)
                              VALUES (%s, 'S', %s,
                                      json_build_object('class', %s::text,
                                                        'type', %s::text,
                                                        'op', %s::text))
                        """, (word_token, word, cls, typ, oper))
        self.conn.commit()

    def add_country(self, country_code, word_token, lookup):
        """ Insert a row of type 'C'. `lookup` goes into the word column,
            the country code into the info column under the key 'cc'.
        """
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO word (word_token, type, word, info)
                           VALUES(%s, 'C', %s, %s)""",
                        (word_token, lookup, Jsonb({'cc': country_code})))
        self.conn.commit()

    def add_postcode(self, word_token, postcode):
        """ Insert a row of type 'P' with `postcode` in the word column.
        """
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO word (word_token, type, word)
                              VALUES (%s, 'P', %s)
                        """, (word_token, postcode))
        self.conn.commit()

    def add_housenumber(self, word_id, word_tokens, word=None):
        """ Insert rows of type 'H'.

            When `word_tokens` is a single string, one row without word
            and info is added. Otherwise one row per token is added, all
            sharing the same word (by default the first token) and using
            the first token as 'lookup' value in the info column.
        """
        with self.conn.cursor() as cur:
            if isinstance(word_tokens, str):
                # old style without analyser
                cur.execute("""INSERT INTO word (word_id, word_token, type)
                                  VALUES (%s, %s, 'H')
                            """, (word_id, word_tokens))
            else:
                if word is None:
                    word = word_tokens[0]
                for token in word_tokens:
                    cur.execute("""INSERT INTO word (word_id, word_token, type, word, info)
                                      VALUES (%s, %s, 'H', %s,
                                              jsonb_build_object('lookup', %s::text))
                                """, (word_id, token, word, word_tokens[0]))

        self.conn.commit()

    def count(self):
        """ Return the total number of rows in the table.
        """
        return execute_scalar(self.conn, "SELECT count(*) FROM word")

    def count_special(self):
        """ Return the number of rows of type 'S'.
        """
        return execute_scalar(self.conn, "SELECT count(*) FROM word WHERE type = 'S'")

    def count_housenumbers(self):
        """ Return the number of rows of type 'H'.
        """
        return execute_scalar(self.conn, "SELECT count(*) FROM word WHERE type = 'H'")

    def get_special(self):
        """ Return all rows of type 'S' as a set of tuples
            (word_token, word, class, type, op).

            Asserts that the rows are free of duplicates.
        """
        with self.conn.cursor() as cur:
            cur.execute("SELECT word_token, info, word FROM word WHERE type = 'S'")
            result = {(row[0], row[2], row[1]['class'],
                       row[1]['type'], row[1]['op']) for row in cur}
            # The set collapses identical rows, so a smaller set means
            # that the table contained duplicates.
            assert len(result) == cur.rowcount, "Word table has duplicates."
            return result

    def get_country(self):
        """ Return all rows of type 'C' as a set of tuples
            (country code, word_token, word).

            Asserts that the rows are free of duplicates.
        """
        with self.conn.cursor() as cur:
            cur.execute("SELECT info->>'cc', word_token, word FROM word WHERE type = 'C'")
            result = {tuple(row) for row in cur}
            assert len(result) == cur.rowcount, "Word table has duplicates."
            return result

    def get_postcodes(self):
        """ Return the set of values in the word column of all rows
            of type 'P'.
        """
        with self.conn.cursor() as cur:
            cur.execute("SELECT word FROM word WHERE type = 'P'")
            return {row[0] for row in cur}

    def get_partial_words(self):
        """ Return all rows of type 'w' as a set of tuples
            (word_token, count), where count is read from the info column.
        """
        with self.conn.cursor() as cur:
            cur.execute("SELECT word_token, info FROM word WHERE type ='w'")
            return {(row[0], row[1]['count']) for row in cur}