Merge pull request #2655 from lonvia/migration-internal-country-name

Add migration for new country name handling in ICU tokenizer
This commit is contained in:
Sarah Hoffmann
2022-03-31 18:04:18 +02:00
committed by GitHub
5 changed files with 61 additions and 3 deletions

View File

@@ -90,6 +90,17 @@ class _Connection(psycopg2.extensions.connection):
return num == 1 return num == 1
def table_has_column(self, table, column):
""" Check if the table 'table' exists and has a column with name 'column'.
"""
with self.cursor() as cur:
has_column = cur.scalar("""SELECT count(*) FROM information_schema.columns
WHERE table_name = %s
and column_name = %s""",
(table, column))
return has_column > 0
def index_exists(self, index, table=None): def index_exists(self, index, table=None):
""" Check that an index with the given name exists in the database. """ Check that an index with the given name exists in the database.
If table is not None then the index must relate to the given If table is not None then the index must relate to the given

View File

@@ -236,6 +236,9 @@ def add_step_column_for_interpolation(conn, **_):
Also converts the data into the stricter format which requires that Also converts the data into the stricter format which requires that
startnumbers comply with the odd/even requirements. startnumbers comply with the odd/even requirements.
""" """
if conn.table_has_column('location_property_osmline', 'step'):
return
with conn.cursor() as cur: with conn.cursor() as cur:
# Mark invalid all interpolations with no intermediate numbers. # Mark invalid all interpolations with no intermediate numbers.
cur.execute("""UPDATE location_property_osmline SET startnumber = null cur.execute("""UPDATE location_property_osmline SET startnumber = null
@@ -265,6 +268,9 @@ def add_step_column_for_interpolation(conn, **_):
def add_step_column_for_tiger(conn, **_): def add_step_column_for_tiger(conn, **_):
""" Add a new column 'step' to the tiger data table. """ Add a new column 'step' to the tiger data table.
""" """
if conn.table_has_column('location_property_tiger', 'step'):
return
with conn.cursor() as cur: with conn.cursor() as cur:
cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT") cur.execute("ALTER TABLE location_property_tiger ADD COLUMN step SMALLINT")
cur.execute("""UPDATE location_property_tiger cur.execute("""UPDATE location_property_tiger
@@ -278,5 +284,26 @@ def add_derived_name_column_for_country_names(conn, **_):
""" Add a new column 'derived_name' which in the future takes the """ Add a new column 'derived_name' which in the future takes the
country names as imported from OSM data. country names as imported from OSM data.
""" """
with conn.cursor() as cur: if not conn.table_has_column('country_name', 'derived_name'):
cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE") with conn.cursor() as cur:
cur.execute("ALTER TABLE country_name ADD COLUMN derived_name public.HSTORE")
@_migration(4, 0, 99, 5)
def mark_internal_country_names(conn, config, **_):
""" Names from the country table should be marked as internal to prevent
them from being deleted. Only necessary for ICU tokenizer.
"""
import psycopg2.extras # pylint: disable=import-outside-toplevel
tokenizer = tokenizer_factory.get_tokenizer_for_db(config)
with tokenizer.name_analyzer() as analyzer:
with conn.cursor() as cur:
psycopg2.extras.register_hstore(cur)
cur.execute("SELECT country_code, name FROM country_name")
for country_code, names in cur:
if not names:
names = {}
names['countrycode'] = country_code
analyzer.add_country_names(country_code, names)

View File

@@ -24,7 +24,7 @@ Version information for Nominatim.
# patch level when cherry-picking the commit with the migration. # patch level when cherry-picking the commit with the migration.
# #
# Released versions always have a database patch level of 0. # Released versions always have a database patch level of 0.
NOMINATIM_VERSION = (4, 0, 99, 5) NOMINATIM_VERSION = (4, 0, 99, 6)
POSTGRESQL_REQUIRED_VERSION = (9, 5) POSTGRESQL_REQUIRED_VERSION = (9, 5)
POSTGIS_REQUIRED_VERSION = (2, 2) POSTGIS_REQUIRED_VERSION = (2, 2)

View File

@@ -53,6 +53,10 @@ Feature: Country handling
| N1 | place | town | Wenig | country:de | | N1 | place | town | Wenig | country:de |
When importing When importing
When sending search query "Wenig, Germany" When sending search query "Wenig, Germany"
Then results contain
| osm |
| N1 |
When sending search query "Wenig, de"
Then results contain Then results contain
| osm | | osm |
| N1 | | N1 |
@@ -65,6 +69,12 @@ Feature: Country handling
Then results contain Then results contain
| osm | display_name | | osm | display_name |
| N1 | Wenig, Lilly | | N1 | Wenig, Lilly |
When sending search query "Wenig, de"
| accept-language |
| en,de |
Then results contain
| osm | display_name |
| N1 | Wenig, Lilly |
@fail-legacy @fail-legacy

View File

@@ -26,6 +26,16 @@ def test_connection_table_exists(db, table_factory):
assert db.table_exists('foobar') assert db.table_exists('foobar')
def test_has_column_no_table(db):
assert not db.table_has_column('sometable', 'somecolumn')
@pytest.mark.parametrize('name,result', [('tram', True), ('car', False)])
def test_has_column(db, table_factory, name, result):
table_factory('stuff', 'tram TEXT')
assert db.table_has_column('stuff', name) == result
def test_connection_index_exists(db, table_factory, temp_db_cursor): def test_connection_index_exists(db, table_factory, temp_db_cursor):
assert not db.index_exists('some_index') assert not db.index_exists('some_index')