mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
bdd: run full import on tests
This uncovered a couple of outdated/wrong tests which have been fixed, too.
This commit is contained in:
@@ -390,17 +390,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
|
|
||||||
|
|
||||||
def add_country_names(self, country_code, names):
|
def add_country_names(self, country_code, names):
|
||||||
""" Add names for the given country to the search index.
|
""" Add default names for the given country to the search index.
|
||||||
"""
|
"""
|
||||||
# Make sure any name preprocessing for country names applies.
|
# Make sure any name preprocessing for country names applies.
|
||||||
info = PlaceInfo({'name': names, 'country_code': country_code,
|
info = PlaceInfo({'name': names, 'country_code': country_code,
|
||||||
'rank_address': 4, 'class': 'boundary',
|
'rank_address': 4, 'class': 'boundary',
|
||||||
'type': 'administrative'})
|
'type': 'administrative'})
|
||||||
self._add_country_full_names(country_code,
|
self._add_country_full_names(country_code,
|
||||||
self.sanitizer.process_names(info)[0])
|
self.sanitizer.process_names(info)[0],
|
||||||
|
internal=True)
|
||||||
|
|
||||||
|
|
||||||
def _add_country_full_names(self, country_code, names):
|
def _add_country_full_names(self, country_code, names, internal=False):
|
||||||
""" Add names for the given country from an already sanitized
|
""" Add names for the given country from an already sanitized
|
||||||
name list.
|
name list.
|
||||||
"""
|
"""
|
||||||
@@ -412,21 +413,18 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
|
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
# Get existing names
|
# Get existing names
|
||||||
cur.execute("""SELECT word_token FROM word
|
cur.execute("""SELECT word_token, coalesce(info ? 'internal', false) as is_internal
|
||||||
WHERE type = 'C' and word = %s""",
|
FROM word
|
||||||
|
WHERE type = 'C' and word = %s""",
|
||||||
(country_code, ))
|
(country_code, ))
|
||||||
existing_tokens = {t[0] for t in cur}
|
existing_tokens = {True: set(), False: set()} # internal/external names
|
||||||
|
for word in cur:
|
||||||
# Only add those names that are not yet in the list.
|
existing_tokens[word[1]].add(word[0])
|
||||||
new_tokens = word_tokens - existing_tokens
|
|
||||||
if new_tokens:
|
|
||||||
cur.execute("""INSERT INTO word (word_token, type, word)
|
|
||||||
(SELECT token, 'C', %s
|
|
||||||
FROM unnest(%s) as token)
|
|
||||||
""", (country_code, list(new_tokens)))
|
|
||||||
|
|
||||||
# Delete names that no longer exist.
|
# Delete names that no longer exist.
|
||||||
gone_tokens = existing_tokens - word_tokens
|
gone_tokens = existing_tokens[internal] - word_tokens
|
||||||
|
if internal:
|
||||||
|
gone_tokens.update(existing_tokens[False] & word_tokens)
|
||||||
if gone_tokens:
|
if gone_tokens:
|
||||||
cur.execute("""DELETE FROM word
|
cur.execute("""DELETE FROM word
|
||||||
USING unnest(%s) as token
|
USING unnest(%s) as token
|
||||||
@@ -434,6 +432,23 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
|
|||||||
and word_token = token""",
|
and word_token = token""",
|
||||||
(list(gone_tokens), country_code))
|
(list(gone_tokens), country_code))
|
||||||
|
|
||||||
|
# Only add those names that are not yet in the list.
|
||||||
|
new_tokens = word_tokens - existing_tokens[True]
|
||||||
|
if not internal:
|
||||||
|
new_tokens -= existing_tokens[False]
|
||||||
|
if new_tokens:
|
||||||
|
if internal:
|
||||||
|
sql = """INSERT INTO word (word_token, type, word, info)
|
||||||
|
(SELECT token, 'C', %s, '{"internal": "yes"}'
|
||||||
|
FROM unnest(%s) as token)
|
||||||
|
"""
|
||||||
|
else:
|
||||||
|
sql = """INSERT INTO word (word_token, type, word)
|
||||||
|
(SELECT token, 'C', %s
|
||||||
|
FROM unnest(%s) as token)
|
||||||
|
"""
|
||||||
|
cur.execute(sql, (country_code, list(new_tokens)))
|
||||||
|
|
||||||
|
|
||||||
def process_place(self, place):
|
def process_place(self, place):
|
||||||
""" Determine tokenizer information about the given place.
|
""" Determine tokenizer information about the given place.
|
||||||
|
|||||||
@@ -61,70 +61,6 @@ Feature: Import into placex
|
|||||||
When importing
|
When importing
|
||||||
Then placex has no entry for R1
|
Then placex has no entry for R1
|
||||||
|
|
||||||
Scenario: search and address ranks for GB post codes correctly assigned
|
|
||||||
Given the places
|
|
||||||
| osm | class | type | postcode | geometry |
|
|
||||||
| N1 | place | postcode | E45 2CD | country:gb |
|
|
||||||
| N2 | place | postcode | E45 2 | country:gb |
|
|
||||||
| N3 | place | postcode | Y45 | country:gb |
|
|
||||||
When importing
|
|
||||||
Then placex contains
|
|
||||||
| object | addr+postcode | country_code | rank_search | rank_address |
|
|
||||||
| N1 | E45 2CD | gb | 25 | 5 |
|
|
||||||
| N2 | E45 2 | gb | 23 | 5 |
|
|
||||||
| N3 | Y45 | gb | 21 | 5 |
|
|
||||||
|
|
||||||
Scenario: wrongly formatted GB postcodes are down-ranked
|
|
||||||
Given the places
|
|
||||||
| osm | class | type | postcode | geometry |
|
|
||||||
| N1 | place | postcode | EA452CD | country:gb |
|
|
||||||
| N2 | place | postcode | E45 23 | country:gb |
|
|
||||||
When importing
|
|
||||||
Then placex contains
|
|
||||||
| object | country_code | rank_search | rank_address |
|
|
||||||
| N1 | gb | 30 | 30 |
|
|
||||||
| N2 | gb | 30 | 30 |
|
|
||||||
|
|
||||||
Scenario: search and address rank for DE postcodes correctly assigned
|
|
||||||
Given the places
|
|
||||||
| osm | class | type | postcode | geometry |
|
|
||||||
| N1 | place | postcode | 56427 | country:de |
|
|
||||||
| N2 | place | postcode | 5642 | country:de |
|
|
||||||
| N3 | place | postcode | 5642A | country:de |
|
|
||||||
| N4 | place | postcode | 564276 | country:de |
|
|
||||||
When importing
|
|
||||||
Then placex contains
|
|
||||||
| object | country_code | rank_search | rank_address |
|
|
||||||
| N1 | de | 21 | 11 |
|
|
||||||
| N2 | de | 30 | 30 |
|
|
||||||
| N3 | de | 30 | 30 |
|
|
||||||
| N4 | de | 30 | 30 |
|
|
||||||
|
|
||||||
Scenario: search and address rank for other postcodes are correctly assigned
|
|
||||||
Given the places
|
|
||||||
| osm | class | type | postcode | geometry |
|
|
||||||
| N1 | place | postcode | 1 | country:ca |
|
|
||||||
| N2 | place | postcode | X3 | country:ca |
|
|
||||||
| N3 | place | postcode | 543 | country:ca |
|
|
||||||
| N4 | place | postcode | 54dc | country:ca |
|
|
||||||
| N5 | place | postcode | 12345 | country:ca |
|
|
||||||
| N6 | place | postcode | 55TT667 | country:ca |
|
|
||||||
| N7 | place | postcode | 123-65 | country:ca |
|
|
||||||
| N8 | place | postcode | 12 445 4 | country:ca |
|
|
||||||
| N9 | place | postcode | A1:bc10 | country:ca |
|
|
||||||
When importing
|
|
||||||
Then placex contains
|
|
||||||
| object | country_code | rank_search | rank_address |
|
|
||||||
| N1 | ca | 21 | 11 |
|
|
||||||
| N2 | ca | 21 | 11 |
|
|
||||||
| N3 | ca | 21 | 11 |
|
|
||||||
| N4 | ca | 21 | 11 |
|
|
||||||
| N5 | ca | 21 | 11 |
|
|
||||||
| N6 | ca | 21 | 11 |
|
|
||||||
| N7 | ca | 25 | 11 |
|
|
||||||
| N8 | ca | 25 | 11 |
|
|
||||||
| N9 | ca | 25 | 11 |
|
|
||||||
|
|
||||||
Scenario: search and address ranks for boundaries are correctly assigned
|
Scenario: search and address ranks for boundaries are correctly assigned
|
||||||
Given the named places
|
Given the named places
|
||||||
| osm | class | type |
|
| osm | class | type |
|
||||||
|
|||||||
@@ -153,4 +153,70 @@ Feature: Import of postcodes
|
|||||||
When sending search query "E4 7EA"
|
When sending search query "E4 7EA"
|
||||||
Then results contain
|
Then results contain
|
||||||
| type | display_name |
|
| type | display_name |
|
||||||
| postcode | E4 7EA |
|
| postcode | E4 7EA |
|
||||||
|
|
||||||
|
Scenario: search and address ranks for GB post codes correctly assigned
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | postcode | geometry |
|
||||||
|
| N1 | place | postcode | E45 2CD | country:gb |
|
||||||
|
| N2 | place | postcode | E45 2 | country:gb |
|
||||||
|
| N3 | place | postcode | Y45 | country:gb |
|
||||||
|
When importing
|
||||||
|
Then location_postcode contains exactly
|
||||||
|
| postcode | country | rank_search | rank_address |
|
||||||
|
| E45 2CD | gb | 25 | 5 |
|
||||||
|
| E45 2 | gb | 23 | 5 |
|
||||||
|
| Y45 | gb | 21 | 5 |
|
||||||
|
|
||||||
|
Scenario: wrongly formatted GB postcodes are down-ranked
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | postcode | geometry |
|
||||||
|
| N1 | place | postcode | EA452CD | country:gb |
|
||||||
|
| N2 | place | postcode | E45 23 | country:gb |
|
||||||
|
When importing
|
||||||
|
Then location_postcode contains exactly
|
||||||
|
| postcode | country | rank_search | rank_address |
|
||||||
|
| EA452CD | gb | 30 | 30 |
|
||||||
|
| E45 23 | gb | 30 | 30 |
|
||||||
|
|
||||||
|
Scenario: search and address rank for DE postcodes correctly assigned
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | postcode | geometry |
|
||||||
|
| N1 | place | postcode | 56427 | country:de |
|
||||||
|
| N2 | place | postcode | 5642 | country:de |
|
||||||
|
| N3 | place | postcode | 5642A | country:de |
|
||||||
|
| N4 | place | postcode | 564276 | country:de |
|
||||||
|
When importing
|
||||||
|
Then location_postcode contains exactly
|
||||||
|
| postcode | country | rank_search | rank_address |
|
||||||
|
| 56427 | de | 21 | 11 |
|
||||||
|
| 5642 | de | 30 | 30 |
|
||||||
|
| 5642A | de | 30 | 30 |
|
||||||
|
| 564276 | de | 30 | 30 |
|
||||||
|
|
||||||
|
Scenario: search and address rank for other postcodes are correctly assigned
|
||||||
|
Given the places
|
||||||
|
| osm | class | type | postcode | geometry |
|
||||||
|
| N1 | place | postcode | 1 | country:ca |
|
||||||
|
| N2 | place | postcode | X3 | country:ca |
|
||||||
|
| N3 | place | postcode | 543 | country:ca |
|
||||||
|
| N4 | place | postcode | 54dc | country:ca |
|
||||||
|
| N5 | place | postcode | 12345 | country:ca |
|
||||||
|
| N6 | place | postcode | 55TT667 | country:ca |
|
||||||
|
| N7 | place | postcode | 123-65 | country:ca |
|
||||||
|
| N8 | place | postcode | 12 445 4 | country:ca |
|
||||||
|
| N9 | place | postcode | A1:bc10 | country:ca |
|
||||||
|
When importing
|
||||||
|
Then location_postcode contains exactly
|
||||||
|
| postcode | country | rank_search | rank_address |
|
||||||
|
| 1 | ca | 21 | 11 |
|
||||||
|
| X3 | ca | 21 | 11 |
|
||||||
|
| 543 | ca | 21 | 11 |
|
||||||
|
| 54DC | ca | 21 | 11 |
|
||||||
|
| 12345 | ca | 21 | 11 |
|
||||||
|
| 55TT667 | ca | 21 | 11 |
|
||||||
|
| 123-65 | ca | 25 | 11 |
|
||||||
|
| 12 445 4 | ca | 25 | 11 |
|
||||||
|
| A1:BC10 | ca | 25 | 11 |
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -16,7 +16,6 @@ Feature: Rank assignment
|
|||||||
| N18 | place | city | 0 0 |
|
| N18 | place | city | 0 0 |
|
||||||
| N19 | place | island | 0 0 |
|
| N19 | place | island | 0 0 |
|
||||||
| N36 | place | house | 0 0 |
|
| N36 | place | house | 0 0 |
|
||||||
| N38 | place | houses | 0 0 |
|
|
||||||
And the named places
|
And the named places
|
||||||
| osm | class | type | extra+capital | geometry |
|
| osm | class | type | extra+capital | geometry |
|
||||||
| N101 | place | city | yes | 0 0 |
|
| N101 | place | city | yes | 0 0 |
|
||||||
@@ -35,7 +34,6 @@ Feature: Rank assignment
|
|||||||
| N19 | 17 | 0 |
|
| N19 | 17 | 0 |
|
||||||
| N101 | 15 | 16 |
|
| N101 | 15 | 16 |
|
||||||
| N36 | 30 | 30 |
|
| N36 | 30 | 30 |
|
||||||
| N38 | 28 | 0 |
|
|
||||||
|
|
||||||
Scenario: Ranks for boundaries are assigned according to admin level
|
Scenario: Ranks for boundaries are assigned according to admin level
|
||||||
Given the named places
|
Given the named places
|
||||||
|
|||||||
@@ -164,8 +164,8 @@ Feature: Import and search of names
|
|||||||
|
|
||||||
Scenario: Unprintable characters in postcodes are ignored
|
Scenario: Unprintable characters in postcodes are ignored
|
||||||
Given the named places
|
Given the named places
|
||||||
| osm | class | type | address |
|
| osm | class | type | address | geometry |
|
||||||
| N234 | amenity | prison | 'postcode' : u'1234\u200e' |
|
| N234 | amenity | prison | 'postcode' : u'1234\u200e' | country:de |
|
||||||
When importing
|
When importing
|
||||||
And sending search query "1234"
|
And sending search query "1234"
|
||||||
Then result 0 has not attributes osm_type
|
Then result 0 has not attributes osm_type
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ Feature: Searching of simple objects
|
|||||||
| osm | class | type | postcode | geometry |
|
| osm | class | type | postcode | geometry |
|
||||||
| R1 | boundary | postal_code | 54321 | poly-area:1.0 |
|
| R1 | boundary | postal_code | 54321 | poly-area:1.0 |
|
||||||
And sending search query "12345"
|
And sending search query "12345"
|
||||||
Then result 0 has not attributes osm_type
|
Then exactly 0 results are returned
|
||||||
When sending search query "54321"
|
When sending search query "54321"
|
||||||
Then results contain
|
Then results contain
|
||||||
| ID | osm |
|
| ID | osm |
|
||||||
|
|||||||
@@ -63,9 +63,7 @@ Feature: Update of simple objects
|
|||||||
| osm | class | type | postcode | geometry |
|
| osm | class | type | postcode | geometry |
|
||||||
| N3 | place | postcode | 12345 | 1 -1 |
|
| N3 | place | postcode | 12345 | 1 -1 |
|
||||||
When importing
|
When importing
|
||||||
Then placex contains
|
Then placex has no entry for N3
|
||||||
| object | class | type |
|
|
||||||
| N3 | place | postcode |
|
|
||||||
When updating places
|
When updating places
|
||||||
| osm | class | type | postcode | housenr | geometry |
|
| osm | class | type | postcode | housenr | geometry |
|
||||||
| N3 | place | house | 12345 | 13 | 1 -1 |
|
| N3 | place | house | 12345 | 13 | 1 -1 |
|
||||||
|
|||||||
@@ -139,21 +139,3 @@ Feature: Update of relations by osm2pgsql
|
|||||||
Then place contains
|
Then place contains
|
||||||
| object | addr+country | name |
|
| object | addr+country | name |
|
||||||
| R1 | XX | 'name' : 'Foo' |
|
| R1 | XX | 'name' : 'Foo' |
|
||||||
|
|
||||||
Scenario: Country boundary names are extended when country_code known
|
|
||||||
When loading osm data
|
|
||||||
"""
|
|
||||||
n200 Tamenity=prison x0 y0
|
|
||||||
n201 x0 y0.0001
|
|
||||||
n202 x0.0001 y0.0001
|
|
||||||
n203 x0.0001 y0
|
|
||||||
"""
|
|
||||||
And updating osm data
|
|
||||||
"""
|
|
||||||
w1 Nn200,n201,n202,n203,n200
|
|
||||||
r1 Ttype=boundary,boundary=administrative,name=Foo,country_code=ch,admin_level=2 Mw1@
|
|
||||||
"""
|
|
||||||
Then place contains
|
|
||||||
| object | addr+country | name+name:de | name+name |
|
|
||||||
| R1 | ch | Schweiz | Foo |
|
|
||||||
|
|
||||||
|
|||||||
@@ -93,29 +93,8 @@ def add_data_to_planet_ways(context):
|
|||||||
def import_and_index_data_from_place_table(context):
|
def import_and_index_data_from_place_table(context):
|
||||||
""" Import data previously set up in the place table.
|
""" Import data previously set up in the place table.
|
||||||
"""
|
"""
|
||||||
nctx = context.nominatim
|
context.nominatim.run_nominatim('import', '--continue', 'load-data',
|
||||||
|
'--index-noanalyse', '-q')
|
||||||
tokenizer = tokenizer_factory.create_tokenizer(nctx.get_test_config())
|
|
||||||
context.nominatim.copy_from_place(context.db)
|
|
||||||
|
|
||||||
# XXX use tool function as soon as it is ported
|
|
||||||
with context.db.cursor() as cur:
|
|
||||||
with (context.nominatim.src_dir / 'lib-sql' / 'postcode_tables.sql').open('r') as fd:
|
|
||||||
cur.execute(fd.read())
|
|
||||||
cur.execute("""
|
|
||||||
INSERT INTO location_postcode
|
|
||||||
(place_id, indexed_status, country_code, postcode, geometry)
|
|
||||||
SELECT nextval('seq_place'), 1, country_code,
|
|
||||||
upper(trim (both ' ' from address->'postcode')) as pc,
|
|
||||||
ST_Centroid(ST_Collect(ST_Centroid(geometry)))
|
|
||||||
FROM placex
|
|
||||||
WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
|
|
||||||
AND geometry IS NOT null
|
|
||||||
GROUP BY country_code, pc""")
|
|
||||||
|
|
||||||
# Call directly as the refresh function does not include postcodes.
|
|
||||||
indexer.LOG.setLevel(logging.ERROR)
|
|
||||||
indexer.Indexer(context.nominatim.get_libpq_dsn(), tokenizer, 1).index_full(analyse=False)
|
|
||||||
|
|
||||||
check_database_integrity(context)
|
check_database_integrity(context)
|
||||||
|
|
||||||
@@ -268,7 +247,7 @@ def check_location_postcode(context):
|
|||||||
for row in context.table:
|
for row in context.table:
|
||||||
db_row = results.get((row['country'],row['postcode']))
|
db_row = results.get((row['country'],row['postcode']))
|
||||||
assert db_row is not None, \
|
assert db_row is not None, \
|
||||||
"Missing row for country '{r['country']}' postcode '{r['postcode']}'.".format(r=row)
|
f"Missing row for country '{row['country']}' postcode '{row['postcode']}'."
|
||||||
|
|
||||||
db_row.assert_row(row, ('country', 'postcode'))
|
db_row.assert_row(row, ('country', 'postcode'))
|
||||||
|
|
||||||
@@ -333,12 +312,13 @@ def check_place_addressline_exclude(context):
|
|||||||
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
with context.db.cursor(cursor_factory=psycopg2.extras.DictCursor) as cur:
|
||||||
for row in context.table:
|
for row in context.table:
|
||||||
pid = NominatimID(row['object']).get_place_id(cur)
|
pid = NominatimID(row['object']).get_place_id(cur)
|
||||||
apid = NominatimID(row['address']).get_place_id(cur)
|
apid = NominatimID(row['address']).get_place_id(cur, allow_empty=True)
|
||||||
cur.execute(""" SELECT * FROM place_addressline
|
if apid is not None:
|
||||||
WHERE place_id = %s AND address_place_id = %s""",
|
cur.execute(""" SELECT * FROM place_addressline
|
||||||
(pid, apid))
|
WHERE place_id = %s AND address_place_id = %s""",
|
||||||
assert cur.rowcount == 0, \
|
(pid, apid))
|
||||||
"Row found for place %s and address %s" % (row['object'], row['address'])
|
assert cur.rowcount == 0, \
|
||||||
|
"Row found for place %s and address %s" % (row['object'], row['address'])
|
||||||
|
|
||||||
@then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
|
@then("W(?P<oid>\d+) expands to(?P<neg> no)? interpolation")
|
||||||
def check_location_property_osmline(context, oid, neg):
|
def check_location_property_osmline(context, oid, neg):
|
||||||
|
|||||||
@@ -62,11 +62,14 @@ class NominatimID:
|
|||||||
','.join(['*'] + (extra_columns or [])), table)
|
','.join(['*'] + (extra_columns or [])), table)
|
||||||
cur.execute(query, (pid, ))
|
cur.execute(query, (pid, ))
|
||||||
|
|
||||||
def get_place_id(self, cur):
|
def get_place_id(self, cur, allow_empty=False):
|
||||||
""" Look up the place id for the ID. Throws an assertion if the ID
|
""" Look up the place id for the ID. Throws an assertion if the ID
|
||||||
is not unique.
|
is not unique.
|
||||||
"""
|
"""
|
||||||
self.query_osm_id(cur, "SELECT place_id FROM placex WHERE {}")
|
self.query_osm_id(cur, "SELECT place_id FROM placex WHERE {}")
|
||||||
|
if cur.rowcount == 0 and allow_empty:
|
||||||
|
return None
|
||||||
|
|
||||||
assert cur.rowcount == 1, \
|
assert cur.rowcount == 1, \
|
||||||
"Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)
|
"Place ID {!s} not unique. Found {} entries.".format(self, cur.rowcount)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user