mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
ignore Unicode format characters for normalization
Also adds tests. Fixes #1007.
This commit is contained in:
@@ -20,7 +20,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
|
|||||||
// Rules for normalizing terms for comparison before doing comparisons.
|
// Rules for normalizing terms for comparison before doing comparisons.
|
||||||
// The default is to remove accents and punctuation and to lower-case the
|
// The default is to remove accents and punctuation and to lower-case the
|
||||||
// term. Spaces are kept but collapsed to one standard space.
|
// term. Spaces are kept but collapsed to one standard space.
|
||||||
@define('CONST_Term_Normalization_Rules', ":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
|
@define('CONST_Term_Normalization_Rules', ":: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
|
||||||
|
|
||||||
// Set to false to avoid importing extra postcodes for the US.
|
// Set to false to avoid importing extra postcodes for the US.
|
||||||
@define('CONST_Use_Extra_US_Postcodes', true);
|
@define('CONST_Use_Extra_US_Postcodes', true);
|
||||||
|
|||||||
@@ -95,7 +95,6 @@ Feature: Import of postcodes
|
|||||||
| object | postcode |
|
| object | postcode |
|
||||||
| W93 | 445023 |
|
| W93 | 445023 |
|
||||||
|
|
||||||
@wip
|
|
||||||
Scenario: Postcodes from admin boundaries are preferred over estimated postcodes
|
Scenario: Postcodes from admin boundaries are preferred over estimated postcodes
|
||||||
Given the scene admin-areas
|
Given the scene admin-areas
|
||||||
And the named places
|
And the named places
|
||||||
|
|||||||
@@ -136,3 +136,13 @@ Feature: Import and search of names
|
|||||||
Then results contain
|
Then results contain
|
||||||
| ID | osm_type | osm_id |
|
| ID | osm_type | osm_id |
|
||||||
| 0 | R | 1 |
|
| 0 | R | 1 |
|
||||||
|
|
||||||
|
Scenario: Unprintable characters in postcodes are ignored
|
||||||
|
Given the named places
|
||||||
|
| osm | class | type | address |
|
||||||
|
| N234 | amenity | prison | 'postcode' : u'1234\u200e' |
|
||||||
|
When importing
|
||||||
|
And searching for "1234"
|
||||||
|
Then results contain
|
||||||
|
| ID | osm_type |
|
||||||
|
| 0 | P |
|
||||||
|
|||||||
@@ -96,6 +96,15 @@ Feature: Tag evaluation
|
|||||||
| N3 | 'name: de' : 'Foo', 'name:\\\\' : 'real3' |
|
| N3 | 'name: de' : 'Foo', 'name:\\\\' : 'real3' |
|
||||||
| N4 | 'name: de' : 'Foo', 'name' : 'rea\\l3' |
|
| N4 | 'name: de' : 'Foo', 'name' : 'rea\\l3' |
|
||||||
|
|
||||||
|
Scenario: Unprintable characters in address tags are maintained
|
||||||
|
When loading osm data
|
||||||
|
"""
|
||||||
|
n23 Tamenity=yes,name=foo,addr:postcode=1234%200e%
|
||||||
|
"""
|
||||||
|
Then place contains
|
||||||
|
| object | address |
|
||||||
|
| N23 | 'postcode' : u'1234\u200e' |
|
||||||
|
|
||||||
Scenario Outline: Included places
|
Scenario Outline: Included places
|
||||||
When loading osm data
|
When loading osm data
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -22,6 +22,8 @@ class PlaceColumn:
|
|||||||
self.add_hstore('extratags', key[6:], value)
|
self.add_hstore('extratags', key[6:], value)
|
||||||
elif key.startswith('addr+'):
|
elif key.startswith('addr+'):
|
||||||
self.add_hstore('address', key[5:], value)
|
self.add_hstore('address', key[5:], value)
|
||||||
|
elif key in ('name', 'address', 'extratags'):
|
||||||
|
self.columns[key] = eval('{' + value + '}')
|
||||||
else:
|
else:
|
||||||
assert_in(key, ('class', 'type'))
|
assert_in(key, ('class', 'type'))
|
||||||
self.columns[key] = None if value == '' else value
|
self.columns[key] = None if value == '' else value
|
||||||
|
|||||||
Reference in New Issue
Block a user