mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-07 10:34:08 +00:00
port legacy tokenizer to new postcode handling
Also documents the changes to the SQL functions of the tokenizer.
This commit is contained in:
@@ -245,11 +245,11 @@ Currently, tokenizers are encouraged to make sure that matching works against
|
|||||||
both the search token list and the match token list.
|
both the search token list and the match token list.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
FUNCTION token_normalized_postcode(postcode TEXT) RETURNS TEXT
|
FUNCTION token_get_postcode(info JSONB) RETURNS TEXT
|
||||||
```
|
```
|
||||||
|
|
||||||
Return the normalized version of the given postcode. This function must return
|
Return the postcode for the object, if any exists. The postcode must be in
|
||||||
the same value as the Python function `AbstractAnalyzer.normalize_postcode()`.
|
the form that should also be presented to the end-user.
|
||||||
|
|
||||||
```sql
|
```sql
|
||||||
FUNCTION token_strip_info(info JSONB) RETURNS JSONB
|
FUNCTION token_strip_info(info JSONB) RETURNS JSONB
|
||||||
|
|||||||
@@ -97,13 +97,6 @@ AS $$
|
|||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
|
||||||
RETURNS TEXT
|
|
||||||
AS $$
|
|
||||||
SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
|
|
||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB)
|
CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB)
|
||||||
RETURNS TEXT
|
RETURNS TEXT
|
||||||
AS $$
|
AS $$
|
||||||
|
|||||||
@@ -97,10 +97,10 @@ AS $$
|
|||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
|
CREATE OR REPLACE FUNCTION token_get_postcode(info JSONB)
|
||||||
RETURNS TEXT
|
RETURNS TEXT
|
||||||
AS $$
|
AS $$
|
||||||
SELECT CASE WHEN postcode SIMILAR TO '%(,|;)%' THEN NULL ELSE upper(trim(postcode))END;
|
SELECT info->>'postcode';
|
||||||
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
$$ LANGUAGE SQL IMMUTABLE STRICT;
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -467,8 +467,9 @@ class LegacyNameAnalyzer(AbstractAnalyzer):
|
|||||||
if key == 'postcode':
|
if key == 'postcode':
|
||||||
# Make sure the normalized postcode is present in the word table.
|
# Make sure the normalized postcode is present in the word table.
|
||||||
if re.search(r'[:,;]', value) is None:
|
if re.search(r'[:,;]', value) is None:
|
||||||
self._cache.add_postcode(self.conn,
|
norm_pc = self.normalize_postcode(value)
|
||||||
self.normalize_postcode(value))
|
token_info.set_postcode(norm_pc)
|
||||||
|
self._cache.add_postcode(self.conn, norm_pc)
|
||||||
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'):
|
||||||
hnrs.append(value)
|
hnrs.append(value)
|
||||||
elif key == 'street':
|
elif key == 'street':
|
||||||
@@ -527,6 +528,11 @@ class _TokenInfo:
|
|||||||
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
self.data['hnr_tokens'], self.data['hnr'] = cur.fetchone()
|
||||||
|
|
||||||
|
|
||||||
|
def set_postcode(self, postcode):
|
||||||
|
""" Set or replace the postcode token with the given value.
|
||||||
|
"""
|
||||||
|
self.data['postcode'] = postcode
|
||||||
|
|
||||||
def add_street(self, conn, street):
|
def add_street(self, conn, street):
|
||||||
""" Add addr:street match terms.
|
""" Add addr:street match terms.
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -186,17 +186,17 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
|||||||
# Recompute the list of valid postcodes from placex.
|
# Recompute the list of valid postcodes from placex.
|
||||||
with conn.cursor(name="placex_postcodes") as cur:
|
with conn.cursor(name="placex_postcodes") as cur:
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
|
SELECT cc, pc, ST_X(centroid), ST_Y(centroid)
|
||||||
FROM (SELECT
|
FROM (SELECT
|
||||||
COALESCE(plx.country_code,
|
COALESCE(plx.country_code,
|
||||||
get_country_code(ST_Centroid(pl.geometry))) as cc,
|
get_country_code(ST_Centroid(pl.geometry))) as cc,
|
||||||
token_normalized_postcode(pl.address->'postcode') as pc,
|
pl.address->'postcode' as pc,
|
||||||
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
|
COALESCE(plx.centroid, ST_Centroid(pl.geometry)) as centroid
|
||||||
FROM place AS pl LEFT OUTER JOIN placex AS plx
|
FROM place AS pl LEFT OUTER JOIN placex AS plx
|
||||||
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
||||||
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
|
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null) xx
|
||||||
WHERE pc IS NOT null AND cc IS NOT null
|
WHERE pc IS NOT null AND cc IS NOT null
|
||||||
ORDER BY country_code, pc""")
|
ORDER BY cc, pc""")
|
||||||
|
|
||||||
collector = None
|
collector = None
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user