mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
switch postcode tokens to new word table layout
This commit is contained in:
@@ -147,7 +147,7 @@ class Tokenizer
|
|||||||
{
|
{
|
||||||
// Check which tokens we have, get the ID numbers
|
// Check which tokens we have, get the ID numbers
|
||||||
$sSQL = 'SELECT word_id, word_token, type';
|
$sSQL = 'SELECT word_id, word_token, type';
|
||||||
$sSQL .= " info->>'cc' as country";
|
$sSQL .= ", info->>'cc' as country, info->>'postcode' as postcode";
|
||||||
$sSQL .= ' FROM word WHERE word_token in (';
|
$sSQL .= ' FROM word WHERE word_token in (';
|
||||||
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
|
$sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
|
||||||
|
|
||||||
@@ -171,6 +171,16 @@ class Tokenizer
|
|||||||
'H': // house number tokens
|
'H': // house number tokens
|
||||||
$oToken = new Token\HouseNumber($iId, $aWord['word_token']);
|
$oToken = new Token\HouseNumber($iId, $aWord['word_token']);
|
||||||
break;
|
break;
|
||||||
|
'P': // postcode tokens
|
||||||
|
// Postcodes are not normalized, so they may have content
|
||||||
|
// that makes SQL injection possible. Reject postcodes
|
||||||
|
// that would need special escaping.
|
||||||
|
if ($aWord['postcode'] === null
|
||||||
|
|| pg_escape_string($aWord['postcode']) != $aWord['postcode']
|
||||||
|
) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
$oToken = new Token\Postcode($iId, $aWord['postcode'], null);
break;
|
||||||
default:
|
default:
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -10,7 +10,12 @@ CREATE INDEX idx_word_word_token ON word
|
|||||||
USING BTREE (word_token) {{db.tablespace.search_index}};
|
USING BTREE (word_token) {{db.tablespace.search_index}};
|
||||||
-- Used when updating country names from the boundary relation.
|
-- Used when updating country names from the boundary relation.
|
||||||
CREATE INDEX idx_word_country_names ON word
|
CREATE INDEX idx_word_country_names ON word
|
||||||
USING btree((info->>'cc')) WHERE type = 'C';
|
USING btree((info->>'cc')) {{db.tablespace.address_index}}
|
||||||
|
WHERE type = 'C';
|
||||||
|
-- Used when inserting new postcodes on updates.
|
||||||
|
CREATE INDEX idx_word_postcodes ON word
|
||||||
|
USING btree((info->>'postcode')) {{db.tablespace.address_index}}
|
||||||
|
WHERE type = 'P';
|
||||||
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
|
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";
|
||||||
|
|
||||||
DROP SEQUENCE IF EXISTS seq_word;
|
DROP SEQUENCE IF EXISTS seq_word;
|
||||||
|
|||||||
@@ -276,8 +276,7 @@ class LegacyICUNameAnalyzer:
|
|||||||
(SELECT pc, word FROM
|
(SELECT pc, word FROM
|
||||||
(SELECT distinct(postcode) as pc FROM location_postcode) p
|
(SELECT distinct(postcode) as pc FROM location_postcode) p
|
||||||
FULL JOIN
|
FULL JOIN
|
||||||
(SELECT word FROM word
|
(SELECT info->>'postcode' as word FROM word WHERE type = 'P') w
|
||||||
WHERE class ='place' and type = 'postcode') w
|
|
||||||
ON pc = word) x
|
ON pc = word) x
|
||||||
WHERE pc is null or word is null""")
|
WHERE pc is null or word is null""")
|
||||||
|
|
||||||
@@ -286,20 +285,16 @@ class LegacyICUNameAnalyzer:
|
|||||||
if postcode is None:
|
if postcode is None:
|
||||||
to_delete.append(word)
|
to_delete.append(word)
|
||||||
else:
|
else:
|
||||||
copystr.add(
|
copystr.add(self.name_processor.get_search_normalized(postcode),
|
||||||
postcode,
|
'P', {'postcode': postcode})
|
||||||
' ' + self.name_processor.get_search_normalized(postcode),
|
|
||||||
'place', 'postcode', 0)
|
|
||||||
|
|
||||||
if to_delete:
|
if to_delete:
|
||||||
cur.execute("""DELETE FROM WORD
|
cur.execute("""DELETE FROM WORD
|
||||||
WHERE class ='place' and type = 'postcode'
|
WHERE type = 'P' and info->>'postcode' = any(%s)
|
||||||
and word = any(%s)
|
|
||||||
""", (to_delete, ))
|
""", (to_delete, ))
|
||||||
|
|
||||||
copystr.copy_out(cur, 'word',
|
copystr.copy_out(cur, 'word',
|
||||||
columns=['word', 'word_token', 'class', 'type',
|
columns=['word_token', 'type', 'info'])
|
||||||
'search_name_count'])
|
|
||||||
|
|
||||||
|
|
||||||
def update_special_phrases(self, phrases, should_replace):
|
def update_special_phrases(self, phrases, should_replace):
|
||||||
@@ -503,14 +498,13 @@ class LegacyICUNameAnalyzer:
|
|||||||
|
|
||||||
with self.conn.cursor() as cur:
|
with self.conn.cursor() as cur:
|
||||||
# no word_id needed for postcodes
|
# no word_id needed for postcodes
|
||||||
cur.execute("""INSERT INTO word (word, word_token, class, type,
|
cur.execute("""INSERT INTO word (word_token, type, info)
|
||||||
search_name_count)
|
(SELECT %s, 'P', json_build_object('postcode', pc)
|
||||||
(SELECT pc, %s, 'place', 'postcode', 0
|
|
||||||
FROM (VALUES (%s)) as v(pc)
|
FROM (VALUES (%s)) as v(pc)
|
||||||
WHERE NOT EXISTS
|
WHERE NOT EXISTS
|
||||||
(SELECT * FROM word
|
(SELECT * FROM word
|
||||||
WHERE word = pc and class='place' and type='postcode'))
|
WHERE type = 'P' and info->>'postcode' = pc))
|
||||||
""", (' ' + term, postcode))
|
""", (term, postcode))
|
||||||
self._cache.postcodes.add(postcode)
|
self._cache.postcodes.add(postcode)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user