mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 11:08:13 +00:00
new word table layout for icu tokenizer
The table now directly reflects the different token types. Extra information is saved in a JSON structure that can be extended dynamically in the future without affecting the table layout.
This commit is contained in:
15
lib-sql/tokenizer/icu_tokenizer_tables.sql
Normal file
15
lib-sql/tokenizer/icu_tokenizer_tables.sql
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
-- Word table for the ICU tokenizer.
--
-- Each row holds one token together with its token type. Additional
-- per-token data lives in the `info` JSON document so that new attributes
-- can be added without changing the table layout.
--
-- The table is dropped and recreated, so any existing content is lost.
DROP TABLE IF EXISTS word;
-- The table must be named `word`: the index and the grant below (and the
-- DROP above) all refer to `word`, so creating it under any other name
-- makes the CREATE INDEX statement fail on a fresh database.
CREATE TABLE word (
  word_id INTEGER,
  word_token text NOT NULL,
  type text NOT NULL,  -- token type of this row
  info jsonb           -- extra, type-dependent information; may be NULL
) {{db.tablespace.search_data}};

-- Lookup index on the token string.
CREATE INDEX idx_word_word_token ON word
    USING BTREE (word_token) {{db.tablespace.search_index}};
-- The web user only ever reads from the table.
GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}";

-- Sequence starting at 1, recreated together with the table.
-- NOTE(review): presumably the source of word_id values; confirm against
-- the tokenizer code that inserts into `word`.
DROP SEQUENCE IF EXISTS seq_word;
CREATE SEQUENCE seq_word START 1;
GRANT SELECT ON seq_word TO "{{config.DATABASE_WEBUSER}}";
|
||||||
@@ -152,7 +152,7 @@ class LegacyICUTokenizer:
|
|||||||
"""
|
"""
|
||||||
with connect(self.dsn) as conn:
|
with connect(self.dsn) as conn:
|
||||||
sqlp = SQLPreprocessor(conn, config)
|
sqlp = SQLPreprocessor(conn, config)
|
||||||
sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_tables.sql')
|
sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer_tables.sql')
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
LOG.warning("Precomputing word tokens")
|
LOG.warning("Precomputing word tokens")
|
||||||
|
|||||||
Reference in New Issue
Block a user