mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-16 15:47:58 +00:00
Merge pull request #2393 from lonvia/fix-flake8-issues
Fix flake8 issues
This commit is contained in:
@@ -103,7 +103,7 @@ class CommandlineParser:
|
|||||||
return 1
|
return 1
|
||||||
|
|
||||||
|
|
||||||
##### Subcommand classes
|
# Subcommand classes
|
||||||
#
|
#
|
||||||
# Each class needs to implement two functions: add_args() adds the CLI parameters
|
# Each class needs to implement two functions: add_args() adds the CLI parameters
|
||||||
# for the subfunction, run() executes the subcommand.
|
# for the subfunction, run() executes the subcommand.
|
||||||
|
|||||||
@@ -90,7 +90,7 @@ class APISearch:
|
|||||||
if args.query:
|
if args.query:
|
||||||
params = dict(q=args.query)
|
params = dict(q=args.query)
|
||||||
else:
|
else:
|
||||||
params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
|
params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)}
|
||||||
|
|
||||||
for param, _ in EXTRADATA_PARAMS:
|
for param, _ in EXTRADATA_PARAMS:
|
||||||
if getattr(args, param):
|
if getattr(args, param):
|
||||||
|
|||||||
@@ -24,4 +24,4 @@ class NominatimArgs:
|
|||||||
main_data=self.config.TABLESPACE_PLACE_DATA,
|
main_data=self.config.TABLESPACE_PLACE_DATA,
|
||||||
main_index=self.config.TABLESPACE_PLACE_INDEX
|
main_index=self.config.TABLESPACE_PLACE_INDEX
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ class UpdateRefresh:
|
|||||||
args.threads or 1)
|
args.threads or 1)
|
||||||
indexer.index_postcodes()
|
indexer.index_postcodes()
|
||||||
else:
|
else:
|
||||||
LOG.error("The place table doesn\'t exist. " \
|
LOG.error("The place table doesn't exist. "
|
||||||
"Postcode updates on a frozen database is not possible.")
|
"Postcode updates on a frozen database is not possible.")
|
||||||
|
|
||||||
if args.word_counts:
|
if args.word_counts:
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ class UpdateReplication:
|
|||||||
indexed_only=not args.once)
|
indexed_only=not args.once)
|
||||||
|
|
||||||
# Sanity check to not overwhelm the Geofabrik servers.
|
# Sanity check to not overwhelm the Geofabrik servers.
|
||||||
if 'download.geofabrik.de'in params['base_url']\
|
if 'download.geofabrik.de' in params['base_url']\
|
||||||
and params['update_interval'] < 86400:
|
and params['update_interval'] < 86400:
|
||||||
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
|
LOG.fatal("Update interval too low for download.geofabrik.de.\n"
|
||||||
"Please check install documentation "
|
"Please check install documentation "
|
||||||
|
|||||||
@@ -85,7 +85,7 @@ class DBConnection:
|
|||||||
|
|
||||||
# Use a dict to hand in the parameters because async is a reserved
|
# Use a dict to hand in the parameters because async is a reserved
|
||||||
# word in Python3.
|
# word in Python3.
|
||||||
self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True})
|
self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True})
|
||||||
self.wait()
|
self.wait()
|
||||||
|
|
||||||
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
|
self.cursor = self.conn.cursor(cursor_factory=cursor_factory)
|
||||||
|
|||||||
@@ -8,6 +8,7 @@ import os
|
|||||||
import psycopg2
|
import psycopg2
|
||||||
import psycopg2.extensions
|
import psycopg2.extensions
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.errors import UsageError
|
from nominatim.errors import UsageError
|
||||||
|
|
||||||
@@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor):
|
|||||||
|
|
||||||
super().execute(query, args)
|
super().execute(query, args)
|
||||||
|
|
||||||
|
|
||||||
|
def execute_values(self, sql, argslist, template=None):
|
||||||
|
""" Wrapper for the psycopg2 convenience function to execute
|
||||||
|
SQL for a list of values.
|
||||||
|
"""
|
||||||
|
LOG.debug("SQL execute_values(%s, %s)", sql, argslist)
|
||||||
|
|
||||||
|
psycopg2.extras.execute_values(self, sql, argslist, template=template)
|
||||||
|
|
||||||
|
|
||||||
def scalar(self, sql, args=None):
|
def scalar(self, sql, args=None):
|
||||||
""" Execute query that returns a single value. The value is returned.
|
""" Execute query that returns a single value. The value is returned.
|
||||||
If the query yields more than one row, a ValueError is raised.
|
If the query yields more than one row, a ValueError is raised.
|
||||||
@@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor):
|
|||||||
return self.fetchone()[0]
|
return self.fetchone()[0]
|
||||||
|
|
||||||
|
|
||||||
|
def drop_table(self, name, if_exists=True, cascade=False):
|
||||||
|
""" Drop the table with the given name.
|
||||||
|
Set `if_exists` to False if a non-existant table should raise
|
||||||
|
an exception instead of just being ignored. If 'cascade' is set
|
||||||
|
to True then all dependent tables are deleted as well.
|
||||||
|
"""
|
||||||
|
sql = 'DROP TABLE '
|
||||||
|
if if_exists:
|
||||||
|
sql += 'IF EXISTS '
|
||||||
|
sql += '{}'
|
||||||
|
if cascade:
|
||||||
|
sql += ' CASCADE'
|
||||||
|
|
||||||
|
self.execute(pysql.SQL(sql).format(pysql.Identifier(name)))
|
||||||
|
|
||||||
|
|
||||||
class _Connection(psycopg2.extensions.connection):
|
class _Connection(psycopg2.extensions.connection):
|
||||||
""" A connection that provides the specialised cursor by default and
|
""" A connection that provides the specialised cursor by default and
|
||||||
adds convenience functions for administrating the database.
|
adds convenience functions for administrating the database.
|
||||||
@@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection):
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
def drop_table(self, name, if_exists=True):
|
def drop_table(self, name, if_exists=True, cascade=False):
|
||||||
""" Drop the table with the given name.
|
""" Drop the table with the given name.
|
||||||
Set `if_exists` to False if a non-existant table should raise
|
Set `if_exists` to False if a non-existant table should raise
|
||||||
an exception instead of just being ignored.
|
an exception instead of just being ignored.
|
||||||
"""
|
"""
|
||||||
with self.cursor() as cur:
|
with self.cursor() as cur:
|
||||||
cur.execute("""DROP TABLE {} "{}"
|
cur.drop_table(name, if_exists, cascade)
|
||||||
""".format('IF EXISTS' if if_exists else '', name))
|
|
||||||
self.commit()
|
self.commit()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -61,7 +61,7 @@ def _setup_postgresql_features(conn):
|
|||||||
"""
|
"""
|
||||||
pg_version = conn.server_version_tuple()
|
pg_version = conn.server_version_tuple()
|
||||||
return {
|
return {
|
||||||
'has_index_non_key_column' : pg_version >= (11, 0, 0)
|
'has_index_non_key_column': pg_version >= (11, 0, 0)
|
||||||
}
|
}
|
||||||
|
|
||||||
class SQLPreprocessor:
|
class SQLPreprocessor:
|
||||||
|
|||||||
@@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None)
|
|||||||
|
|
||||||
|
|
||||||
# List of characters that need to be quoted for the copy command.
|
# List of characters that need to be quoted for the copy command.
|
||||||
_SQL_TRANSLATION = {ord(u'\\') : u'\\\\',
|
_SQL_TRANSLATION = {ord(u'\\'): u'\\\\',
|
||||||
ord(u'\t') : u'\\t',
|
ord(u'\t'): u'\\t',
|
||||||
ord(u'\n') : u'\\n'}
|
ord(u'\n'): u'\\n'}
|
||||||
|
|
||||||
class CopyBuffer:
|
class CopyBuffer:
|
||||||
""" Data collector for the copy_from command.
|
""" Data collector for the copy_from command.
|
||||||
|
|||||||
@@ -203,7 +203,7 @@ class Indexer:
|
|||||||
|
|
||||||
# And insert the curent batch
|
# And insert the curent batch
|
||||||
for idx in range(0, len(places), batch):
|
for idx in range(0, len(places), batch):
|
||||||
part = places[idx:idx+batch]
|
part = places[idx:idx + batch]
|
||||||
LOG.debug("Processing places: %s", str(part))
|
LOG.debug("Processing places: %s", str(part))
|
||||||
runner.index_places(pool.next_free_worker(), part)
|
runner.index_places(pool.next_free_worker(), part)
|
||||||
progress.add(len(part))
|
progress.add(len(part))
|
||||||
|
|||||||
@@ -63,7 +63,7 @@ class ProgressLogger:
|
|||||||
places_per_sec = self.done_places
|
places_per_sec = self.done_places
|
||||||
else:
|
else:
|
||||||
diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
|
diff_seconds = (rank_end_time - self.rank_start_time).total_seconds()
|
||||||
places_per_sec = self.done_places/diff_seconds
|
places_per_sec = self.done_places / diff_seconds
|
||||||
|
|
||||||
LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
|
LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n",
|
||||||
self.done_places, self.total_places, int(diff_seconds),
|
self.done_places, self.total_places, int(diff_seconds),
|
||||||
|
|||||||
@@ -5,13 +5,17 @@ tasks.
|
|||||||
import functools
|
import functools
|
||||||
|
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
# pylint: disable=C0111
|
# pylint: disable=C0111
|
||||||
|
|
||||||
|
def _mk_valuelist(template, num):
|
||||||
|
return pysql.SQL(',').join([pysql.SQL(template)] * num)
|
||||||
|
|
||||||
class AbstractPlacexRunner:
|
class AbstractPlacexRunner:
|
||||||
""" Returns SQL commands for indexing of the placex table.
|
""" Returns SQL commands for indexing of the placex table.
|
||||||
"""
|
"""
|
||||||
SELECT_SQL = 'SELECT place_id FROM placex'
|
SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ')
|
||||||
|
|
||||||
def __init__(self, rank, analyzer):
|
def __init__(self, rank, analyzer):
|
||||||
self.rank = rank
|
self.rank = rank
|
||||||
@@ -21,11 +25,12 @@ class AbstractPlacexRunner:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
@functools.lru_cache(maxsize=1)
|
@functools.lru_cache(maxsize=1)
|
||||||
def _index_sql(num_places):
|
def _index_sql(num_places):
|
||||||
return """ UPDATE placex
|
return pysql.SQL(
|
||||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
""" UPDATE placex
|
||||||
FROM (VALUES {}) as v(id, addr, ti)
|
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||||
WHERE place_id = v.id
|
FROM (VALUES {}) as v(id, addr, ti)
|
||||||
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
WHERE place_id = v.id
|
||||||
|
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
@@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner):
|
|||||||
return "rank {}".format(self.rank)
|
return "rank {}".format(self.rank)
|
||||||
|
|
||||||
def sql_count_objects(self):
|
def sql_count_objects(self):
|
||||||
return """SELECT count(*) FROM placex
|
return pysql.SQL("""SELECT count(*) FROM placex
|
||||||
WHERE rank_address = {} and indexed_status > 0
|
WHERE rank_address = {} and indexed_status > 0
|
||||||
""".format(self.rank)
|
""").format(pysql.Literal(self.rank))
|
||||||
|
|
||||||
def sql_get_objects(self):
|
def sql_get_objects(self):
|
||||||
return """{} WHERE indexed_status > 0 and rank_address = {}
|
return self.SELECT_SQL + pysql.SQL(
|
||||||
ORDER BY geometry_sector
|
"""WHERE indexed_status > 0 and rank_address = {}
|
||||||
""".format(self.SELECT_SQL, self.rank)
|
ORDER BY geometry_sector
|
||||||
|
""").format(pysql.Literal(self.rank))
|
||||||
|
|
||||||
|
|
||||||
class BoundaryRunner(AbstractPlacexRunner):
|
class BoundaryRunner(AbstractPlacexRunner):
|
||||||
@@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner):
|
|||||||
return "boundaries rank {}".format(self.rank)
|
return "boundaries rank {}".format(self.rank)
|
||||||
|
|
||||||
def sql_count_objects(self):
|
def sql_count_objects(self):
|
||||||
return """SELECT count(*) FROM placex
|
return pysql.SQL("""SELECT count(*) FROM placex
|
||||||
WHERE indexed_status > 0
|
WHERE indexed_status > 0
|
||||||
AND rank_search = {}
|
AND rank_search = {}
|
||||||
AND class = 'boundary' and type = 'administrative'
|
AND class = 'boundary' and type = 'administrative'
|
||||||
""".format(self.rank)
|
""").format(pysql.Literal(self.rank))
|
||||||
|
|
||||||
def sql_get_objects(self):
|
def sql_get_objects(self):
|
||||||
return """{} WHERE indexed_status > 0 and rank_search = {}
|
return self.SELECT_SQL + pysql.SQL(
|
||||||
and class = 'boundary' and type = 'administrative'
|
"""WHERE indexed_status > 0 and rank_search = {}
|
||||||
ORDER BY partition, admin_level
|
and class = 'boundary' and type = 'administrative'
|
||||||
""".format(self.SELECT_SQL, self.rank)
|
ORDER BY partition, admin_level
|
||||||
|
""").format(pysql.Literal(self.rank))
|
||||||
|
|
||||||
|
|
||||||
class InterpolationRunner:
|
class InterpolationRunner:
|
||||||
@@ -120,11 +127,11 @@ class InterpolationRunner:
|
|||||||
@staticmethod
|
@staticmethod
|
||||||
@functools.lru_cache(maxsize=1)
|
@functools.lru_cache(maxsize=1)
|
||||||
def _index_sql(num_places):
|
def _index_sql(num_places):
|
||||||
return """ UPDATE location_property_osmline
|
return pysql.SQL("""UPDATE location_property_osmline
|
||||||
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
SET indexed_status = 0, address = v.addr, token_info = v.ti
|
||||||
FROM (VALUES {}) as v(id, addr, ti)
|
FROM (VALUES {}) as v(id, addr, ti)
|
||||||
WHERE place_id = v.id
|
WHERE place_id = v.id
|
||||||
""".format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places))
|
""").format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places))
|
||||||
|
|
||||||
|
|
||||||
def index_places(self, worker, places):
|
def index_places(self, worker, places):
|
||||||
@@ -157,6 +164,6 @@ class PostcodeRunner:
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def index_places(worker, ids):
|
def index_places(worker, ids):
|
||||||
worker.perform(""" UPDATE location_postcode SET indexed_status = 0
|
worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0
|
||||||
WHERE place_id IN ({})
|
WHERE place_id IN ({})""")
|
||||||
""".format(','.join((str(i[0]) for i in ids))))
|
.format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids))))
|
||||||
|
|||||||
@@ -9,8 +9,6 @@ import re
|
|||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import psycopg2.extras
|
|
||||||
|
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
from nominatim.db.properties import set_property, get_property
|
from nominatim.db.properties import set_property, get_property
|
||||||
from nominatim.db.utils import CopyBuffer
|
from nominatim.db.utils import CopyBuffer
|
||||||
@@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer:
|
|||||||
term = self.name_processor.get_search_normalized(word)
|
term = self.name_processor.get_search_normalized(word)
|
||||||
if term:
|
if term:
|
||||||
copystr.add(word, ' ' + term, cls, typ,
|
copystr.add(word, ' ' + term, cls, typ,
|
||||||
oper if oper in ('in', 'near') else None, 0)
|
oper if oper in ('in', 'near') else None, 0)
|
||||||
added += 1
|
added += 1
|
||||||
|
|
||||||
copystr.copy_out(cursor, 'word',
|
copystr.copy_out(cursor, 'word',
|
||||||
@@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer:
|
|||||||
to_delete = existing_phrases - new_phrases
|
to_delete = existing_phrases - new_phrases
|
||||||
|
|
||||||
if to_delete:
|
if to_delete:
|
||||||
psycopg2.extras.execute_values(
|
cursor.execute_values(
|
||||||
cursor,
|
|
||||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||||
WHERE word = name and class = in_class and type = in_type
|
WHERE word = name and class = in_class and type = in_type
|
||||||
and ((op = '-' and operator is null) or op = operator)""",
|
and ((op = '-' and operator is null) or op = operator)""",
|
||||||
@@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer:
|
|||||||
if word_tokens:
|
if word_tokens:
|
||||||
cur.execute("""INSERT INTO word (word_id, word_token, country_code,
|
cur.execute("""INSERT INTO word (word_id, word_token, country_code,
|
||||||
search_name_count)
|
search_name_count)
|
||||||
(SELECT nextval('seq_word'), token, '{}', 0
|
(SELECT nextval('seq_word'), token, %s, 0
|
||||||
FROM unnest(%s) as token)
|
FROM unnest(%s) as token)
|
||||||
""".format(country_code), (list(word_tokens),))
|
""", (country_code, list(word_tokens)))
|
||||||
|
|
||||||
|
|
||||||
def process_place(self, place):
|
def process_place(self, place):
|
||||||
|
|||||||
@@ -370,8 +370,7 @@ class LegacyNameAnalyzer:
|
|||||||
to_delete = existing_phrases - norm_phrases
|
to_delete = existing_phrases - norm_phrases
|
||||||
|
|
||||||
if to_add:
|
if to_add:
|
||||||
psycopg2.extras.execute_values(
|
cur.execute_values(
|
||||||
cur,
|
|
||||||
""" INSERT INTO word (word_id, word_token, word, class, type,
|
""" INSERT INTO word (word_id, word_token, word, class, type,
|
||||||
search_name_count, operator)
|
search_name_count, operator)
|
||||||
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
|
(SELECT nextval('seq_word'), ' ' || make_standard_name(name), name,
|
||||||
@@ -381,8 +380,7 @@ class LegacyNameAnalyzer:
|
|||||||
to_add)
|
to_add)
|
||||||
|
|
||||||
if to_delete and should_replace:
|
if to_delete and should_replace:
|
||||||
psycopg2.extras.execute_values(
|
cur.execute_values(
|
||||||
cur,
|
|
||||||
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
""" DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op)
|
||||||
WHERE word = name and class = in_class and type = in_type
|
WHERE word = name and class = in_class and type = in_type
|
||||||
and ((op = '-' and operator is null) or op = operator)""",
|
and ((op = '-' and operator is null) or op = operator)""",
|
||||||
@@ -582,7 +580,7 @@ class _TokenCache:
|
|||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
|
cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text
|
||||||
FROM generate_series(1, 100) as i""")
|
FROM generate_series(1, 100) as i""")
|
||||||
self._cached_housenumbers = {str(r[0]) : r[1] for r in cur}
|
self._cached_housenumbers = {str(r[0]): r[1] for r in cur}
|
||||||
|
|
||||||
# For postcodes remember the ones that have already been added
|
# For postcodes remember the ones that have already been added
|
||||||
self.postcodes = set()
|
self.postcodes = set()
|
||||||
|
|||||||
@@ -24,6 +24,7 @@ def _check(hint=None):
|
|||||||
"""
|
"""
|
||||||
def decorator(func):
|
def decorator(func):
|
||||||
title = func.__doc__.split('\n', 1)[0].strip()
|
title = func.__doc__.split('\n', 1)[0].strip()
|
||||||
|
|
||||||
def run_check(conn, config):
|
def run_check(conn, config):
|
||||||
print(title, end=' ... ')
|
print(title, end=' ... ')
|
||||||
ret = func(conn, config)
|
ret = func(conn, config)
|
||||||
@@ -98,13 +99,12 @@ def _get_indexes(conn):
|
|||||||
if conn.table_exists('place'):
|
if conn.table_exists('place'):
|
||||||
indexes.extend(('idx_placex_pendingsector',
|
indexes.extend(('idx_placex_pendingsector',
|
||||||
'idx_location_area_country_place_id',
|
'idx_location_area_country_place_id',
|
||||||
'idx_place_osm_unique'
|
'idx_place_osm_unique'))
|
||||||
))
|
|
||||||
|
|
||||||
return indexes
|
return indexes
|
||||||
|
|
||||||
|
|
||||||
### CHECK FUNCTIONS
|
# CHECK FUNCTIONS
|
||||||
#
|
#
|
||||||
# Functions are exectured in the order they appear here.
|
# Functions are exectured in the order they appear here.
|
||||||
|
|
||||||
|
|||||||
@@ -9,6 +9,7 @@ from pathlib import Path
|
|||||||
|
|
||||||
import psutil
|
import psutil
|
||||||
import psycopg2.extras
|
import psycopg2.extras
|
||||||
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.db.connection import connect, get_pg_env
|
from nominatim.db.connection import connect, get_pg_env
|
||||||
from nominatim.db import utils as db_utils
|
from nominatim.db import utils as db_utils
|
||||||
@@ -184,7 +185,12 @@ def truncate_data_tables(conn):
|
|||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'
|
|
||||||
|
_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier,
|
||||||
|
('osm_type', 'osm_id', 'class', 'type',
|
||||||
|
'name', 'admin_level', 'address',
|
||||||
|
'extratags', 'geometry')))
|
||||||
|
|
||||||
|
|
||||||
def load_data(dsn, threads):
|
def load_data(dsn, threads):
|
||||||
""" Copy data into the word and placex table.
|
""" Copy data into the word and placex table.
|
||||||
@@ -195,12 +201,15 @@ def load_data(dsn, threads):
|
|||||||
for imod in range(place_threads):
|
for imod in range(place_threads):
|
||||||
conn = DBConnection(dsn)
|
conn = DBConnection(dsn)
|
||||||
conn.connect()
|
conn.connect()
|
||||||
conn.perform("""INSERT INTO placex ({0})
|
conn.perform(
|
||||||
SELECT {0} FROM place
|
pysql.SQL("""INSERT INTO placex ({columns})
|
||||||
WHERE osm_id % {1} = {2}
|
SELECT {columns} FROM place
|
||||||
AND NOT (class='place' and (type='houses' or type='postcode'))
|
WHERE osm_id % {total} = {mod}
|
||||||
AND ST_IsValid(geometry)
|
AND NOT (class='place' and (type='houses' or type='postcode'))
|
||||||
""".format(_COPY_COLUMNS, place_threads, imod))
|
AND ST_IsValid(geometry)
|
||||||
|
""").format(columns=_COPY_COLUMNS,
|
||||||
|
total=pysql.Literal(place_threads),
|
||||||
|
mod=pysql.Literal(imod)))
|
||||||
sel.register(conn, selectors.EVENT_READ, conn)
|
sel.register(conn, selectors.EVENT_READ, conn)
|
||||||
|
|
||||||
# Address interpolations go into another table.
|
# Address interpolations go into another table.
|
||||||
@@ -250,6 +259,7 @@ def create_search_indices(conn, config, drop=False):
|
|||||||
|
|
||||||
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
sql.run_sql_file(conn, 'indices.sql', drop=drop)
|
||||||
|
|
||||||
|
|
||||||
def create_country_names(conn, tokenizer, languages=None):
|
def create_country_names(conn, tokenizer, languages=None):
|
||||||
""" Add default country names to search index. `languages` is a comma-
|
""" Add default country names to search index. `languages` is a comma-
|
||||||
separated list of language codes as used in OSM. If `languages` is not
|
separated list of language codes as used in OSM. If `languages` is not
|
||||||
@@ -261,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None):
|
|||||||
|
|
||||||
def _include_key(key):
|
def _include_key(key):
|
||||||
return key == 'name' or \
|
return key == 'name' or \
|
||||||
(key.startswith('name:') \
|
(key.startswith('name:') and (not languages or key[5:] in languages))
|
||||||
and (not languages or key[5:] in languages))
|
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
psycopg2.extras.register_hstore(cur)
|
psycopg2.extras.register_hstore(cur)
|
||||||
@@ -271,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None):
|
|||||||
|
|
||||||
with tokenizer.name_analyzer() as analyzer:
|
with tokenizer.name_analyzer() as analyzer:
|
||||||
for code, name in cur:
|
for code, name in cur:
|
||||||
names = {'countrycode' : code}
|
names = {'countrycode': code}
|
||||||
if code == 'gb':
|
if code == 'gb':
|
||||||
names['short_name'] = 'UK'
|
names['short_name'] = 'UK'
|
||||||
if code == 'us':
|
if code == 'us':
|
||||||
|
|||||||
@@ -136,11 +136,11 @@ def run_osm2pgsql(options):
|
|||||||
def get_url(url):
|
def get_url(url):
|
||||||
""" Get the contents from the given URL and return it as a UTF-8 string.
|
""" Get the contents from the given URL and return it as a UTF-8 string.
|
||||||
"""
|
"""
|
||||||
headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
|
headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)}
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
|
with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response:
|
||||||
return response.read().decode('utf-8')
|
return response.read().decode('utf-8')
|
||||||
except:
|
except Exception:
|
||||||
LOG.fatal('Failed to load URL: %s', url)
|
LOG.fatal('Failed to load URL: %s', url)
|
||||||
raise
|
raise
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database.
|
|||||||
"""
|
"""
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
UPDATE_TABLES = [
|
UPDATE_TABLES = [
|
||||||
'address_levels',
|
'address_levels',
|
||||||
'gb_postcode',
|
'gb_postcode',
|
||||||
@@ -21,15 +23,15 @@ def drop_update_tables(conn):
|
|||||||
""" Drop all tables only necessary for updating the database from
|
""" Drop all tables only necessary for updating the database from
|
||||||
OSM replication data.
|
OSM replication data.
|
||||||
"""
|
"""
|
||||||
|
parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES)
|
||||||
where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES])
|
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute("SELECT tablename FROM pg_tables WHERE " + where)
|
cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ")
|
||||||
|
+ pysql.SQL(' or ').join(parts))
|
||||||
tables = [r[0] for r in cur]
|
tables = [r[0] for r in cur]
|
||||||
|
|
||||||
for table in tables:
|
for table in tables:
|
||||||
cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table))
|
cur.drop_table(table, cascade=True)
|
||||||
|
|
||||||
conn.commit()
|
conn.commit()
|
||||||
|
|
||||||
|
|||||||
@@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_):
|
|||||||
BEGIN
|
BEGIN
|
||||||
SELECT array_to_string(array_agg(trans), ';')
|
SELECT array_to_string(array_agg(trans), ';')
|
||||||
INTO normtext
|
INTO normtext
|
||||||
FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word)
|
FROM (SELECT lookup_word as trans,
|
||||||
|
getorcreate_housenumber_id(lookup_word)
|
||||||
FROM (SELECT make_standard_name(h) as lookup_word
|
FROM (SELECT make_standard_name(h) as lookup_word
|
||||||
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
|
FROM regexp_split_to_table(housenumber, '[,;]') h) x) y;
|
||||||
return normtext;
|
return normtext;
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import gzip
|
|||||||
import logging
|
import logging
|
||||||
from math import isfinite
|
from math import isfinite
|
||||||
|
|
||||||
from psycopg2.extras import execute_values
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.db.connection import connect
|
from nominatim.db.connection import connect
|
||||||
|
|
||||||
@@ -52,27 +52,26 @@ class _CountryPostcodesCollector:
|
|||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
if to_add:
|
if to_add:
|
||||||
execute_values(cur,
|
cur.execute_values(
|
||||||
"""INSERT INTO location_postcode
|
"""INSERT INTO location_postcode
|
||||||
(place_id, indexed_status, country_code,
|
(place_id, indexed_status, country_code,
|
||||||
postcode, geometry) VALUES %s""",
|
postcode, geometry) VALUES %s""",
|
||||||
to_add,
|
to_add,
|
||||||
template="""(nextval('seq_place'), 1, '{}',
|
template=pysql.SQL("""(nextval('seq_place'), 1, {},
|
||||||
%s, 'SRID=4326;POINT(%s %s)')
|
%s, 'SRID=4326;POINT(%s %s)')
|
||||||
""".format(self.country))
|
""").format(pysql.Literal(self.country)))
|
||||||
if to_delete:
|
if to_delete:
|
||||||
cur.execute("""DELETE FROM location_postcode
|
cur.execute("""DELETE FROM location_postcode
|
||||||
WHERE country_code = %s and postcode = any(%s)
|
WHERE country_code = %s and postcode = any(%s)
|
||||||
""", (self.country, to_delete))
|
""", (self.country, to_delete))
|
||||||
if to_update:
|
if to_update:
|
||||||
execute_values(cur,
|
cur.execute_values(
|
||||||
"""UPDATE location_postcode
|
pysql.SQL("""UPDATE location_postcode
|
||||||
SET indexed_status = 2,
|
SET indexed_status = 2,
|
||||||
geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
|
geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326)
|
||||||
FROM (VALUES %s) AS v (pc, x, y)
|
FROM (VALUES %s) AS v (pc, x, y)
|
||||||
WHERE country_code = '{}' and postcode = pc
|
WHERE country_code = {} and postcode = pc
|
||||||
""".format(self.country),
|
""").format(pysql.Literal(self.country)), to_update)
|
||||||
to_update)
|
|
||||||
|
|
||||||
|
|
||||||
def _compute_changes(self, conn):
|
def _compute_changes(self, conn):
|
||||||
@@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer):
|
|||||||
with conn.cursor(name="placex_postcodes") as cur:
|
with conn.cursor(name="placex_postcodes") as cur:
|
||||||
cur.execute("""
|
cur.execute("""
|
||||||
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
|
SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid)
|
||||||
FROM (SELECT
|
FROM (SELECT
|
||||||
COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc,
|
COALESCE(plx.country_code,
|
||||||
|
get_country_code(ST_Centroid(pl.geometry))) as cc,
|
||||||
token_normalized_postcode(pl.address->'postcode') as pc,
|
token_normalized_postcode(pl.address->'postcode') as pc,
|
||||||
ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid
|
ST_Centroid(ST_Collect(COALESCE(plx.centroid,
|
||||||
FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
ST_Centroid(pl.geometry)))) as centroid
|
||||||
|
FROM place AS pl LEFT OUTER JOIN placex AS plx
|
||||||
|
ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type
|
||||||
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
|
WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null
|
||||||
GROUP BY cc, pc) xx
|
GROUP BY cc, pc) xx
|
||||||
WHERE pc IS NOT null AND cc IS NOT null
|
WHERE pc IS NOT null AND cc IS NOT null
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import json
|
|||||||
import logging
|
import logging
|
||||||
from textwrap import dedent
|
from textwrap import dedent
|
||||||
|
|
||||||
from psycopg2.extras import execute_values
|
from psycopg2 import sql as pysql
|
||||||
|
|
||||||
from nominatim.db.utils import execute_file
|
from nominatim.db.utils import execute_file
|
||||||
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
from nominatim.db.sql_preprocessor import SQLPreprocessor
|
||||||
@@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels):
|
|||||||
_add_address_level_rows_from_entry(rows, entry)
|
_add_address_level_rows_from_entry(rows, entry)
|
||||||
|
|
||||||
with conn.cursor() as cur:
|
with conn.cursor() as cur:
|
||||||
cur.execute('DROP TABLE IF EXISTS {}'.format(table))
|
cur.drop_table(table)
|
||||||
|
|
||||||
cur.execute("""CREATE TABLE {} (country_code varchar(2),
|
cur.execute("""CREATE TABLE {} (country_code varchar(2),
|
||||||
class TEXT,
|
class TEXT,
|
||||||
@@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels):
|
|||||||
rank_search SMALLINT,
|
rank_search SMALLINT,
|
||||||
rank_address SMALLINT)""".format(table))
|
rank_address SMALLINT)""".format(table))
|
||||||
|
|
||||||
execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows)
|
cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s")
|
||||||
|
.format(pysql.Identifier(table)), rows)
|
||||||
|
|
||||||
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
|
cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table))
|
||||||
|
|
||||||
|
|||||||
@@ -44,8 +44,8 @@ class SPImporter():
|
|||||||
# This set will contain all existing phrases to be added.
|
# This set will contain all existing phrases to be added.
|
||||||
# It contains tuples with the following format: (label, class, type, operator)
|
# It contains tuples with the following format: (label, class, type, operator)
|
||||||
self.word_phrases = set()
|
self.word_phrases = set()
|
||||||
#This set will contain all existing place_classtype tables which doesn't match any
|
# This set will contain all existing place_classtype tables which don't match any
|
||||||
#special phrases class/type on the wiki.
|
# special phrases class/type on the wiki.
|
||||||
self.table_phrases_to_delete = set()
|
self.table_phrases_to_delete = set()
|
||||||
|
|
||||||
def import_phrases(self, tokenizer, should_replace):
|
def import_phrases(self, tokenizer, should_replace):
|
||||||
@@ -60,7 +60,7 @@ class SPImporter():
|
|||||||
LOG.warning('Special phrases importation starting')
|
LOG.warning('Special phrases importation starting')
|
||||||
self._fetch_existing_place_classtype_tables()
|
self._fetch_existing_place_classtype_tables()
|
||||||
|
|
||||||
#Store pairs of class/type for further processing
|
# Store pairs of class/type for further processing
|
||||||
class_type_pairs = set()
|
class_type_pairs = set()
|
||||||
|
|
||||||
for loaded_phrases in self.sp_loader:
|
for loaded_phrases in self.sp_loader:
|
||||||
@@ -131,17 +131,17 @@ class SPImporter():
|
|||||||
Return the class/type pair corresponding to the phrase.
|
Return the class/type pair corresponding to the phrase.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#blacklisting: disallow certain class/type combinations
|
# blacklisting: disallow certain class/type combinations
|
||||||
if phrase.p_class in self.black_list.keys() \
|
if phrase.p_class in self.black_list.keys() \
|
||||||
and phrase.p_type in self.black_list[phrase.p_class]:
|
and phrase.p_type in self.black_list[phrase.p_class]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
#whitelisting: if class is in whitelist, allow only tags in the list
|
# whitelisting: if class is in whitelist, allow only tags in the list
|
||||||
if phrase.p_class in self.white_list.keys() \
|
if phrase.p_class in self.white_list.keys() \
|
||||||
and phrase.p_type not in self.white_list[phrase.p_class]:
|
and phrase.p_type not in self.white_list[phrase.p_class]:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
#sanity check, in case somebody added garbage in the wiki
|
# sanity check, in case somebody added garbage in the wiki
|
||||||
if not self._check_sanity(phrase):
|
if not self._check_sanity(phrase):
|
||||||
self.statistics_handler.notify_one_phrase_invalid()
|
self.statistics_handler.notify_one_phrase_invalid()
|
||||||
return None
|
return None
|
||||||
@@ -161,7 +161,7 @@ class SPImporter():
|
|||||||
|
|
||||||
sql_tablespace = self.config.TABLESPACE_AUX_DATA
|
sql_tablespace = self.config.TABLESPACE_AUX_DATA
|
||||||
if sql_tablespace:
|
if sql_tablespace:
|
||||||
sql_tablespace = ' TABLESPACE '+sql_tablespace
|
sql_tablespace = ' TABLESPACE ' + sql_tablespace
|
||||||
|
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
|
||||||
@@ -174,19 +174,19 @@ class SPImporter():
|
|||||||
|
|
||||||
if table_name in self.table_phrases_to_delete:
|
if table_name in self.table_phrases_to_delete:
|
||||||
self.statistics_handler.notify_one_table_ignored()
|
self.statistics_handler.notify_one_table_ignored()
|
||||||
#Remove this table from the ones to delete as it match a class/type
|
# Remove this table from the ones to delete as it matches a
|
||||||
#still existing on the special phrases of the wiki.
|
# class/type still existing on the special phrases of the wiki.
|
||||||
self.table_phrases_to_delete.remove(table_name)
|
self.table_phrases_to_delete.remove(table_name)
|
||||||
#So dont need to create the table and indexes.
|
# So there is no need to create the table and indexes.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
#Table creation
|
# Table creation
|
||||||
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
|
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
|
||||||
|
|
||||||
#Indexes creation
|
# Indexes creation
|
||||||
self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
|
self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
|
||||||
|
|
||||||
#Grant access on read to the web user.
|
# Grant access on read to the web user.
|
||||||
self._grant_access_to_webuser(phrase_class, phrase_type)
|
self._grant_access_to_webuser(phrase_class, phrase_type)
|
||||||
|
|
||||||
self.statistics_handler.notify_one_table_created()
|
self.statistics_handler.notify_one_table_created()
|
||||||
@@ -202,8 +202,8 @@ class SPImporter():
|
|||||||
table_name = _classtype_table(phrase_class, phrase_type)
|
table_name = _classtype_table(phrase_class, phrase_type)
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute(SQL("""
|
db_cursor.execute(SQL("""
|
||||||
CREATE TABLE IF NOT EXISTS {{}} {}
|
CREATE TABLE IF NOT EXISTS {{}} {}
|
||||||
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
|
AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
|
||||||
WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
|
WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
|
||||||
.format(Identifier(table_name), Literal(phrase_class),
|
.format(Identifier(table_name), Literal(phrase_class),
|
||||||
Literal(phrase_type)))
|
Literal(phrase_type)))
|
||||||
@@ -215,7 +215,7 @@ class SPImporter():
|
|||||||
"""
|
"""
|
||||||
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
|
index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
|
||||||
base_table = _classtype_table(phrase_class, phrase_type)
|
base_table = _classtype_table(phrase_class, phrase_type)
|
||||||
#Index on centroid
|
# Index on centroid
|
||||||
if not self.db_connection.index_exists(index_prefix + 'centroid'):
|
if not self.db_connection.index_exists(index_prefix + 'centroid'):
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute(SQL("""
|
db_cursor.execute(SQL("""
|
||||||
@@ -223,7 +223,7 @@ class SPImporter():
|
|||||||
.format(Identifier(index_prefix + 'centroid'),
|
.format(Identifier(index_prefix + 'centroid'),
|
||||||
Identifier(base_table)), sql_tablespace)
|
Identifier(base_table)), sql_tablespace)
|
||||||
|
|
||||||
#Index on place_id
|
# Index on place_id
|
||||||
if not self.db_connection.index_exists(index_prefix + 'place_id'):
|
if not self.db_connection.index_exists(index_prefix + 'place_id'):
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
db_cursor.execute(SQL(
|
db_cursor.execute(SQL(
|
||||||
@@ -248,18 +248,14 @@ class SPImporter():
|
|||||||
Delete the place_classtype tables.
|
Delete the place_classtype tables.
|
||||||
"""
|
"""
|
||||||
LOG.warning('Cleaning database...')
|
LOG.warning('Cleaning database...')
|
||||||
#Array containing all queries to execute. Contain tuples of format (query, parameters)
|
|
||||||
queries_parameters = []
|
|
||||||
|
|
||||||
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
|
|
||||||
for table in self.table_phrases_to_delete:
|
|
||||||
self.statistics_handler.notify_one_table_deleted()
|
|
||||||
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
|
|
||||||
queries_parameters.append((query, ()))
|
|
||||||
|
|
||||||
|
# Delete place_classtype tables corresponding to class/type which
|
||||||
|
# are not on the wiki anymore.
|
||||||
with self.db_connection.cursor() as db_cursor:
|
with self.db_connection.cursor() as db_cursor:
|
||||||
for query, parameters in queries_parameters:
|
for table in self.table_phrases_to_delete:
|
||||||
db_cursor.execute(query, parameters)
|
self.statistics_handler.notify_one_table_deleted()
|
||||||
|
db_cursor.drop_table(table)
|
||||||
|
|
||||||
|
|
||||||
def _convert_php_settings_if_needed(self, file_path):
|
def _convert_php_settings_if_needed(self, file_path):
|
||||||
"""
|
"""
|
||||||
@@ -271,7 +267,7 @@ class SPImporter():
|
|||||||
file, extension = os.path.splitext(file_path)
|
file, extension = os.path.splitext(file_path)
|
||||||
json_file_path = Path(file + '.json').resolve()
|
json_file_path = Path(file + '.json').resolve()
|
||||||
|
|
||||||
if extension not in('.php', '.json'):
|
if extension not in ('.php', '.json'):
|
||||||
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
|
raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.')
|
||||||
|
|
||||||
if extension == '.php' and not isfile(json_file_path):
|
if extension == '.php' and not isfile(json_file_path):
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ class SPWikiLoader(Iterator):
|
|||||||
def __init__(self, config, languages=None):
|
def __init__(self, config, languages=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
self.config = config
|
self.config = config
|
||||||
#Compile the regex here to increase performances.
|
# Compile the regex here to improve performance.
|
||||||
self.occurence_pattern = re.compile(
|
self.occurence_pattern = re.compile(
|
||||||
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
|
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
|
||||||
)
|
)
|
||||||
@@ -35,7 +35,7 @@ class SPWikiLoader(Iterator):
|
|||||||
Parses XML content and extracts special phrases from it.
|
Parses XML content and extracts special phrases from it.
|
||||||
Return a list of SpecialPhrase.
|
Return a list of SpecialPhrase.
|
||||||
"""
|
"""
|
||||||
#One match will be of format [label, class, type, operator, plural]
|
# One match will be of format [label, class, type, operator, plural]
|
||||||
matches = self.occurence_pattern.findall(xml)
|
matches = self.occurence_pattern.findall(xml)
|
||||||
returned_phrases = set()
|
returned_phrases = set()
|
||||||
for match in matches:
|
for match in matches:
|
||||||
@@ -65,5 +65,6 @@ class SPWikiLoader(Iterator):
|
|||||||
Requested URL Example :
|
Requested URL Example :
|
||||||
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
|
||||||
"""
|
"""
|
||||||
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
|
url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
|
||||||
|
+ lang.upper()
|
||||||
return get_url(url)
|
return get_url(url)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ class SpecialPhrase():
|
|||||||
def __init__(self, p_label, p_class, p_type, p_operator):
|
def __init__(self, p_label, p_class, p_type, p_operator):
|
||||||
self.p_label = p_label.strip()
|
self.p_label = p_label.strip()
|
||||||
self.p_class = p_class.strip()
|
self.p_class = p_class.strip()
|
||||||
#Hack around a bug where building=yes was imported with quotes into the wiki
|
# Hack around a bug where building=yes was imported with quotes into the wiki
|
||||||
self.p_type = re.sub(r'\"|"', '', p_type.strip())
|
self.p_type = re.sub(r'\"|"', '', p_type.strip())
|
||||||
#Needed if some operator in the wiki are not written in english
|
# Needed if some operators in the wiki are not written in English
|
||||||
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
|
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
|
||||||
|
|||||||
Reference in New Issue
Block a user