mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-26 02:58:13 +00:00
move warm script to python code
This commit is contained in:
@@ -9,9 +9,11 @@ Implementation of the 'admin' subcommand.
|
||||
"""
|
||||
import logging
|
||||
import argparse
|
||||
import random
|
||||
|
||||
from nominatim.tools.exec_utils import run_legacy_script
|
||||
from nominatim.db.connection import connect
|
||||
from nominatim.clicmd.args import NominatimArgs
|
||||
import nominatim.api as napi
|
||||
|
||||
# Do not repeat documentation of subcommand classes.
|
||||
# pylint: disable=C0111
|
||||
@@ -81,11 +83,25 @@ class AdminFuncs:
|
||||
|
||||
return 1
|
||||
|
||||
|
||||
def _warm(self, args: NominatimArgs) -> int:
|
||||
LOG.warning('Warming database caches')
|
||||
params = ['warm.php']
|
||||
if args.target == 'reverse':
|
||||
params.append('--reverse-only')
|
||||
if args.target == 'search':
|
||||
params.append('--search-only')
|
||||
return run_legacy_script(*params, config=args.config)
|
||||
|
||||
api = napi.NominatimAPI(args.project_dir)
|
||||
|
||||
if args.target != 'reverse':
|
||||
for _ in range(1000):
|
||||
api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
|
||||
address_details=True)
|
||||
|
||||
if args.target != 'search':
|
||||
from ..tokenizer import factory as tokenizer_factory
|
||||
|
||||
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
|
||||
with connect(args.config.get_libpq_dsn()) as conn:
|
||||
words = tokenizer.most_frequent_words(conn, 1000)
|
||||
|
||||
for word in words:
|
||||
api.search(word)
|
||||
|
||||
return 0
|
||||
|
||||
@@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable
|
||||
from pathlib import Path
|
||||
|
||||
from nominatim.config import Configuration
|
||||
from nominatim.db.connection import Connection
|
||||
from nominatim.data.place_info import PlaceInfo
|
||||
from nominatim.typing import Protocol
|
||||
|
||||
@@ -233,6 +234,13 @@ class AbstractTokenizer(ABC):
|
||||
"""
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
|
||||
""" Return a list of the `num` most frequent full words
|
||||
in the database.
|
||||
"""
|
||||
|
||||
|
||||
class TokenizerModule(Protocol):
|
||||
""" Interface that must be exported by modules that implement their
|
||||
own tokenizer.
|
||||
|
||||
@@ -183,6 +183,18 @@ class ICUTokenizer(AbstractTokenizer):
|
||||
self.loader.make_token_analysis())
|
||||
|
||||
|
||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
|
||||
""" Return a list of the `num` most frequent full words
|
||||
in the database.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute("""SELECT word, sum((info->'count')::int) as count
|
||||
FROM word WHERE type = 'W'
|
||||
GROUP BY word
|
||||
ORDER BY count DESC LIMIT %s""", (num,))
|
||||
return list(s[0].split('@')[0] for s in cur)
|
||||
|
||||
|
||||
def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
|
||||
@@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer):
|
||||
return LegacyNameAnalyzer(self.dsn, normalizer)
|
||||
|
||||
|
||||
def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
|
||||
""" Return a list of the `num` most frequent full words
|
||||
in the database.
|
||||
"""
|
||||
with conn.cursor() as cur:
|
||||
cur.execute(""" SELECT word FROM word WHERE word is not null
|
||||
ORDER BY search_name_count DESC LIMIT %s""", (num,))
|
||||
return list(s[0] for s in cur)
|
||||
|
||||
|
||||
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
|
||||
""" Install the php script for the tokenizer.
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user