Remove remaining references to PHP code

2026-03-11 21:34:06 +00:00 · 2024-09-15 15:33:59 +02:00
parent 7ba5152493
commit 7717bbf59d
10 changed files with 6 additions and 77 deletions
--- a/src/nominatim_db/tokenizer/factory.py
+++ b/src/nominatim_db/tokenizer/factory.py
@@ -15,9 +15,6 @@ be used consistently when querying and updating the database.
 This module provides the functions to create and configure a new tokenizer
 as well as instantiating the appropriate tokenizer for updating an existing
 database.
-
-A tokenizer usually also includes PHP code for querying. The appropriate PHP
-normalizer module is installed, when the tokenizer is created.
 """
 from typing import Optional
 import logging
--- a/src/nominatim_db/tokenizer/icu_tokenizer.py
+++ b/src/nominatim_db/tokenizer/icu_tokenizer.py
@@ -13,7 +13,6 @@ from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
 import itertools
 import logging
 from pathlib import Path
-from textwrap import dedent

 from psycopg.types.json import Jsonb
 from psycopg import sql as pysql
@@ -64,7 +63,6 @@ class ICUTokenizer(AbstractTokenizer):
        """
        self.loader = ICURuleLoader(config)

-        self._install_php(config.lib_dir.php, overwrite=True)
        self._save_config()

        if init_db:
@@ -81,8 +79,6 @@ class ICUTokenizer(AbstractTokenizer):
        with connect(self.dsn) as conn:
            self.loader.load_config_from_db(conn)

-        self._install_php(config.lib_dir.php, overwrite=False)
-

    def finalize_import(self, config: Configuration) -> None:
        """ Do any required postprocessing to make the tokenizer data ready
@@ -281,22 +277,6 @@ class ICUTokenizer(AbstractTokenizer):
            return list(s[0].split('@')[0] for s in cur)


-    def _install_php(self, phpdir: Optional[Path], overwrite: bool = True) -> None:
-        """ Install the php script for the tokenizer.
-        """
-        if phpdir is not None:
-            assert self.loader is not None
-            php_file = self.data_dir / "tokenizer.php"
-
-            if not php_file.exists() or overwrite:
-                php_file.write_text(dedent(f"""\
-                    <?php
-                    @define('CONST_Max_Word_Frequency', 10000000);
-                    @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-                    @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-                    require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
-
-
    def _save_config(self) -> None:
        """ Save the configuration that needs to remain stable for the given
            database as database properties.
--- a/src/nominatim_db/tokenizer/legacy_tokenizer.py
+++ b/src/nominatim_db/tokenizer/legacy_tokenizer.py
@@ -14,7 +14,6 @@ import logging
 from pathlib import Path
 import re
 import shutil
-from textwrap import dedent

 from icu import Transliterator
 import psycopg
@@ -120,8 +119,6 @@ class LegacyTokenizer(AbstractTokenizer):

        self.normalization = config.TERM_NORMALIZATION

-        self._install_php(config, overwrite=True)
-
        with connect(self.dsn) as conn:
            _check_module(module_dir, conn)
            self._save_config(conn, config)
@@ -145,8 +142,6 @@ class LegacyTokenizer(AbstractTokenizer):
                            config.lib_dir.module,
                            config.project_dir / 'module')

-        self._install_php(config, overwrite=False)
-
    def finalize_import(self, config: Configuration) -> None:
        """ Do any required postprocessing to make the tokenizer data ready
            for use.
@@ -272,21 +267,6 @@ class LegacyTokenizer(AbstractTokenizer):
            return list(s[0] for s in cur)


-    def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
-        """ Install the php script for the tokenizer.
-        """
-        if config.lib_dir.php is not None:
-            php_file = self.data_dir / "tokenizer.php"
-
-            if not php_file.exists() or overwrite:
-                php_file.write_text(dedent(f"""\
-                    <?php
-                    @define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
-                    @define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
-                    require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                    """), encoding='utf-8')
-
-
    def _init_db_tables(self, config: Configuration) -> None:
        """ Set up the word table and fill it with pre-computed word
            frequencies.