Remove remaining references to PHP code

This commit is contained in:
Sarah Hoffmann
2024-09-15 15:33:59 +02:00
parent 7ba5152493
commit 7717bbf59d
10 changed files with 6 additions and 77 deletions

View File

@@ -14,7 +14,6 @@ import logging
from pathlib import Path
import re
import shutil
from textwrap import dedent
from icu import Transliterator
import psycopg
@@ -120,8 +119,6 @@ class LegacyTokenizer(AbstractTokenizer):
self.normalization = config.TERM_NORMALIZATION
self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._save_config(conn, config)
@@ -145,8 +142,6 @@ class LegacyTokenizer(AbstractTokenizer):
config.lib_dir.module,
config.project_dir / 'module')
self._install_php(config, overwrite=False)
def finalize_import(self, config: Configuration) -> None:
""" Do any required postprocessing to make the tokenizer data ready
for use.
@@ -272,21 +267,6 @@ class LegacyTokenizer(AbstractTokenizer):
return list(s[0] for s in cur)
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
    """ Write the PHP loader script for the tokenizer into the data
        directory.

        Nothing happens when `config.lib_dir.php` is None. A 'tokenizer.php'
        that already exists is only replaced when `overwrite` is true.
    """
    if config.lib_dir.php is None:
        return

    target = self.data_dir / "tokenizer.php"
    if target.exists() and not overwrite:
        return

    # The script defines two constants from the configuration and then
    # pulls in the legacy tokenizer implementation from the PHP lib dir.
    script = dedent(f"""\
<?php
@define('CONST_Max_Word_Frequency', {config.MAX_WORD_FREQUENCY});
@define('CONST_Term_Normalization_Rules', "{config.TERM_NORMALIZATION}");
require_once('{config.lib_dir.php}/tokenizer/legacy_tokenizer.php');
""")
    target.write_text(script, encoding='utf-8')
def _init_db_tables(self, config: Configuration) -> None:
""" Set up the word table and fill it with pre-computed word
frequencies.