disregard special phrases that do not match fully

Compare the normalized terms imported by the special
terms script against the normalized version of the query string.
Disregard them if they cannot be found. This avoids a significant
number of mismatches due to transliteration issues.

The match will only be done when a normalized word has been set,
making this change backwards compatible with older databases.
This commit is contained in:
Sarah Hoffmann
2017-06-01 21:40:23 +02:00
parent e3fb706c65
commit 54393addd3
3 changed files with 31 additions and 10 deletions

View File

@@ -19,7 +19,7 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
include(CONST_InstallPath.'/settings/phrase_settings.php');
if ($aCMDResult['wiki-import']) {
$oNormalizer = Transliterator::createFromRules(":: NFD (); [:Nonspacing Mark:] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC ();");
$oNormalizer = Transliterator::createFromRules(CONST_Term_Normalization_Rules);
$aPairs = array();
$sLanguageIn = CONST_Languages ? CONST_Languages :
@@ -32,7 +32,11 @@ if ($aCMDResult['wiki-import']) {
if (preg_match_all('#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#', $sWikiPageXML, $aMatches, PREG_SET_ORDER)) {
foreach ($aMatches as $aMatch) {
$sLabel = trim($aMatch[1]);
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
if ($oNormalizer !== null) {
$sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
} else {
$sTrans = null;
}
$sClass = trim($aMatch[2]);
$sType = trim($aMatch[3]);
// hack around a bug where building=yes was imported with