mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-03-11 21:34:06 +00:00
improve normalization
Make sure all special symbols are removed during normalization already. Those won't be interpreted in any way because they are unlikely to be searched for.
This commit is contained in:
@@ -1,20 +1,29 @@
|
||||
normalization:
|
||||
- ":: NFD ()"
|
||||
- "[[:Nonspacing Mark:] [:Cf:]] >"
|
||||
- ":: lower ()"
|
||||
- !include icu-rules/unicode-digits-to-decimal.yaml
|
||||
- "'№' > 'no'"
|
||||
- "'n°' > 'no'"
|
||||
- "'nº' > 'no'"
|
||||
- "ª > a"
|
||||
- "º > o"
|
||||
- "[[:Punctuation:][:Symbol:]] > ' '"
|
||||
- "ß > 'ss'" # German eszett (ß) is unambiguously equal to double ss
|
||||
- "[[:Punctuation:][:Space:]]+ > ' '"
|
||||
- ":: NFC ()"
|
||||
- "[^[:Letter:] [:Number:] [:Space:]] >"
|
||||
- "[:Lm:] >"
|
||||
- ":: [[:Number:]] Latin ()"
|
||||
- ":: [[:Number:]] Ascii ();"
|
||||
- ":: [[:Number:]] NFD ();"
|
||||
- "[[:Nonspacing Mark:] [:Cf:]] >;"
|
||||
- "[:Space:]+ > ' '"
|
||||
transliteration:
|
||||
- ":: Latin ()"
|
||||
- !include icu-rules/extended-unicode-to-asccii.yaml
|
||||
- ":: Ascii ()"
|
||||
- ":: NFD ()"
|
||||
- "'' >"
|
||||
- "[[:Nonspacing Mark:] [:Cf:]] >"
|
||||
- "[^[:Ascii:]] >"
|
||||
- ":: lower ()"
|
||||
- "[[:Punctuation:][:Space:]]+ > ' '"
|
||||
- ":: NFC ()"
|
||||
- "[:Space:]+ > ' '"
|
||||
variants:
|
||||
- words:
|
||||
- ~hal => hal
|
||||
|
||||
Reference in New Issue
Block a user