mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 19:07:58 +00:00
Sanitizer functions allow name and address tags to be transformed before they are handed to the tokenizer. These transformations are visible only to the tokenizer and thus only influence the search terms and address match terms for a place. Currently two sanitizers are implemented, which are responsible for splitting names with multiple values and removing bracket additions. Both were previously hard-coded in the tokenizer.
61 lines
2.0 KiB
YAML
61 lines
2.0 KiB
YAML
# Normalization rules, written in ICU transform rule syntax and listed in
# the order given. Reading the rules literally, they: lower-case the input,
# apply the Hans-Hant transform, pull in the rules from
# icu-rules/unicode-digits-to-decimal.yaml, rewrite the numero forms
# ('№', 'n°', 'nº') to 'no', map the ordinal indicators ª/º to a/o, replace
# punctuation and symbols with a space, expand ß to 'ss', delete anything
# that is not a letter, number or space, delete modifier letters (Lm),
# run the Latin/Ascii/NFD transforms restricted to the [:Number:] set,
# delete nonspacing marks and format characters (Cf), and collapse runs of
# whitespace into a single space.
# NOTE(review): at which stage the tokenizer applies these rules is not
# visible in this file - confirm against the tokenizer documentation.
normalization:
|
|
- ":: lower ()"
|
|
- ":: Hans-Hant"
|
|
- !include icu-rules/unicode-digits-to-decimal.yaml
|
|
- "'№' > 'no'"
|
|
- "'n°' > 'no'"
|
|
- "'nº' > 'no'"
|
|
- "ª > a"
|
|
- "º > o"
|
|
- "[[:Punctuation:][:Symbol:]] > ' '"
|
|
- "ß > 'ss'" # German eszett (ß) is unambiguously equivalent to a double s
|
|
- "[^[:Letter:] [:Number:] [:Space:]] >"
|
|
- "[:Lm:] >"
|
|
- ":: [[:Number:]] Latin ()"
|
|
- ":: [[:Number:]] Ascii ();"
|
|
- ":: [[:Number:]] NFD ();"
|
|
- "[[:Nonspacing Mark:] [:Cf:]] >;"
|
|
- "[:Space:]+ > ' '"
|
|
# Transliteration rules, written in ICU transform rule syntax and listed in
# the order given: Latin transform, the rules included from
# icu-rules/extended-unicode-to-asccii.yaml, Ascii transform, NFD
# decomposition, removal of every remaining non-ASCII character,
# lower-casing, and finally NFC composition.
# NOTE(review): the spelling "asccii" is part of the included file's path;
# do not "fix" it here without renaming the file as well.
transliteration:
|
|
- ":: Latin ()"
|
|
- !include icu-rules/extended-unicode-to-asccii.yaml
|
|
- ":: Ascii ()"
|
|
- ":: NFD ()"
|
|
- "[^[:Ascii:]] >"
|
|
- ":: lower ()"
|
|
- ":: NFC ()"
|
|
# Sanitizer steps, listed in the order given. Each entry names one step.
# NOTE(review): presumably split-name-list splits names that hold multiple
# values and strip-brace-terms removes bracketed additions from names -
# confirm against the sanitizer implementations.
sanitizers:
|
|
- step: split-name-list
|
|
- step: strip-brace-terms
|
|
# Variant rule sets, each pulled in with !include from a separate
# icu-rules/variants-XX.yaml file (bg through vi, in alphabetical order).
# NOTE(review): the XX suffixes look like two-letter language codes -
# confirm against the included files.
variants:
|
|
- !include icu-rules/variants-bg.yaml
|
|
- !include icu-rules/variants-ca.yaml
|
|
- !include icu-rules/variants-cs.yaml
|
|
- !include icu-rules/variants-da.yaml
|
|
- !include icu-rules/variants-de.yaml
|
|
- !include icu-rules/variants-el.yaml
|
|
- !include icu-rules/variants-en.yaml
|
|
- !include icu-rules/variants-es.yaml
|
|
- !include icu-rules/variants-et.yaml
|
|
- !include icu-rules/variants-eu.yaml
|
|
- !include icu-rules/variants-fi.yaml
|
|
- !include icu-rules/variants-fr.yaml
|
|
- !include icu-rules/variants-gl.yaml
|
|
- !include icu-rules/variants-hu.yaml
|
|
- !include icu-rules/variants-it.yaml
|
|
- !include icu-rules/variants-ja.yaml
|
|
- !include icu-rules/variants-mg.yaml
|
|
- !include icu-rules/variants-ms.yaml
|
|
- !include icu-rules/variants-nl.yaml
|
|
- !include icu-rules/variants-no.yaml
|
|
- !include icu-rules/variants-pl.yaml
|
|
- !include icu-rules/variants-pt.yaml
|
|
- !include icu-rules/variants-ro.yaml
|
|
- !include icu-rules/variants-ru.yaml
|
|
- !include icu-rules/variants-sk.yaml
|
|
- !include icu-rules/variants-sl.yaml
|
|
- !include icu-rules/variants-sv.yaml
|
|
- !include icu-rules/variants-tr.yaml
|
|
- !include icu-rules/variants-uk.yaml
|
|
- !include icu-rules/variants-vi.yaml
|