mirror of
https://github.com/osm-search/Nominatim.git
synced 2026-02-15 19:07:58 +00:00
Adds a tagger for names by language so that the analyzer for that language is used. As a result, variants are now applied only to names in that specific language, and only to name tags, no longer to reference-like tags.
187 lines
4.7 KiB
YAML
187 lines
4.7 KiB
YAML
# Normalization rules in ICU transform rule syntax. Entries are either inline
# rule strings or rule files pulled in with the custom `!include` tag.
# The sequence order is kept exactly as given (presumably the rules are
# applied in this order -- confirm against the tokenizer before reordering).
normalization:
  - ":: lower ()"
  - ":: Hans-Hant"
  - !include icu-rules/unicode-digits-to-decimal.yaml
  # Numero signs are rewritten to 'no'. These come before the plain
  # 'º > o' rule below, which would otherwise also touch 'nº'.
  - "'№' > 'no'"
  - "'n°' > 'no'"
  - "'nº' > 'no'"
  # Ordinal indicators are rewritten to plain letters.
  - "ª > a"
  - "º > o"
  # Punctuation and symbols are replaced by a single space.
  - "[[:Punctuation:][:Symbol:]] > ' '"
  - "ß > 'ss'"  # German eszett is unambiguously equal to 'ss'
  # Remove everything that is not a letter, number or space.
  - "[^[:Letter:] [:Number:] [:Space:]] >"
  - "[:Lm:] >"
  # The next three transforms are restricted to characters in [:Number:].
  - ":: [[:Number:]] Latin ()"
  - ":: [[:Number:]] Ascii ();"
  - ":: [[:Number:]] NFD ();"
  - "[[:Nonspacing Mark:] [:Cf:]] >;"
  # Collapse runs of whitespace into one space.
  - "[:Space:]+ > ' '"
# Transliteration rules in ICU transform rule syntax. Entries are either
# inline rule strings or rule files pulled in with the custom `!include` tag.
# The sequence order is kept exactly as given (presumably the rules are
# applied in this order -- confirm against the tokenizer before reordering).
transliteration:
  - ":: Latin ()"
  # NOTE(review): the 'asccii' spelling is part of the include path and has to
  # match the file name on disk; do not correct it here without renaming the
  # file as well.
  - !include icu-rules/extended-unicode-to-asccii.yaml
  - ":: Ascii ()"
  - ":: NFD ()"
  # Remove whatever is still outside of ASCII at this point.
  - "[^[:Ascii:]] >"
  - ":: lower ()"
  - ":: NFC ()"
# Sanitizer steps, listed in the order given by the configuration.
sanitizers:
  - step: split-name-list
  - step: strip-brace-terms
  # Tags names with a per-language analyzer. Every code in `whitelist` has an
  # entry with the same `id` under `token-analysis` below; keep both in sync.
  - step: tag-analyzer-by-language
    filter-kind: [".*name.*"]
    # "no" (Norwegian) must stay quoted: YAML 1.1 loaders such as PyYAML read
    # a bare `no` as the boolean false, which never matches a language code.
    whitelist: [bg, ca, cs, da, de, el, en, es, et, eu, fi, fr, gl, hu, it,
                ja, mg, ms, nl, "no", pl, pt, ro, ru, sk, sl, sv, tr, uk, vi]
    use-defaults: all
    mode: append

# Token analyzers. The first entry carries no `id` (presumably the default
# analyzer -- confirm against the tokenizer). Every other entry is a generic
# analyzer in variant-only mode whose `id` is a language code from the
# whitelist above and whose variants come from icu-rules/variants-<id>.yaml.
token-analysis:
  - analyzer: generic
  - id: bg
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-bg.yaml
  - id: ca
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-ca.yaml
  - id: cs
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-cs.yaml
  - id: da
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-da.yaml
  - id: de
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-de.yaml
  - id: el
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-el.yaml
  - id: en
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-en.yaml
  - id: es
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-es.yaml
  - id: et
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-et.yaml
  - id: eu
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-eu.yaml
  - id: fi
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-fi.yaml
  - id: fr
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-fr.yaml
  - id: gl
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-gl.yaml
  - id: hu
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-hu.yaml
  - id: it
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-it.yaml
  - id: ja
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-ja.yaml
  - id: mg
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-mg.yaml
  - id: ms
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-ms.yaml
  - id: nl
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-nl.yaml
  # Quoted for the same reason as in the whitelist: bare `no` is a YAML 1.1
  # boolean, not the string "no".
  - id: "no"
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-no.yaml
  - id: pl
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-pl.yaml
  - id: pt
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-pt.yaml
  - id: ro
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-ro.yaml
  - id: ru
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-ru.yaml
  - id: sk
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-sk.yaml
  - id: sl
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-sl.yaml
  - id: sv
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-sv.yaml
  - id: tr
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-tr.yaml
  - id: uk
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-uk.yaml
  - id: vi
    analyzer: generic
    mode: variant-only
    variants:
      - !include icu-rules/variants-vi.yaml