Merge pull request #3652 from lonvia/update-variants

Cleanup and updates of tokenizer variant configuration
This commit is contained in:
Sarah Hoffmann
2025-02-18 19:47:45 +01:00
committed by GitHub
7 changed files with 126 additions and 106 deletions

View File

@@ -4,7 +4,7 @@
- aparcament -> aparc
- apartament -> apmt
- apartat -> apt
- àtic -> àt
- àtic -> àt
- autopista -> auto
- autopista -> autop
- autovia -> autov
@@ -19,7 +19,6 @@
- biblioteca -> bibl
- bloc -> bl
- carrer -> c
- carrer -> c/
- carreró -> cró
- carretera -> ctra
- cantonada -> cant
@@ -58,7 +57,6 @@
- número -> n
- sense número -> s/n
- parada -> par
- parcel·la -> parc
- passadís -> pdís
- passatge -> ptge
- passeig -> pg

View File

@@ -1,4 +1,5 @@
# Source: https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations#English
# Source: https://pe.usps.com/text/pub28/28apc_002.htm
- lang: en
words:
- Access -> Accs
@@ -11,6 +12,7 @@
- Alley -> Aly
- Alleyway -> Alwy
- Amble -> Ambl
- Anex -> Anx
- Apartments -> Apts
- Approach -> Apch
- Approach -> App
@@ -23,42 +25,52 @@
- Banan -> Ba
- Basin -> Basn
- Basin -> Bsn
- Bayou -> Byu
- Beach -> Bch
- Bend -> Bend
- Bend -> Bnd
- Block -> Blk
- Bluff -> Blf
- Bluffs -> Blfs
- Boardwalk -> Bwlk
- Bottom -> Btm
- Boulevard -> Blvd
- Boulevard -> Bvd
- Boundary -> Bdy
- Bowl -> Bl
- Brace -> Br
- Brae -> Br
- Brae -> Brae
- Branch -> Br
- Break -> Brk
- Bridge -> Bdge
- Bridge -> Br
- Bridge -> Brdg
- Bridge -> Brg
- Bridge -> Bri
- Broadway -> Bdwy
- Broadway -> Bway
- Broadway -> Bwy
- Brook -> Brk
- Brooks -> Brks
- Brow -> Brw
- Brow -> Brow
- Buildings -> Bldgs
- Buildings -> Bldngs
- Business -> Bus
- Burg -> Bg
- Burgs -> Bgs
- Bypass -> Bps
- Bypass -> Byp
- Bypass -> Bypa
- Byway -> Bywy
- Camp -> Cp
- Canyon -> Cyn
- Cape -> Cpe
- Caravan -> Cvn
- Causeway -> Caus
- Causeway -> Cswy
- Causeway -> Cway
- Center -> Cen
- Center -> Ctr
- Centers -> Ctrs
- Central -> Ctrl
- Centre -> Cen
- Centre -> Ctr
@@ -66,33 +78,42 @@
- Chase -> Ch
- Church -> Ch
- Circle -> Cir
- Circles -> Cirs
- Circuit -> Cct
- Circuit -> Ci
- Circus -> Crc
- Circus -> Crcs
- City -> Cty
- Cliff -> Clf
- Cliffs -> Clfs
- Close -> Cl
- Club -> Clb
- Common -> Cmn
- Common -> Comm
- Commons -> Cmns
- Community -> Comm
- Concourse -> Cnc
- Concourse -> Con
- Copse -> Cps
- Corner -> Cor
- Corner -> Cnr
- Corner -> Crn
- Corners -> Cors
- Corso -> Cso
- Cottages -> Cotts
- County -> Co
- County Road -> CR
- County Route -> CR
- Course -> Crse
- Court -> Crt
- Court -> Ct
- Courts -> Cts
- Courtyard -> Cyd
- Courtyard -> Ctyd
- Cove -> Ce
- Cove -> Cov
- Cove -> Cove
- Cove -> Cv
- Coves -> Cvs
- Creek -> Ck
- Creek -> Cr
- Creek -> Crk
@@ -107,59 +128,74 @@
- Crossing -> Csg
- Crossing -> Xing
- Crossroad -> Crd
- Crossroad -> Xrd
- Crossroads -> Xrds
- Crossway -> Cowy
- Cul-de-sac -> Cds
- Cul-de-sac -> Csac
- Curve -> Cve
- Curve -> Curv
- Cutting -> Cutt
- Dale -> Dle
- Dale -> Dale
- Dam -> Dm
- Deviation -> Devn
- Dip -> Dip
- Distributor -> Dstr
- Divide -> Dv
- Down -> Dn
- Downs -> Dn
- Drive -> Dr
- Drive -> Drv
- Drive -> Dv
- Drives -> Drs
- Drive-In => Drive-In # prevent abbreviation here
- Driveway -> Drwy
- Driveway -> Dvwy
- Driveway -> Dwy
- East -> E
- Edge -> Edg
- Edge -> Edge
- Elbow -> Elb
- End -> End
- Entrance -> Ent
- Esplanade -> Esp
- Estate -> Est
- Estates -> Ests
- Expressway -> Exp
- Expressway -> Expy
- Expressway -> Expwy
- Expressway -> Xway
- Extension -> Ex
- Extensions -> Exts
- Fairway -> Fawy
- Fairway -> Fy
- Falls -> Fls
- Father -> Fr
- Ferry -> Fy
- Ferry -> Fry
- Field -> Fd
- Field -> Fld
- Fields -> Flds
- Fire Track -> Ftrk
- Firetrail -> Fit
- Flat -> Fl
- Flat -> Flat
- Flat -> Flt
- Flats -> Flts
- Follow -> Folw
- Footway -> Ftwy
- Ford -> Frd
- Fords -> Frds
- Foreshore -> Fshr
- Forest -> Frst
- Forest Service Road -> FSR
- Forge -> Frg
- Forges -> Frgs
- Formation -> Form
- Fork -> Frk
- Forks -> Frks
- Fort -> Ft
- Freeway -> Frwy
- Freeway -> Fwy
- Front -> Frnt
- Frontage -> Fr
- Frontage -> Frtg
- Gap -> Gap
- Garden -> Gdn
- Gardens -> Gdn
- Gardens -> Gdns
@@ -168,21 +204,26 @@
- Gates -> Ga
- Gates -> Gte
- Gateway -> Gwy
- Gateway -> Gtwy
- George -> Geo
- Glade -> Gl
- Glade -> Gld
- Glade -> Glde
- Glen -> Gln
- Glen -> Glen
- Glens -> Glns
- Grange -> Gra
- Green -> Gn
- Green -> Grn
- Greens -> Grns
- Ground -> Grnd
- Grove -> Gr
- Grove -> Gro
- Grove -> Grv
- Groves -> Grvs
- Grovet -> Gr
- Gully -> Gly
- Harbor -> Hbr
- Harbors -> Hbrs
- Harbour -> Hbr
- Haven -> Hvn
- Head -> Hd
@@ -194,45 +235,56 @@
- Highroad -> Hird
- Highroad -> Hrd
- Highway -> Hwy
- Hill -> Hill
- Hill -> Hl
- Hills -> Hl
- Hills -> Hls
- Hollow -> Holw
- Hospital -> Hosp
- House -> Ho
- House -> Hse
- Industrial -> Ind
- Inlet -> Inlt
- Interchange -> Intg
- International -> Intl
- Island -> I
- Island -> Is
- Islands -> Iss
- Junction -> Jct
- Junction -> Jctn
- Junction -> Jnc
- Junctions -> Jcts
- Junior -> Jr
- Key -> Key
- Key -> Ky
- Keys -> Kys
- Knoll -> Knl
- Knolls -> Knls
- Lagoon -> Lgn
- Lake -> Lk
- Lakes -> L
- Lakes -> Lks
- Landing -> Ldg
- Landing -> Lndg
- Lane -> La
- Lane -> Lane
- Lane -> Ln
- Laneway -> Lnwy
- Line -> Line
- Light -> Lgt
- Lights -> Lgts
- Line -> Ln
- Link -> Link
- Link -> Lk
- Little -> Lit
- Little -> Lt
- Loaf -> Lf
- Lock -> Lck
- Locks -> Lcks
- Lodge -> Ldg
- Lookout -> Lkt
- Loop -> Loop
- Loop -> Lp
- Lower -> Low
- Lower -> Lr
- Lower -> Lwr
- Mall -> Mall
- Mall -> Ml
- Manor -> Mnr
- Manors -> Mnrs
- Mansions -> Mans
- Market -> Mkt
- Meadow -> Mdw
@@ -244,49 +296,56 @@
- Meander -> Mr
- Medical -> Med
- Memorial -> Mem
- Mews -> Mews
- Mews -> Mw
- Middle -> Mid
- Middle School -> MS
- Mile -> Mi
- Military -> Mil
- Mill -> Ml
- Mills -> Mls
- Mission -> Msn
- Motorway -> Mtwy
- Motorway -> Mwy
- Mount -> Mt
- Mountain -> Mtn
- Mountains -> Mtn
- Mountains -> Mtns
- Municipal -> Mun
- Museum -> Mus
- National Park -> NP
- National Recreation Area -> NRA
- National Wildlife Refuge Area -> NWRA
- Neck -> Nck
- Nook -> Nk
- Nook -> Nook
- North -> N
- Northeast -> NE
- Northwest -> NW
- Orchard -> Orch
- Outlook -> Out
- Outlook -> Otlk
- Overpass -> Opas
- Parade -> Pde
- Paradise -> Pdse
- Park -> Park
- Park -> Pk
- Parklands -> Pkld
- Parkway -> Pkwy
- Parkway -> Pky
- Parkway -> Pwy
- Pass -> Pass
- Parkways -> Pkwy
- Pass -> Ps
- Passage -> Psge
- Path -> Path
- Pathway -> Phwy
- Pathway -> Pway
- Pathway -> Pwy
- Piazza -> Piaz
- Pike -> Pk
- Pine -> Pne
- Pines -> Pnes
- Place -> Pl
- Plain -> Pl
- Plain -> Pln
- Plains -> Pl
- Plains -> Plns
- Plateau -> Plat
- Plaza -> Pl
- Plaza -> Plz
@@ -294,38 +353,43 @@
- Pocket -> Pkt
- Point -> Pnt
- Point -> Pt
- Port -> Port
- Points -> Pts
- Port -> Prt
- Port -> Pt
- Ports -> Prts
- Post Office -> PO
- Prairie -> Pr
- Precinct -> Pct
- Promenade -> Prm
- Promenade -> Prom
- Quad -> Quad
- Quadrangle -> Qdgl
- Quadrant -> Qdrt
- Quadrant -> Qd
- Quay -> Qy
- Quays -> Qy
- Quays -> Qys
- Radial -> Radl
- Ramble -> Ra
- Ramble -> Rmbl
- Ranch -> Rnch
- Range -> Rge
- Range -> Rnge
- Rapid -> Rpd
- Rapids -> Rpds
- Reach -> Rch
- Reservation -> Res
- Reserve -> Res
- Reservoir -> Res
- Rest -> Rest
- Rest -> Rst
- Retreat -> Rt
- Retreat -> Rtt
- Return -> Rtn
- Ridge -> Rdg
- Ridge -> Rdge
- Ridges -> Rdgs
- Ridgeway -> Rgwy
- Right of Way -> Rowy
- Rise -> Ri
- Rise -> Rise
- River -> R
- River -> Riv
- River -> Rvr
@@ -336,7 +400,6 @@
- Roadside -> Rdsd
- Roadway -> Rdwy
- Roadway -> Rdy
- Robert -> Robt
- Rocks -> Rks
- Ronde -> Rnde
- Rosebowl -> Rsbl
@@ -344,25 +407,29 @@
- Round -> Rnd
- Route -> Rt
- Route -> Rte
- Row -> Row
- Rue -> Rue
- Run -> Run
- Saint -> St
- Saints -> SS
- Senior -> Sr
- Serviceway -> Swy
- Serviceway -> Svwy
- Shoal -> Shl
- Shore -> Shr
- Shores -> Shrs
- Shunt -> Shun
- Siding -> Sdng
- Sister -> Sr
- Skyway -> Skwy
- Slope -> Slpe
- Sound -> Snd
- South -> S
- South -> Sth
- Southeast -> SE
- Southwest -> SW
- Spur -> Spur
- Spring -> Spg
- Springs -> Spgs
- Spurs -> Spur
- Square -> Sq
- Squares -> Sqs
- Stairway -> Strwy
- State Highway -> SH
- State Highway -> SHwy
@@ -371,68 +438,78 @@
- Station -> Stn
- Strand -> Sd
- Strand -> Stra
- Stravenue -> Stra
- Stream -> Strm
- Street -> St
- Streets -> Sts
- Strip -> Strp
- Subway -> Sbwy
- Summit -> Smt
- Tarn -> Tn
- Tarn -> Tarn
- Terminal -> Term
- Terrace -> Tce
- Terrace -> Ter
- Terrace -> Terr
- Thoroughfare -> Thfr
- Thoroughfare -> Thor
- Throughway -> Trwy
- Tollway -> Tlwy
- Tollway -> Twy
- Top -> Top
- Tor -> Tor
- Towers -> Twrs
- Township -> Twp
- Trace -> Trce
- Track -> Tr
- Track -> Trak
- Track -> Trk
- Trafficway -> Trfy
- Trail -> Trl
- Trailer -> Trlr
- Triangle -> Tri
- Trunkway -> Tkwy
- Tunnel -> Tun
- Tunnel -> Tunl
- Turn -> Tn
- Turn -> Trn
- Turn -> Turn
- Turnpike -> Tpk
- Turnpike -> Tpke
- Underpass -> Upas
- Underpass -> Ups
- Union -> Un
- Unions -> Uns
- University -> Uni
- University -> Univ
- Upper -> Up
- Upper -> Upr
- Vale -> Va
- Vale -> Vale
- Valley -> Vly
- Valley -> Vy
- Valleys -> Vlys
- Viaduct -> Vdct
- Viaduct -> Via
- Viaduct -> Viad
- View -> Vw
- View -> View
- Views -> Vws
- Village -> Vill
- Village -> Vlg
- Villages -> Vlgs
- Villas -> Vlls
- Ville -> Vl
- Vista -> Vis
- Vista -> Vst
- Vista -> Vsta
- Walk -> Walk
- Walk -> Wk
- Walk -> Wlk
- Walks -> Walk
- Walkway -> Wkwy
- Walkway -> Wky
- Waters -> Wtr
- Way -> Way
- Way -> Wy
- Well -> Wl
- Wells -> Wls
- West -> W
- Wharf -> Whrf
- William -> Wm
- Wynd -> Wyn
- Wynd -> Wynd
- Yard -> Yard
- Yard -> Yd
- lang: en
country: ca

View File

@@ -30,7 +30,6 @@
- Bloque -> Blq
- Bulevar -> Blvr
- Boulevard -> Blvd
- Calle -> C/
- Calle -> C
- Calle -> Cl
- Calleja -> Cllja

View File

@@ -3,20 +3,16 @@
words:
- Abbaye -> ABE
- Agglomération -> AGL
- Aire -> AIRE
- Aires -> AIRE
- Allée -> ALL
- Allée -> All
- Allées -> ALL
- Ancien chemin -> ACH
- Ancienne route -> ART
- Anciennes routes -> ART
- Anse -> ANSE
- Arcade -> ARC
- Arcades -> ARC
- Autoroute -> AUT
- Avenue -> AV
- Avenue -> Av
- Barrière -> BRE
- Barrières -> BRE
- Bas chemin -> BCH
@@ -28,16 +24,11 @@
- Berges -> BER
- Bois -> BOIS
- Boucle -> BCLE
- Boulevard -> Bd
- Boulevard -> BD
- Bourg -> BRG
- Butte -> BUT
- Cité -> CITE
- Cités -> CITE
- Côte -> COTE
- Côteau -> COTE
- Cale -> CALE
- Camp -> CAMP
- Campagne -> CGNE
- Camping -> CPG
- Carreau -> CAU
@@ -56,17 +47,13 @@
- Chaussées -> CHS
- Chemin -> Ch
- Chemin -> CHE
- Chemin -> Che
- Chemin vicinal -> CHV
- Cheminement -> CHEM
- Cheminements -> CHEM
- Chemins -> CHE
- Chemins vicinaux -> CHV
- Chez -> CHEZ
- Château -> CHT
- Cloître -> CLOI
- Clos -> CLOS
- Col -> COL
- Colline -> COLI
- Collines -> COLI
- Contour -> CTR
@@ -74,9 +61,7 @@
- Corniches -> COR
- Cottage -> COTT
- Cottages -> COTT
- Cour -> COUR
- Cours -> CRS
- Cours -> Crs
- Darse -> DARS
- Degré -> DEG
- Degrés -> DEG
@@ -87,11 +72,8 @@
- Domaine -> DOM
- Domaines -> DOM
- Écluse -> ECL
- Écluse -> ÉCL
- Écluses -> ECL
- Écluses -> ÉCL
- Église -> EGL
- Église -> ÉGL
- Enceinte -> EN
- Enclave -> ENV
- Enclos -> ENC
@@ -100,21 +82,16 @@
- Espace -> ESPA
- Esplanade -> ESP
- Esplanades -> ESP
- Étang -> ETANG
- Étang -> ÉTANG
- Faubourg -> FG
- Faubourg -> Fg
- Ferme -> FRM
- Fermes -> FRM
- Fontaine -> FON
- Fort -> FORT
- Forum -> FORM
- Fosse -> FOS
- Fosses -> FOS
- Foyer -> FOYR
- Galerie -> GAL
- Galeries -> GAL
- Gare -> GARE
- Garenne -> GARN
- Grand boulevard -> GBD
- Grand ensemble -> GDEN
@@ -134,13 +111,9 @@
- Haut chemin -> HCH
- Hauts chemins -> HCH
- Hippodrome -> HIP
- HLM -> HLM
- Île -> ILE
- Île -> ÎLE
- Immeuble -> IMM
- Immeubles -> IMM
- Impasse -> IMP
- Impasse -> Imp
- Impasses -> IMP
- Jardin -> JARD
- Jardins -> JARD
@@ -150,13 +123,11 @@
- Lieu-dit -> LD
- Lotissement -> LOT
- Lotissements -> LOT
- Mail -> MAIL
- Maison forestière -> MF
- Manoir -> MAN
- Marche -> MAR
- Marches -> MAR
- Maréchal -> MAL
- Mas -> MAS
- Monseigneur -> Mgr
- Mont -> Mt
- Montée -> MTE
@@ -168,13 +139,9 @@
- Métro -> MÉT
- Nouvelle route -> NTE
- Palais -> PAL
- Parc -> PARC
- Parcs -> PARC
- Parking -> PKG
- Parvis -> PRV
- Passage -> PAS
- Passage -> Pas
- Passage -> Pass
- Passage à niveau -> PN
- Passe -> PASS
- Passerelle -> PLE
@@ -191,19 +158,14 @@
- Petite rue -> PTR
- Petites allées -> PTA
- Place -> PL
- Place -> Pl
- Placis -> PLCI
- Plage -> PLAG
- Plages -> PLAG
- Plaine -> PLN
- Plan -> PLAN
- Plateau -> PLT
- Plateaux -> PLT
- Pointe -> PNT
- Pont -> PONT
- Ponts -> PONT
- Porche -> PCH
- Port -> PORT
- Porte -> PTE
- Portique -> PORQ
- Portiques -> PORQ
@@ -211,25 +173,19 @@
- Pourtour -> POUR
- Presquîle -> PRQ
- Promenade -> PROM
- Promenade -> Prom
- Pré -> PRE
- Pré -> PRÉ
- Périphérique -> PERI
- Péristyle -> PSTY
- Quai -> QU
- Quai -> Qu
- Quartier -> QUA
- Raccourci -> RAC
- Raidillon -> RAID
- Rampe -> RPE
- Rempart -> REM
- Roc -> ROC
- Rocade -> ROC
- Rond point -> RPT
- Roquet -> ROQT
- Rotonde -> RTD
- Route -> RTE
- Route -> Rte
- Routes -> RTE
- Rue -> R
- Rue -> R
@@ -245,7 +201,6 @@
- Sentier -> SEN
- Sentiers -> SEN
- Square -> SQ
- Square -> Sq
- Stade -> STDE
- Station -> STA
- Terrain -> TRN
@@ -254,13 +209,11 @@
- Terre plein -> TPL
- Tertre -> TRT
- Tertres -> TRT
- Tour -> TOUR
- Traverse -> TRA
- Vallon -> VAL
- Vallée -> VAL
- Venelle -> VEN
- Venelles -> VEN
- Via -> VIA
- Vieille route -> VTE
- Vieux chemin -> VCHE
- Villa -> VLA
@@ -269,7 +222,6 @@
- Villas -> VLA
- Voie -> VOI
- Voies -> VOI
- Zone -> ZONE
- Zone artisanale -> ZA
- Zone d'aménagement concerté -> ZAC
- Zone d'aménagement différé -> ZAD
@@ -289,7 +241,6 @@
- Esplanade -> ESPL
- Passage -> PASS
- Plateau -> PLAT
- Rang -> RANG
- Rond-point -> RDPT
- Sentier -> SENT
- Subdivision -> SUBDIV

View File

@@ -29,7 +29,6 @@
- Prima -> I
- Primo -> I
- Primo -> 1
- Primo -> 1°
- Quarta -> IV
- Quarto -> IV
- Quattro -> IV

View File

@@ -1,11 +1,10 @@
# Source: https://wiki.openstreetmap.org/wiki/Name_finder:Abbreviations#Norsk_-_Norwegian
- lang: no
- lang: "no"
words:
# convert between Nynorsk and Bokmål here
- vei, veg => v,vn,vei,veg
- veien, vegen -> v,vn,veien,vegen
- gate -> g,gt
- ~vei, ~veg -> v,vei,veg
- ~veien, ~vegen -> vn,veien,vegen
# convert between the two female forms
- gaten, gata => g,gt,gaten,gata
- gate, gaten, gata -> g,gt
- plass, plassen -> pl
- sving, svingen -> sv

View File

@@ -46,7 +46,7 @@ sanitizers:
- step: strip-brace-terms
- step: tag-analyzer-by-language
filter-kind: [".*name.*"]
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,"no",pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
use-defaults: all
mode: append
- step: tag-japanese
@@ -158,7 +158,7 @@ token-analysis:
mode: variant-only
variants:
- !include icu-rules/variants-nl.yaml
- id: no
- id: "no"
analyzer: generic
mode: variant-only
variants:
@@ -183,9 +183,6 @@ token-analysis:
mode: variant-only
variants:
- !include icu-rules/variants-ru.yaml
mutations:
- pattern: ё
replacements: ["ё", "е"]
- id: sk
analyzer: generic
mode: variant-only