Files
file-online-preview/server/libreoffice/share/numbertext/hu_Hung.sor

181 lines
4.1 KiB
Java
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Old Hungarian script (ISO 15924 code: Hung)
# Transliterate numbers and words
#
# Convert words with a traditional or foreign "i" written as "y",
# e.g. Áprily, Champs-Élysées, Élysée-palota, Dolly, Folly, Hollywood, jolly...
# The listed stem is handed on through $1, the y/Y is emitted as the same letter
# that the i/I letter rules produce, and the rest of the word is handed on through $2.
# The trailing " 0" in the patterns is the space separated zero that the numbers
# section attributes to the LibreOffice integration.
# NOTE(review): as the text stands, every "?" inside the stems makes the preceding
# letter optional. This file is reported to contain invisible Unicode characters, so
# each "?" may really belong to an invisible separator that is not visible here.
# Confirm with a hex dump before editing these two lines.
# NOTE(review): the lower-case and upper-case lists differ in a few stems
# ([pP]enn / PEN?N, Re?gu?l / REGU?L, So?n / SON) - confirm whether that is intended.
"^(Áp?ri?l|Champs-Él|[cC]i?t|Do?lák-Sa?l|[dfhjDFHJ]ol?l|Él|Fesz?t|[gG]rizz?l|Ha?rasz?t|Hat?va?n|Husz?t|[iI]n?ter?ci?t|Kéth?l|Ku?ko?r?el?l|Mind?szen?t|Nosz?t|[pP]enn|Pes?t|Re?gu?l|So?n|Szi?l|Szte?va?no?vi?t|Thö?kö?l|Vö?rös?mar?t|[zZ][lł]ot)y(.*) 0$" $1𐳐$2
"^(ÁP?RI?L|CHAMPS-ÉL|CI?T|DO?LÁK-SA?L|[DFHJ]OL?L|ÉL|FESZ?T|GRIZZ?L|HA?RASZ?T|HAT?VA?N|HUSZ?T|IN?TER?CI?T|KÉTH?L|KU?KO?R?EL?L|MIND?SZEN?T|NOSZ?T|PEN?N|PES?T|REGU?L|SON|SZI?L|SZTE?VA?NO?VI?T|THÖ?KÖ?L|VÖ?RÖS?MAR?T|Z[LŁ]OT)Y(.*) 0$" $1𐲐$2
# if the original word contains an unknown character, return without modification
# (\1 copies the matched text verbatim, without the trailing " 0" and without
# running any further rule on it)
# NOTE(review): the negated character class is followed directly by " 0", so as
# written only the last character of the input is tested, not every character.
# The class also omits í/Í and ä/Ä although letter rules exist for them.
# Confirm whether both are intended before changing the pattern.
"^(.*[^-0-9a-zA-ZáéëóöőúüűÁÉËÓÖŐÚÜŰ,„”\?\;]) 0$" \1
# words with y
# Word-initial y/Y in the listed words is emitted as the letter the j/J rules produce
# (yard..., Yard..., Yucon...), as the letter the Í rule produces (Ybl...) or as the
# letter the I rule produces (Yvette...). The rest of the word is handed on through $1.
"^y(ard.*) 0$" 𐳒$1
"^Y([aA][rR][dD].*|[uU][cC][oO][nN].*) 0$" 𐲒$1
"^Y([bB][lL].*) 0$" 𐲑$1
"^Y(vet?te.*) 0$" 𐲐$1
# y/Y after the listed English stems (boy, cowboy, display, gray, play) is emitted
# as the letter the j/J rules produce
"^([bB]o|[cC]owbo|[dD]ispla|[gG]ra|[pP]la)y(.*) 0$" $1𐳒$2
"^(BO|COWBO|DISPLA|GRA|PLA)Y(.*) 0$" $1𐲒$2
# don't transliterate other words with starting y
# (\1 returns the word unchanged, minus the trailing " 0")
"(^[yY].*) 0$" \1
# don't transliterate words with q, but not with qu
# (the q must be followed by at least one more character of the word that is not u/U)
"(^.*[qQ][^uU].*) 0$" \1
# avoid exceeding the recursion depth:
# convert by 200-character parts
# (the first 200 characters and the remainder are each handed on separately
# through $1 and $2, and the two results are concatenated)
(.{200})(.+) $1$2
# numbers
# remove space separated zero (in LibreOffice integration)
"(\d+) 0" $1
# Helper rules for one decimal digit. The input has the form "D: a b", where D is
# the digit, a is the sign worth one unit and b is the sign worth five units.
# 0 gives nothing, 1-4 repeat a, 5 gives b, 6-9 give b followed by repeated a.
"0: (.*) (.*)"
"1: (.*) (.*)" \1
"2: (.*) (.*)" \1\1
"3: (.*) (.*)" \1\1\1
"4: (.*) (.*)" \1\1\1\1
"5: (.*) (.*)" \2
"6: (.*) (.*)" \2\1
"7: (.*) (.*)" \2\1\1
"8: (.*) (.*)" \2\1\1\1
"9: (.*) (.*)" \2\1\1\1\1
# ones (signs for 1 and 5) and tens (signs for 10 and 50), most significant part first
(\d) $(\1: 𐳺 𐳻)
(\d)(\d) $(\1: 𐳼 𐳽)$2
# Hundreds, thousands, millions and milliards are multiplicative: the multiplier is
# written first, then the unit sign, then the remainder.
# A multiplier of exactly 1 is not written. In that case the unit sign itself has to
# come first and the remainder after it. Emitting the remainder before the sign would
# make it read as a multiplier, e.g. 102 and 200 would both come out as 𐳺𐳺𐳾.
1(\d\d) 𐳾$1
(\d)(\d\d) $1𐳾$2
1(\d\d\d)$ 𐳿$1
(\d{1,3})(\d\d\d) $1𐳿$2
1(\d{6})$ 𐳿𐳿$1
(\d{1,3})(\d{6}) $1𐳿𐳿$2
1(\d{9})$ 𐳿𐳿𐳿$1
(\d{1,3})(\d{9}) $1𐳿𐳿𐳿$2
# numbers with letters, for example dates with affixes
# (the leading digits and the attached non-space tail are handed on separately
# through $1 and $2, and the two results are concatenated)
"(\d+)([^ ]+)" $1$2
# letters
# Strip the trailing " 0" and hand the bare word on to the rules below.
"^(.*) 0$" $1
# Each letter rule consumes the Latin letter(s) at the start of the input, emits the
# Old Hungarian letter and hands the rest of the input on through $1.
# Lower-case input gives the small letter, upper-case input the capital letter.
# Digraphs and their doubled forms (ccs, cs, ggy, gy, lly, ly) are listed before the
# plain consonant so that they are tried first.
a(.*) 𐳀$1
A(.*) 𐲀$1
á(.*) 𐳁$1
Á(.*) 𐲁$1
b(.*) 𐳂$1
B(.*) 𐲂$1
# ccs is written as a doubled cs letter
ccs(.*) 𐳆𐳆$1
CCS(.*) 𐲆𐲆$1
cs(.*) 𐳆$1
C[sS](.*) 𐲆$1
c(.*) 𐳄$1
C(.*) 𐲄$1
d(.*) 𐳇$1
D(.*) 𐲇$1
e(.*) 𐳉$1
E(.*) 𐲉$1
é(.*) 𐳋$1
É(.*) 𐲋$1
# ä is written with the same letter as é
ä(.*) 𐳋$1
Ä(.*) 𐲋$1
ë(.*) 𐳊$1
Ë(.*) 𐲊$1
f(.*) 𐳌$1
F(.*) 𐲌$1
# ggy is written as a doubled gy letter
ggy(.*) 𐳎𐳎$1
GGY(.*) 𐲎𐲎$1
gy(.*) 𐳎$1
G[yY](.*) 𐲎$1
g(.*) 𐳍$1
G(.*) 𐲍$1
h(.*) 𐳏$1
H(.*) 𐲏$1
i(.*) 𐳐$1
I(.*) 𐲐$1
í(.*) 𐳑$1
Í(.*) 𐲑$1
j(.*) 𐳒$1
J(.*) 𐲒$1
k(.*) 𐳓$1
K(.*) 𐲓$1
# lly is written as a doubled ly letter
lly(.*) 𐳗𐳗$1
LLY(.*) 𐲗𐲗$1
ly(.*) 𐳗$1
L[yY](.*) 𐲗$1
l(.*) 𐳖$1
L(.*) 𐲖$1
m(.*) 𐳘$1
M(.*) 𐲘$1
# n and ny: the digraph ny is a single letter and nny is that letter doubled, so
# both are listed before plain n. The capital digraph accepts Ny and NY, in the same
# way as the Gy, Ly, Ty, Sz and Zs rules accept both spellings of their second letter.
nny(.*) 𐳚𐳚$1
NNY(.*) 𐲚𐲚$1
ny(.*) 𐳚$1
N[yY](.*) 𐲚$1
n(.*) 𐳙$1
N(.*) 𐲙$1
# Each rule consumes the Latin letter(s) at the start of the input, emits the
# Old Hungarian letter and hands the rest of the input on through $1.
# Digraphs and their doubled forms are listed before the plain consonant.
o(.*) 𐳛$1
O(.*) 𐲛$1
ó(.*) 𐳜$1
Ó(.*) 𐲜$1
ö(.*) 𐳞$1
Ö(.*) 𐲞$1
ő(.*) 𐳟$1
Ő(.*) 𐲟$1
p(.*) 𐳠$1
P(.*) 𐲠$1
# q is only handled as part of qu, which is written with the k and v letters
qu(.*) 𐳓𐳮$1 # qu->kv
Qu(.*) 𐲓𐳮$1 # Qu->Kv
QU(.*) 𐲓𐲮$1 # QU->KV
r(.*) 𐳢$1
R(.*) 𐲢$1
# ssz is written as a doubled sz letter
ssz(.*) 𐳥𐳥$1
SSZ(.*) 𐲥𐲥$1
sz(.*) 𐳥$1
S[zZ](.*) 𐲥$1
# sch is written with the same single letter as s
sch(.*) 𐳤$1
Sch(.*) 𐲤$1
s(.*) 𐳤$1
S(.*) 𐲤$1
# tty is written as a doubled ty letter
tty(.*) 𐳨𐳨$1
TTY(.*) 𐲨𐲨$1
ty(.*) 𐳨$1
T[yY](.*) 𐲨$1
t(.*) 𐳦$1
T(.*) 𐲦$1
u(.*) 𐳪$1
U(.*) 𐲪$1
ú(.*) 𐳫$1
Ú(.*) 𐲫$1
ü(.*) 𐳭$1
Ü(.*) 𐲭$1
ű(.*) 𐳬$1
Ű(.*) 𐲬$1
# v and w share one letter
[vw](.*) 𐳮$1
[VW](.*) 𐲮$1
# x is written with the k and sz letters.
# For capital X the case of the sz part follows the context:
#  - X that is the whole input at the start of the text gives K + small sz
#  - X that is the whole remaining input elsewhere gives K + capital SZ
#  - X followed by a capital letter gives K + capital SZ
#  - any other X gives K + small sz
# The two rules that match X alone have no capture group, so they end the
# conversion and must not reference $1.
x(.*) 𐳓𐳥$1 # x->ksz
^X 𐲓𐳥 # X->KSz
X 𐲓𐲥 # X->KSZ
X([A-ZÁÉËÍÓÖŐÚÜŰ].*) 𐲓𐲥$1 # X->KSZ
X(.*) 𐲓𐳥$1 # X->Ksz
# Any y/Y that reaches this point was not consumed by a digraph rule or by one of
# the word-specific y rules, and is written with the same letter as i/I.
y(.*) 𐳐$1 # .+y->i
Y(.*) 𐲐$1 # .+Y->I
# zzs is written as a doubled zs letter. The digraph rules come before plain z.
zzs(.*) 𐳰𐳰$1
ZZS(.*) 𐲰𐲰$1
zs(.*) 𐳰$1
Z[sS](.*) 𐲰$1
z(.*) 𐳯$1
Z(.*) 𐲯$1
# remove ZWSP (used for consonant disambiguation)
# NOTE(review): the pattern below is expected to start with the invisible ZWSP
# character named above. It cannot be seen in most viewers, so confirm with a hex
# dump that it is present. Without it the pattern would match every input and hand
# the same input on again unchanged.
(.*) $1
# punctuation
# NOTE(review): each rule consumes one punctuation mark and hands the rest on through
# $1. As far as can be seen here, the first and last patterns show no leading
# character and no replacement shows an output character. This file is reported to
# contain invisible and confusable Unicode characters, so confirm the real content
# of these lines with a hex dump before editing them.
(.*) $1
\;(.*) $1
\?(.*) $1
,(.*) $1
(.*) $1
# don't modify unknown characters
# (a leading character that no earlier rule consumed is copied verbatim with \1 and
# the rest of the input is handed on through $1 - the last rule returns whatever is
# left, which at this point can only be the empty string, and ends the recursion)
(.)(.*) \1$2
(.*) \1