-- Modul:bho-Kthi-translit
-- This module can be documented on the Modul:bho-Kthi-translit/doc page.
-- Transliteration for Bhojpuri (module name: bho-Kthi-translit).

-- Table returned at the end of the module; public functions are attached to it.
local export = {}

-- Local aliases for the Unicode-aware string functions of the Scribunto
-- `mw.ustring` library, used throughout the module.
local gsub, match = mw.ustring.gsub, mw.ustring.match
-- Character-to-Latin lookup table. export.tr passes it to gsub as the
-- replacement table for the pattern '.𑂺?', so a key is either a single
-- character or a character followed by the nukta (𑂺). Matches that have no
-- entry here are left unchanged by gsub.
local conv = {
	-- consonants
	['𑂍'] = 'k', ['𑂎'] = 'kh', ['𑂏'] = 'g', ['𑂐'] = 'gh', ['𑂑'] = 'ṅ',
	['𑂒'] = 'c', ['𑂓'] = 'ch', ['𑂔'] = 'j', ['𑂕'] = 'jh', ['𑂖'] = 'ñ',
	['𑂗'] = 'ṭ', ['𑂘'] = 'ṭh', ['𑂙'] = 'ḍ', ['𑂛'] = 'ḍh', ['𑂝'] = 'ṇ',
	['𑂞'] = 't', ['𑂟'] = 'th', ['𑂠'] = 'd', ['𑂡'] = 'dh', ['𑂢'] = 'n',
	['𑂣'] = 'p', ['𑂤'] = 'ph', ['𑂥'] = 'b', ['𑂦'] = 'bh', ['𑂧'] = 'm',
	['𑂨'] = 'y', ['𑂩'] = 'r', ['𑂪'] = 'l', ['𑂫'] = 'v', ['ळ'] = 'ḷ',
	['𑂬'] = 'ś', ['𑂭'] = 'ṣ', ['𑂮'] = 's', ['𑂯'] = 'h',
	['𑂚'] = 'ṛ', ['𑂜'] = 'ṛh',
	-- The three precomposed nukta letters above, spelled as base letter + nukta.
	-- Each key is written as a concatenation so that Unicode normalisation of
	-- this source file cannot fold it back into the precomposed letter (which
	-- would turn it into a duplicate of the key above).
	['𑂥' .. '𑂺'] = 'v', ['𑂙' .. '𑂺'] = 'ṛ', ['𑂛' .. '𑂺'] = 'ṛh',
	-- export.tr rewrites a word-final anusvara after inherent "a" as 𑂧 + nukta;
	-- without an entry that pair would stay in the output untransliterated.
	-- NOTE(review): the romanisation m̐ is assumed — TODO confirm the project convention.
	['𑂧' .. '𑂺'] = 'm̐',
	-- ['𑂔𑂹𑂖'] = 'gy',
	-- vowel diacritics (dependent signs)
	['𑂱'] = 'i', ['𑂳'] = 'u', ['𑂵'] = 'e', ['𑂷'] = 'o',
	['𑂰'] = 'ā', ['𑂲'] = 'ī', ['𑂴'] = 'ū',
	['𑂶'] = 'ai', ['𑂸'] = 'au',
	-- independent vowel letters
	['𑂃'] = 'a', ['𑂅'] = 'i', ['𑂇'] = 'u', ['𑂉'] = 'e', ['𑂋'] = 'o',
	['𑂄'] = 'ā', ['𑂆'] = 'ī', ['𑂈'] = 'ū',
	['𑂊'] = 'ai', ['𑂌'] = 'au',
	['ॐ'] = 'om',
	-- chandrabindu
	['𑂀'] = '̃',
	-- anusvara
	['𑂁'] = 'ṁ',
	-- visarga
	['𑂂'] = 'ḥ',
	-- virama
	['𑂹'] = '',
	-- numerals
	['०'] = '0', ['१'] = '1', ['२'] = '2', ['३'] = '3', ['४'] = '4',
	['५'] = '5', ['६'] = '6', ['७'] = '7', ['८'] = '8', ['९'] = '9',
	-- punctuation
	['𑃀'] = '.', -- danda
	['+'] = '', -- compound separator
	-- abbreviation sign
	['𑂻'] = '.',
}
-- Maps a consonant to the nasal letter that an adjacent anusvara is rewritten
-- to in export.tr. Consonants without an entry fall back to a plain Latin "n"
-- there.
local nasal_assim = {}
do
	local classes = {
		{ nasal = '𑂑', members = { '𑂍', '𑂎', '𑂏', '𑂐' } },
		{ nasal = '𑂖', members = { '𑂒', '𑂓', '𑂔', '𑂕' } },
		{ nasal = '𑂝', members = { '𑂗', '𑂘', '𑂙', '𑂛' } },
		{ nasal = '𑂧', members = { '𑂣', '𑂤', '𑂥', '𑂦', '𑂧' } },
	}
	for _, class in ipairs(classes) do
		for _, consonant in ipairs(class.members) do
			nasal_assim[consonant] = class.nasal
		end
	end
end
-- Set of three-character sequences (consonant, virama, consonant) that
-- export.tr looks up while deciding whether a word-final "a" is kept.
-- The lookup key is built from the reversed word, so the keys are stored in
-- that same reversed order.
local perm_cl = {}
for _, cluster in ipairs({ '𑂧𑂹𑂪', '𑂫𑂹𑂪', '𑂢𑂹𑂪' }) do
	perm_cl[cluster] = true
end
-- Character lists that are spliced into pattern character classes below and
-- in export.tr.
-- all_cons: every consonant letter the patterns recognise.
local all_cons = '𑂍𑂎𑂏𑂐𑂑𑂒𑂓𑂔𑂕𑂖𑂗𑂘𑂙𑂚𑂛𑂜𑂞𑂟𑂠𑂡𑂣𑂤𑂥𑂦𑂬𑂭𑂮𑂨𑂩𑂪𑂫𑂯𑂝𑂢𑂧'
-- special_cons: consonants checked by the word-final "a" rule in export.tr.
local special_cons = '𑂨𑂩𑂪𑂥𑂫𑂯𑂢𑂧'
-- vowel: Latin "a" (the inherent vowel inserted by export.tr) plus the vowel diacritics.
local vowel = 'a𑂰𑂱𑂲𑂳𑂴𑂵𑂶𑂷𑂸'
-- vowel_sign: the independent vowel letters.
local vowel_sign = '𑂃𑂄𑂅𑂆𑂇𑂈𑂉𑂊𑂋𑂌'

-- Pattern applied to the *reversed* word in export.tr; it captures everything
-- except a medial "a" standing between two consonants that are themselves
-- flanked by vowels, so that replacing a match with its captures drops that "a".
local syncope_pattern
do
	local any_vowel = '[' .. vowel .. vowel_sign .. ']'
	-- The third capture accepts every consonant except 𑂨.
	local cons_without_ya = gsub(all_cons, "𑂨", "")
	syncope_pattern = '(' .. any_vowel .. ')'
		.. '(𑂺?[' .. all_cons .. '])'
		.. 'a'
		.. '(𑂺?[' .. cons_without_ya .. '])'
		.. '([𑂁𑂀]?' .. any_vowel .. ')'
end
-- Returns `text` with its characters in reverse order. Characters are
-- counted and sliced with mw.ustring, i.e. per code point rather than per byte.
local function rev_string(text)
	local ustring = mw.ustring
	local count = ustring.len(text)
	local reversed = {}
	-- Walk from the last character down to the first, appending each one.
	for pos = count, 1, -1 do
		reversed[#reversed + 1] = ustring.sub(text, pos, pos)
	end
	return table.concat(reversed)
end
--- Transliterates Kaithi text into Latin script.
--
-- Steps:
--   1. make the inherent vowel explicit by appending "a" to every consonant
--      that is not followed by a vowel diacritic or a virama;
--   2. for every word (maximal run of Kaithi characters and "a"), working on
--      the reversed word: decide whether the word-final "a" is kept, apply
--      the syncope pattern, and rewrite anusvara;
--   3. convert the remaining characters through `conv` and normalise to NFC.
--
-- @param text  string to transliterate
-- @param lang  not used by this function
-- @param sc    not used by this function
-- @return      the transliterated string in NFC
function export.tr(text, lang, sc)
	-- Patterns that do not change between words are built once per call.
	local cons_class = '[' .. all_cons .. ']'
	local special_class = '[' .. special_cons .. ']'
	local vowel_class = '[' .. vowel .. ']'
	local final_a_pattern = '^a(𑂺?)(' .. cons_class .. ')(.)(.?)'

	-- Step 1: consonant (+ optional nukta) followed by nothing from the
	-- vowel/virama class gets an explicit "a".
	text = gsub(
		text,
		'(' .. cons_class .. '𑂺?)([' .. vowel .. '𑂹]?)',
		function(c, d)
			return c .. (d == "" and 'a' or d)
		end
	)

	-- Step 2: each word is rewritten in place by the callback. Substituting
	-- the processed word back with a text-wide search for the original word
	-- would also hit occurrences inside longer words and corrupt them, so the
	-- replacement is tied to the exact run that was matched.
	text = gsub(text, "[𑂀-𑃁a]+", function(orig_word)
		-- The rules below are anchored at the end of the word and look
		-- backwards, which is easier to express on the reversed string.
		local word = rev_string(orig_word)

		-- Word-final "a": dropped by default, kept after certain clusters.
		-- In reversed order `first` is the last consonant, `second` and
		-- `third` the characters that precede it in the original word.
		word = gsub(
			word,
			final_a_pattern,
			function(opt, first, second, third)
				local keep_a = (match(first, special_class)
						and match(second, '𑂹')
						and not perm_cl[first .. second .. third])
					or match(first .. second, '𑂨[𑂲𑂵𑂶]')
				return (keep_a and "a" or "") .. opt .. first .. second .. third
			end
		)

		-- Syncope: repeat until no medial "a" matches any more (one pass can
		-- expose a new match).
		while match(word, syncope_pattern) do
			word = gsub(word, syncope_pattern, '%1%2%3%4')
		end

		-- Anusvara. In reversed order `succ` is the character that follows
		-- the anusvara in the original word and `prev` the one before it.
		word = gsub(
			word,
			'(.?)𑂁(.)',
			function(succ, prev)
				-- Default: the nasal of the following consonant's class,
				-- or a plain "n" when the consonant has no entry.
				local mid = nasal_assim[succ] or "n"
				if succ .. prev == "a" then
					-- Word-final anusvara after "a": nukta + 𑂧 (reversed
					-- here, so it reads 𑂧 + nukta in the final string).
					-- NOTE(review): this pair is only romanised if `conv`
					-- has an entry for it; otherwise it is left as-is.
					mid = "𑂺𑂧"
				elseif succ == "" and match(prev, vowel_class) then
					-- Word-final anusvara after another vowel: combining tilde.
					mid = "̃"
				end
				return succ .. mid .. prev
			end
		)

		return rev_string(word)
	end)

	-- Step 3: character-by-character conversion (a character plus an optional
	-- nukta is looked up as one key; unmapped matches are left unchanged).
	text = gsub(text, '.𑂺?', conv)
	-- Move a tilde that follows the second letter of "ai"/"au" onto the pair
	-- as a double tilde.
	text = gsub(text, 'a([iu])̃', 'a͠%1')
	return mw.ustring.toNFC(text)
end
-- Hand the module table (carrying `tr`) back to whoever loads this module.
return export