Modul:he-utilities
A modult a Modul:he-utilities/doc lapon tudod dokumentálni
-- Table of everything this module exposes; returned at the end of the file.
local export = {}
local m_utilities = require("Module:utilities")
-- Language object for Hebrew ("he"); passed to m_utilities.catfix below.
local lang = require("Module:languages").getByCode("he")
-- NOTE(review): "Hebr" looks like a script code, yet it is looked up through
-- Module:languages. The variable name suggests a script object (Module:scripts)
-- was intended -- confirm what getByCode returns here before relying on sc.
local sc = require("Module:languages").getByCode("Hebr")
-- A wrapper function allowing the contents of this module to be called from
-- templates. For example, '{{#invoke:he-utilities|main|otSofit|כ}}' produces
-- 'ך', as does '{{#invoke:he-utilities|main|letters|kafSofit}}'.
--
-- frame.args[1] names a member of this module; frame.args[2] is either the
-- single argument passed to that member (when it is a function) or the key
-- looked up in it (when it is a table such as letters or vowels).
-- Raises an error when frame.args[1] does not name any member of the module.
-- Note that the called function receives a plain string, so members that
-- expect a frame object (they read frame.args themselves) must be invoked
-- directly rather than through this wrapper.
function export.main(frame)
	local name, arg = frame.args[1], frame.args[2]
	local member = export[name]
	if member == nil then
		-- Without this guard the lookup below fails with an opaque
		-- "attempt to index a nil value" message.
		error("he-utilities: no function or table named '" .. tostring(name) .. "'.")
	end
	if type(member) == 'function' then
		return member(arg)
	end
	return member[arg]
end
-- Maps letter names (e.g. "alef") to one-character strings holding the letter
-- itself (e.g. "\215\144", the UTF-8 encoding of U+05D0 HEBREW LETTER ALEF).
-- The names are listed in code-point order starting at U+05D0, so the n-th
-- name is assigned the character U+05D0 + (n - 1).
export.letters = {}
do
	local first_codepoint = 0x05D0
	local names_in_order = {
		'alef', 'bet', 'gimel', 'dalet', 'hei', 'vav', 'zayen',
		'khet', 'tet', 'yud', 'kafSofit', 'kaf', 'lamed',
		'memSofit', 'mem', 'nunSofit', 'nun', 'samekh', 'ayin',
		'peiSofit', 'pei', 'tsadiSofit', 'tsadi', 'kuf', 'resh',
		'shin', 'tav',
	}
	for position = 1, #names_in_order do
		local codepoint = first_codepoint + position - 1
		export.letters[names_in_order[position]] = mw.ustring.char(codepoint)
	end
end
-- "sin" is an alias: it maps to the same base letter as "shin".
export.letters.sin = export.letters.shin
-- Maps names of vowel points to one-character strings, in the same way that
-- export.letters does for letters. The list follows code-point order starting
-- at U+05B0 and therefore also contains a few marks and diacritics that are
-- not really "vowels" but sit in the same run of Unicode characters.
export.vowels = {}
do
	local first_codepoint = 0x05B0
	local names_in_order = {
		'shva', 'khatafSegol', 'khatafPatakh', 'khatafKamats',
		'khirik', 'tseirei', 'segol', 'patakh', 'kamats',
		'kholam', 'ignoreMe', 'kubuts', 'dagesh', 'meteg',
		'makaf', 'rafe', 'pasek', 'shinDot', 'sinDot',
		'sofPasuk', 'upperDot',
	}
	for position, name in ipairs(names_in_order) do
		-- 'ignoreMe' only holds a slot so that the names after it line up
		-- with their code points; it never becomes an entry of the table.
		if name ~= 'ignoreMe' then
			export.vowels[name] = mw.ustring.char(first_codepoint + position - 1)
		end
	end
end
-- "mapik" is an alias for the same character as "dagesh".
export.vowels.mapik = export.vowels.dagesh
-- Converts kaf, mem, nun, pei or tsadi to its final ("sofit") form; any other
-- value is returned unchanged.
function export.otSofit(letter)
	for _, name in ipairs({ 'kaf', 'mem', 'nun', 'pei', 'tsadi' }) do
		if letter == export.letters[name] then
			-- Each final form sits one code point before its regular form.
			local regular_codepoint = mw.ustring.codepoint(letter)
			return mw.ustring.char(regular_codepoint - 1)
		end
	end
	return letter
end
-- Like export.otSofit, but when the result is kaf sofit (i.e. the input was
-- kaf or kaf sofit) a sh'va is appended, since kaf sofit is written with a
-- sh'va when it has no other vowel.
function export.otSofitShva(letter)
	local final_form = export.otSofit(letter)
	if final_form ~= export.letters.kafSofit then
		return final_form
	end
	return final_form .. export.vowels.shva
end
-- Converts a final form (kaf sofit, mem sofit, nun sofit, pei sofit or tsadi
-- sofit) back to the regular letter; any other value is returned unchanged.
function export.otLoSofit(letter)
	local known = export.letters
	local is_final_form = letter == known.kafSofit
		or letter == known.memSofit
		or letter == known.nunSofit
		or letter == known.peiSofit
		or letter == known.tsadiSofit
	if not is_final_form then
		return letter
	end
	-- The regular form is the code point right after the final form.
	return mw.ustring.char(mw.ustring.codepoint(letter) + 1)
end
-- Appends a dagesh to bet, gimel, dalet, kaf, pei or tav; any other value is
-- returned unchanged.
function export.dageshKal(letter)
	for _, name in ipairs({ 'bet', 'gimel', 'dalet', 'kaf', 'pei', 'tav' }) do
		if letter == export.letters[name] then
			return letter .. export.vowels.dagesh
		end
	end
	return letter
end
-- If letter is shin plus a shin dot or a sin dot, returns the bare shin
-- (without the dot); otherwise, just returns letter.
function export.dotlessShin(letter)
	-- The dots are stored in export.vowels, not export.letters. Reading them
	-- from export.letters yields nil and makes the concatenation below raise
	-- an error on every call.
	if letter == export.letters.shin .. export.vowels.shinDot
	   or letter == export.letters.sin .. export.vowels.sinDot then
		return export.letters.shin
	end
	return letter
end
-- Pieces of the pattern that export.plain_root uses to split a root into
-- radicals. Despite the name "regex", this is passed to mw.ustring.gsub.
-- Exactly one character in the range alef..tav.
local letters = "[א-ת]"
-- Optionally one mark from this set directly after the letter. The radical
-- tables below use such marks in the keys for he-with-dot and shin/sin.
local modifiers = "[ּׁׂׄ]?"
-- Optionally one separator after the radical: hyphen, maqaf, space, period,
-- comma, exclamation mark or pipe.
local separators = "[-־ %.,!|]?"
-- Only the letter plus its optional mark is captured; the separator is
-- matched (and so consumed) but is not part of the capture.
local regex = "(" .. letters .. modifiers .. ")" .. separators
-- Radicals accepted by export.plain_root in a medial position. A radical is
-- rejected when its entry is false or missing: he with a dot is explicitly
-- false, final letter forms have no entry, and shin is only listed together
-- with a shin dot or sin dot.
local medial_radicals = {
["א"] = true,
["ב"] = true,
["ג"] = true,
["ד"] = true,
["ה"] = true,
["הּ"] = false,
["ו"] = true,
["ז"] = true,
["ח"] = true,
["ט"] = true,
["י"] = true,
["כ"] = true,
["ל"] = true,
["מ"] = true,
["נ"] = true,
["ס"] = true,
["ע"] = true,
["פ"] = true,
["צ"] = true,
["ק"] = true,
["ר"] = true,
["שׁ"] = true,
["שׂ"] = true,
["ת"] = true,
}
-- Initial radicals follow the same rules; this is the same table object as
-- medial_radicals, not a copy.
local initial_radicals = medial_radicals
-- Radicals accepted by export.plain_root in the final position. Compared with
-- the medial table: kaf, mem, nun, pei and tsadi appear only in their final
-- forms (the regular forms have no entry and are rejected), he with a dot is
-- allowed, and vav and yud are explicitly disallowed.
local final_radicals = {
["א"] = true,
["ב"] = true,
["ג"] = true,
["ד"] = true,
["ה"] = true,
["הּ"] = true,
["ו"] = false,
["ז"] = true,
["ח"] = true,
["ט"] = true,
["י"] = false,
["ך"] = true,
["ל"] = true,
["ם"] = true,
["ן"] = true,
["ס"] = true,
["ע"] = true,
["ף"] = true,
["ץ"] = true,
["ק"] = true,
["ר"] = true,
["שׁ"] = true,
["שׂ"] = true,
["ת"] = true,
}
-- Romanization of each radical, used by transliterate_root. The keys are
-- passed to mw.ustring.gsub as patterns, which is why letters that have a
-- final form are written as bracket classes matching both forms.
-- NOTE(review): there is no separate entry for he followed by a dot; the plain
-- he entry presumably replaces just the letter and leaves the mark in the
-- output -- confirm whether that is intended.
local radical_romanizations = {
["א"] = "ʾ",
["ב"] = "b",
["ג"] = "g",
["ד"] = "d",
["ה"] = "h",
["ו"] = "w",
["ז"] = "z",
["ח"] = "ḥ",
["ט"] = "ṭ",
["י"] = "y",
["[כך]"] = "k",
["ל"] = "l",
["[מם]"] = "m",
["[נן]"] = "n",
["ס"] = "s",
["ע"] = "ʿ",
["[פף]"] = "p",
["[ץצ]"] = "ṣ",
["ק"] = "q",
["ר"] = "r",
["שׁ"] = "š",
["שׂ"] = "ś",
["ת"] = "t",
}
-- Romanizes a root string: every maqaf becomes an ASCII hyphen, then each
-- pattern in radical_romanizations is replaced by its Latin equivalent.
-- Returns the romanized string.
local function transliterate_root(root_string)
	-- The parentheses keep only gsub's first result (the new string) and
	-- drop the substitution count.
	local latin = (root_string:gsub("־", "-"))
	for hebrew_pattern, replacement in pairs(radical_romanizations) do
		latin = (mw.ustring.gsub(latin, hebrew_pattern, replacement))
	end
	return latin
end
-- Parses the root given in frame.args[1] and returns it in normalized form:
-- its radicals joined by a maqaf.
--
-- The input is consumed radical by radical, where a radical is one letter,
-- optionally followed by one mark, optionally followed by one separator.
-- Raises an error when
--   * the argument is missing, or fewer than two radicals are found;
--   * anything is left over after all radicals have been removed;
--   * a radical is not allowed in its position (initial, medial or final).
function export.plain_root(frame)
	local input = frame.args[1]
	if input == nil then
		-- A missing argument contains no radicals; report it the same way as
		-- an empty one instead of letting mw.ustring.gsub fail on nil.
		error("Root must have at least two radicals.")
	end

	-- Collect each captured radical and delete it (together with its
	-- trailing separator) from the input; whatever survives is unparseable.
	local radicals = {}
	local scraps = mw.ustring.gsub(input, regex, function(radical)
		radicals[#radicals + 1] = radical
		return ""
	end)
	if scraps ~= "" then
		error("Unrecognized characters in root.")
	end

	local count = #radicals
	if count < 2 then
		error("Root must have at least two radicals.")
	end

	for i, radical in ipairs(radicals) do
		-- Choose the table of allowed radicals for this position.
		local position, allowed
		if i == 1 then
			position, allowed = "initial", initial_radicals
		elseif i == count then
			position, allowed = "final", final_radicals
		else
			position, allowed = "medial", medial_radicals
		end
		if not allowed[radical] then
			error("Unrecognized " .. position .. " radical " .. radical .. ".")
		end
	end

	return table.concat(radicals, "־")
end
-- Validates and normalizes the root in frame.args[1] via export.plain_root,
-- then returns its romanization. Errors raised by export.plain_root propagate.
function export.romanized_root(frame)
	return transliterate_root(export.plain_root(frame))
end
-- Returns the result of m_utilities.catfix called with this module's
-- language and script objects. Takes no arguments.
export.catfix = function()
	local fix = m_utilities.catfix(lang, sc)
	return fix
end
return export