local export = {}
local m_data = mw.loadData("Module:typing-aids/data")
local reorderDiacritics = require("Module:grc-utilities").reorderDiacritics
local formatLink = require("Module:template link").format_link
local listToSet = require("Module:table").listToSet
--[=[
Other data modules:
-- [[Module:typing-aids/data/ar]]
-- [[Module:typing-aids/data/fa]]
-- [[Module:typing-aids/data/gmy]]
-- [[Module:typing-aids/data/grc]]
-- [[Module:typing-aids/data/hit]]
-- [[Module:typing-aids/data/hy]]
-- [[Module:typing-aids/data/sa]]
-- [[Module:typing-aids/data/sux]]
-- [[Module:typing-aids/data/got]]
-- [[Module:typing-aids/data/psu]]
--]=]
-- Local aliases for the Unicode-aware string functions of the Scribunto
-- mw.ustring library, used throughout this module.
local U = mw.ustring.char
local gsub = mw.ustring.gsub
local find = mw.ustring.find
-- U+0301 COMBINING ACUTE ACCENT and U+0304 COMBINING MACRON; searched for in
-- generated transliterations by addAndConvertParameter.
local acute = U(0x0301)
local macron = U(0x0304)
-- Load a data module with mw.loadData without letting a failure propagate.
-- Returns the loaded data on success and nothing at all when loading raises
-- an error (for instance because the module does not exist).
local function load_or_nil(module_name)
	local ok, data = pcall(mw.loadData, module_name)
	if not ok then
		return
	end
	return data
end
-- Walk through the given module names in order and return the data of the
-- first one that loads, together with its name. Arguments that are nil or
-- false are skipped. Returns nothing when no candidate could be loaded.
local function get_module_and_title(...)
	local candidates = { ... }
	-- select("#", ...) gives the true argument count even when some of the
	-- arguments are nil, which the length operator would not guarantee.
	for index = 1, select("#", ...) do
		local name = candidates[index]
		local loaded = name and load_or_nil(name)
		if loaded then
			return loaded, name
		end
	end
end
-- Build a fresh, writable copy of the arguments with every empty-string value
-- left out. When `frame` has a getParent method the arguments of the parent
-- frame are used; otherwise `frame` itself is treated as the argument table.
local function clone_args(frame)
	local source
	if frame.getParent then
		source = frame:getParent().args or frame
	else
		source = frame
	end
	local copy = {}
	for name, value in pairs(source) do
		if value ~= "" then
			copy[name] = value
		end
	end
	return copy
end
-- Wrap `text` in a <span> carrying the language code `lang`. The text is
-- returned unchanged when no language is given or when the code ends in
-- "-tr".
local function tag(text, lang)
	if not lang or find(lang, "%-tr$") then
		return text
	end
	return '<span lang="' .. lang .. '">' .. text .. '</span>'
end
-- Cached reference to m_data.acute_decomposer; fetched on first use only.
local acute_decomposer
-- Normalize Latin text before shortcut replacement. For "sa", "hy", "xcl" and
-- "psu" the text is composed (NFC) and every character is then passed through
-- the acute_decomposer replacement from the data module; for every other
-- language the text is fully decomposed (NFD).
local function compose_decompose(text, lang)
	local partial = lang == "sa" or lang == "hy" or lang == "xcl" or lang == "psu"
	if not partial then
		return (mw.ustring.toNFD(text))
	end
	if not acute_decomposer then
		acute_decomposer = m_data.acute_decomposer
	end
	local composed = mw.ustring.toNFC(text)
	-- Parentheses drop gsub's second result (the match count).
	return (gsub(composed, ".", acute_decomposer))
end
-- Apply several stages of plain pattern replacements to `text`.
-- `seriesOfReplacements` is a sequence of tables mapping a Lua pattern to its
-- replacement. Stages run in sequence order; the order of the rules inside
-- one stage is whatever pairs() yields, so it is not guaranteed.
-- Returns the transformed text.
local function doSequentialSimpleReplacements(text, seriesOfReplacements)
	local current = text
	for _, stage in ipairs(seriesOfReplacements) do
		for pattern, replacement in pairs(stage) do
			-- Parentheses keep only the resulting string, not the match count.
			current = (current:gsub(pattern, replacement))
		end
	end
	return current
end
-- Apply one table of shortcut rules to `text` and trim the result.
-- Each key of `replacements` is a pattern (the shortcut). Its value is either
--   * a string: the replacement for the shortcut, or
--   * a table: [1] is the replacement, and the optional fields `before` and
--     `after` are patterns that must surround the shortcut; whatever they
--     match is captured and written back unchanged around the replacement.
-- Values of any other type are ignored. Rules are visited in pairs() order,
-- which is not guaranteed.
local function doComplexReplacements(text, replacements)
	for shortcut, symbol in pairs(replacements) do
		local kind = type(symbol)
		if kind == "string" then
			text = gsub(text, shortcut, symbol)
		elseif kind == "table" then
			local pattern, replacement = shortcut, symbol[1]
			-- Number of context captures added so far; gives the index of the
			-- back-reference (%1 or %2) that restores each piece of context.
			local captures = 0
			if symbol.before then
				captures = captures + 1
				pattern = "(" .. symbol.before .. ")" .. pattern
				replacement = "%" .. captures .. replacement
			end
			if symbol.after then
				captures = captures + 1
				pattern = pattern .. "(" .. symbol.after .. ")"
				replacement = replacement .. "%" .. captures
			end
			text = gsub(text, pattern, replacement)
		end
	end
	text = mw.text.trim(text)
	return text
end
-- Run doComplexReplacements once per stage of `seriesOfReplacements`, in
-- sequence order, feeding each stage the output of the previous one.
local function doSequentialComplexReplacements(text, seriesOfReplacements)
	local current = text
	for _, stage in ipairs(seriesOfReplacements) do
		current = doComplexReplacements(current, stage)
	end
	return current
end
-- Look up the replacement rules for a language code.
--
-- When m_data.modules has no entry for `lang`, the rules are read from the
-- main data module: the field named after `lang`, or `all` when `lang` is nil.
-- Otherwise the entry names a submodule of Module:typing-aids/data (its first
-- element, else the element keyed by `script`, else `default`). If the entry
-- has a second element, that field of the submodule is returned; if not, the
-- whole submodule is returned.
--
-- Raises an error when the named submodule cannot be loaded.
local function getReplacements(lang, script)
	local module_data = m_data.modules[lang]
	if not module_data then
		if lang then
			return m_data[lang]
		end
		return m_data.all
	end

	local submodule = module_data[1] or module_data[script] or module_data.default
	local resolved_name = "Module:typing-aids/data/" .. submodule
	local replacements_module = load_or_nil(resolved_name)
	if not replacements_module then
		error("Data module " .. resolved_name
			.. " specified in 'modules' table of [[Module:typing-aids/data]] does not exist.")
	end

	local field = module_data[2]
	if field then
		return replacements_module[field]
	end
	return replacements_module
end
-- Convert typing shortcuts in `text` into the characters they stand for.
--
-- text:               the input; anything that is not a string yields nil.
-- origlang:           language code selecting the rules ("xcl" is handled as
--                     "hy").
-- script:             optional script code, used to pick a data submodule.
-- untouchedDiacritics: when true, Ancient Greek ("grc") diacritics are not
--                     reordered afterwards.
-- moduleName:         optional name of a submodule of Module:typing-aids/data
--                     whose contents are used as the rules; if it cannot be
--                     loaded, the normal lookup by language is used instead.
--
-- Returns the converted text. Raises an error when no rules exist for the
-- language, or when the transliteration data needed for "ae", "sa", "got",
-- "hy" or "psu" cannot be found.
local function interpretShortcuts(text, origlang, script, untouchedDiacritics, moduleName)
	mw.log(text, origlang, script, untouchedDiacritics, moduleName)
	if type(text) ~= "string" then
		return nil
	end
	local lang = origlang
	if lang == "xcl" then lang = "hy" end
	local replacements = moduleName and load_or_nil("Module:typing-aids/data/" .. moduleName)
		or getReplacements(lang, script)
		or error("The language code \"" .. tostring(origlang) ..
			"\" does not have a set of replacements in Module:typing-aids/data or its submodules.")
	-- Hittite transliteration must operate on composed letters, because it adds
	-- diacritics to Basic Latin letters: s -> š, for instance.
	if lang ~= "hit-tr" then
		text = compose_decompose(text, lang)
	end
	if lang == "ae" or lang == "sa" or lang == "got" or lang == "hy" or lang == "xcl" or lang == "psu" then
		local lang_module_name = "Module:typing-aids/data/" .. lang
		local script_module_name = script and "Module:typing-aids/data/" .. script or nil
		local replacements_module, replacements_module_name =
			get_module_and_title(lang_module_name, script_module_name)
		-- Without this check a missing data module would surface as an
		-- uninformative "attempt to index a nil value" error below.
		if not replacements_module then
			error("Could not load [[" .. lang_module_name .. "]]"
				.. (script_module_name and " or [[" .. script_module_name .. "]]" or "")
				.. ", which should contain the transliteration replacements for \""
				.. tostring(origlang) .. "\".")
		end
		local transliterationTable = replacements_module[lang .. "-tr"]
			or script and replacements_module[script .. "-tr"]
			or error("Field " .. lang .. "-tr"
				.. (script and " or " .. script .. "-tr" or "")
				.. " not found in [[" .. replacements_module_name .. "]].")
		-- First canonicalize the Latin input, renormalize it, then convert it
		-- with the language's own rules.
		text = doSequentialSimpleReplacements(text, transliterationTable)
		text = compose_decompose(text, lang)
		text = doSequentialComplexReplacements(text, replacements)
	else
		-- A rule set with an element at index 1 is a series of stages;
		-- otherwise it is a single table of rules.
		if replacements[1] then
			text = doSequentialComplexReplacements(text, replacements)
		else
			text = doComplexReplacements(text, replacements)
		end
		if lang == "grc" and not untouchedDiacritics then
			text = reorderDiacritics(text)
		end
	end
	return text
end
-- Convert text written as hyphen- or whitespace-separated sign names.
-- The rules come from Module:typing-aids/data/<lang>: the field named after
-- `lang` if present, otherwise the module itself. <sup>...</sup> markup is
-- unwrapped (its content is followed by a hyphen), the optional `pre`
-- pattern replacements are applied, and then every run of characters other
-- than hyphens and whitespace is replaced by its entry in the rules, or kept
-- as is when it has none. The pieces are joined without separators.
local function hyphenSeparatedReplacements(text, lang)
	local data = mw.loadData("Module:typing-aids/data/" .. lang)
	local replacements = data[lang] or data
	if not replacements then
		error("??")
	end
	text = text:gsub("<sup>(.-)</sup>%-?", "%1-")
	local preprocessing = replacements.pre
	if preprocessing then
		for pattern, replacement in pairs(preprocessing) do
			text = gsub(text, pattern, replacement)
		end
	end
	local pieces = {}
	-- Find groups of characters that aren't hyphens or whitespace.
	for symbol in text:gmatch("([^%-%s]+)") do
		pieces[#pieces + 1] = replacements[symbol] or symbol
	end
	return table.concat(pieces)
end
-- Append one template parameter to `list` and remove it from `args`.
--
-- list:    sequence of already formatted parameters; list.maxarg holds the
--          highest position emitted so far as an unnamed parameter.
-- args:    table of parameters still to be processed; args[key] is cleared.
-- key:     parameter name (string) or position (number).
-- content: value to emit; defaults to args[key].
--
-- A parameter is written as an unnamed positional value only when its key is a
-- number above list.maxarg and its value contains no "="; any skipped
-- positions are padded with empty values. In every other case it is written
-- as "key=value". That includes a numeric key at or below list.maxarg, whose
-- positional slot has already been emitted: appending it unnamed would put it
-- in the wrong position.
--
-- Returns true if a parameter was added, false if there was no value.
local function addParameter(list, args, key, content)
	if not content then content = args[key] end
	args[key] = nil
	if not content then return false end
	local positional = type(key) == "number"
		and key > list.maxarg
		-- Plain-text search: "=" is a literal here, not a pattern.
		and not string.find(tostring(content), "=", 1, true)
	if positional then
		while list.maxarg < key - 1 do
			table.insert(list, "")
			list.maxarg = list.maxarg + 1
		end
		table.insert(list, content)
		list.maxarg = key
	else
		table.insert(list, key .. "=" .. content)
	end
	return true
end
-- Convert the shortcuts in one term parameter and append it to `list`.
--
-- list, args: as for addParameter.
-- key:        the main parameter holding the term.
-- altkey1, altkey2: optional alternative-display parameters. If one of them is
--             present, the main parameter is first converted and added on its
--             own, and the rest of this call then processes the alternative.
-- trkey:      optional transliteration parameter. If the term parameter is
--             absent, the value of `trkey` is taken as the term. If both are
--             present, `trkey` must be "-", otherwise an error is raised.
-- lang:       language code selecting the conversion.
--
-- Returns true if a term was found and added, false otherwise.
local function addAndConvertParameter(list, args, key, altkey1, altkey2, trkey, lang)
	if altkey1 and args[altkey1] then
		addAndConvertParameter(list, args, key, nil, nil, nil, lang)
		key = altkey1
	elseif altkey2 and args[altkey2] then
		addAndConvertParameter(list, args, key, nil, nil, nil, lang)
		key = altkey2
	end
	local content = args[key]
	if trkey and args[trkey] then
		if not content then
			content = args[trkey]
			args[trkey] = nil
		elseif args[trkey] ~= "-" then
			error("Can't specify manual translit " .. trkey .. "=" ..
				args[trkey] .. " along with parameter " .. key .. "=" .. content)
		end
	end
	if not content then return false end
	local trcontent = nil
	-- If Sanskrit or Sauraseni Prakrit and there's an acute accent specified
	-- somehow or other in the source content, preserve the translit, which
	-- includes the accent when the Devanagari doesn't.
	if lang == "sa" or lang == "psu" then
		local proposed_trcontent = interpretShortcuts(content, lang .. "-tr")
		if find(proposed_trcontent, acute) then
			trcontent = proposed_trcontent
		end
	end
	-- If Gothic and there's a macron specified somehow or other in the source
	-- content that remains after canonicalization, preserve the translit,
	-- which includes the accent when the Gothic doesn't.
	if lang == "got" then
		local proposed_trcontent = interpretShortcuts(content, "got-tr")
		if find(proposed_trcontent, macron) then
			trcontent = proposed_trcontent
		end
	end
	--[[
	if lang == "gmy" then
		local proposed_trcontent = interpretShortcuts(content, "gmy-tr")
		if find(proposed_trcontent, macron) then
			trcontent = proposed_trcontent
		end
	end
	--]]
	if lang == "hit" or lang == "akk" then
		trcontent = interpretShortcuts(content, lang .. "-tr")
		content = hyphenSeparatedReplacements(content, lang)
	elseif lang == "sux" or lang == "gmy" then
		content = hyphenSeparatedReplacements(content, lang)
	else
		content = interpretShortcuts(content, lang, args.sc, nil, args.module)
	end
	addParameter(list, args, key, content)
	-- The recursive calls above pass no transliteration key. Without the
	-- `trkey` check a generated transliteration would be handed to
	-- addParameter with a nil key, and assigning to args[nil] raises
	-- "table index is nil".
	if trcontent and trkey then
		addParameter(list, args, trkey, trcontent)
	end
	return true
end
-- Sets of template names, used by printTemplate to decide which parameters
-- hold terms that need shortcut conversion.

-- Templates taking a series of terms: printTemplate converts parameters 3, 4,
-- 5, ... together with their numbered companions (alt1/tr1, alt2/tr2, ...).
local isCompound = listToSet{ "affix", "af", "suffix", "suf", "prefix", "pre", }
-- Templates with the language code in parameter 2 and the term in parameter 3
-- (alternative display in "alt" or 4, transliteration in "tr").
-- Technically lang, ux, and uxi aren't link templates, but they have many of the same parameters.
local isLinkTemplate = listToSet{
"m", "m+", "langname-mention", "l", "ll",
"cog", "noncog", "cognate", "ncog",
"m-self", "l-self",
"desc", "lang", "usex", "ux", "uxi"
}
-- Templates for which printTemplate takes the term's language from
-- parameter 3 and the term from parameter 4 (alternative display in "alt" or 5).
local isTwoLangLinkTemplate = listToSet{ "der", "inh", "bor", "calque", "cal", "translit" }
-- Translation templates: term in parameter 3, with "alt" and "tr".
local isTransTemplate = listToSet{ "t", "t+", "t-check", "t+check" }
-- Build the wikitext of a template call in which the term parameters have had
-- their typing shortcuts converted.
--
-- args: template arguments; args[1] is the template name and the following
--       positional parameters depend on the kind of template (see the
--       isLinkTemplate, isTwoLangLinkTemplate, isTransTemplate and isCompound
--       sets). `args` itself is not modified.
--
-- Returns the string "{{name|...}}". Raises an error when the template name
-- is missing or not in any of the known sets.
local function printTemplate(args)
	-- Work on a copy: addParameter removes every parameter it emits.
	local parameters = {}
	for key, value in pairs(args) do
		parameters[key] = value
	end
	local template = parameters[1]
	local result = { maxarg = 0 }
	addParameter(result, parameters, 1)
	local lang = parameters[2]
	addParameter(result, parameters, 2)
	if isLinkTemplate[template] then
		addAndConvertParameter(result, parameters, 3, "alt", 4, "tr", lang)
		for _, param in ipairs({ 5, "gloss", "t" }) do
			addParameter(result, parameters, param)
		end
	elseif isTwoLangLinkTemplate[template] then
		lang = parameters[3]
		addParameter(result, parameters, 3)
		addAndConvertParameter(result, parameters, 4, "alt", 5, "tr", lang)
		for _, param in ipairs({ 6, "gloss", "t" }) do
			addParameter(result, parameters, param)
		end
	elseif isTransTemplate[template] then
		addAndConvertParameter(result, parameters, 3, "alt", nil, "tr", lang)
		-- Copy the consecutive positional parameters from 4 onwards in order.
		local i = 4
		while parameters[i] do
			addParameter(result, parameters, i)
			i = i + 1
		end
	elseif isCompound[template] then
		local i = 1
		while true do
			local sawparam = addAndConvertParameter(result, parameters, i + 2, "alt" .. i, nil, "tr" .. i, lang)
			if not sawparam then
				break
			end
			for _, param in ipairs({ "id", "lang", "sc", "t", "pos", "lit" }) do
				addParameter(result, parameters, param .. i)
			end
			i = i + 1
		end
	else
		error("Unrecognized template name '" .. tostring(template) .. "'")
	end
	-- Copy any remaining parameters. pairs() has no defined order, and the
	-- position of an unnamed parameter depends on the order of emission, so
	-- numeric keys are handled in ascending order before the named ones.
	local numeric_keys, named_keys = {}, {}
	for k in pairs(parameters) do
		if type(k) == "number" then
			table.insert(numeric_keys, k)
		else
			table.insert(named_keys, k)
		end
	end
	table.sort(numeric_keys)
	table.sort(named_keys, function(a, b) return tostring(a) < tostring(b) end)
	for _, k in ipairs(numeric_keys) do
		if k <= result.maxarg then
			-- This position has already been emitted (possibly as padding),
			-- so the value has to be written with an explicit number.
			table.insert(result, k .. "=" .. parameters[k])
			parameters[k] = nil
		else
			addParameter(result, parameters, k)
		end
	end
	for _, k in ipairs(named_keys) do
		addParameter(result, parameters, k)
	end
	return "{{" .. table.concat(result, "|") .. "}}"
end
-- Entry point: format a template call with converted shortcuts. The arguments
-- are taken from frame.args when present; otherwise `frame` itself is used as
-- the argument table.
function export.link(frame)
	return printTemplate(frame.args or frame)
end
-- Entry point: convert typing shortcuts.
-- With a third or fourth positional parameter, or a "tr" parameter, the
-- arguments are treated as a template call and handed to printTemplate.
-- Otherwise the arguments are either (language, text) or just (text), in
-- which case the language is "all". Returns the converted text, or "" when
-- the conversion yields nothing.
function export.replace(frame)
	local args = clone_args(frame)
	if args[4] or args[3] or args.tr then
		return printTemplate(args)
	end

	local lang, text = "all", args[1]
	if args[2] then
		lang, text = args[1], args[2]
	end

	-- These languages are written as hyphen-separated sign names.
	local hyphenated = lang == "akk" or lang == "gmy" or lang == "hit" or lang == "sux"
	if hyphenated then
		return hyphenSeparatedReplacements(text, lang)
	end
	return interpretShortcuts(text, lang, args.sc, args.noreorder, args.module) or ""
end
-- Entry point: show one usage example, i.e. the formatted template call
-- followed by a new table cell ("\n| ") containing the converted result. The
-- arguments are (language, text) or just (text); in the latter case the
-- language is "all" and the result is not wrapped in a language-tagged span.
function export.example(frame)
	local args = clone_args(frame)
	local lang, text = "all", args[1]
	if args[2] then
		lang, text = args[1], args[2]
	end

	-- Currently, "=" is only used in the shortcuts for Greek, and Greek is
	-- always found in the second parameter, since the first parameter
	-- specifies the language, "grc". Such text needs an explicit "2=".
	local textparam = find(text, "=") and "2=" .. text or text

	local template = {
		[1] = "subst:chars",
		[2] = lang ~= "all" and lang or textparam,
		[3] = lang ~= "all" and textparam or nil,
	}

	local output = { formatLink(template) }
	output[#output + 1] = "\n| "
	output[#output + 1] = lang ~= "all" and "<span lang=\"" .. lang .. "\">" or ""
	output[#output + 1] = export.replace({ lang, text })
	output[#output + 1] = lang ~= "all" and "</span>" or ""
	return table.concat(output)
end
-- Entry point: build a wikitable of usage examples.
-- The first parameter holds the examples, separated by a semicolon followed
-- by whitespace; the optional "lang" parameter is the language code applied
-- to all of them. Each row shows the formatted template call and its
-- converted result.
--
-- The arguments are taken from the parent frame when there is one, otherwise
-- from frame.args if it has a first parameter, otherwise from `frame` itself
-- (so that a plain table can be passed from Lua).
--
-- Raises an error when the first parameter is missing.
function export.examples(frame)
	local args
	if frame.getParent then
		local parent = frame:getParent()
		args = parent and parent.args
	end
	if not args then
		-- frame.args may be absent when a plain table is passed in.
		local own_args = frame.args
		args = own_args and own_args[1] and own_args or frame
	end
	local examples = args[1] and mw.text.split(args[1], ";%s+") or error('No content in the first parameter.')
	local lang = args["lang"]
	local output = { "{| class=\"wikitable\"\n! shortcut !! result\n" }
	-- Every word in this template is replaced by the content of that name.
	local row_template = "|-\n| templateCode || result\n"
	-- ipairs keeps the rows in the order in which the examples were given.
	for _, example in ipairs(examples) do
		local textparam
		if find(example, "=") then
			-- Currently, "=" is only used in the shortcuts for Greek, and
			-- Greek is always found in the second parameter, since the first
			-- parameter specifies the language, "grc".
			textparam = "2=" .. example
		else
			textparam = example
		end
		local template = {
			[1] = "subst:chars",
			[2] = lang or textparam,
			[3] = lang and textparam,
		}
		local result = export.replace{lang, example}
		local content = {
			templateCode = formatLink(template),
			result = tag(result, lang),
		}
		local function addContent(item)
			return content[item] or ('No content for "' .. item .. '".')
		end
		local row = gsub(row_template, "%a+", addContent)
		table.insert(output, row)
	end
	return table.concat(output) .. "|}"
end
return export