-- Modul:zh-forms
-- This module can be documented on the Modul:zh-forms/doc page.
-- Table of public functions; it is returned at the end of the module.
local export = {}
-- Sibling modules: m_zh supplies link() and extract_gloss(), links supplies
-- language_link() and remove_links(), lang supplies getByCode().
local m_zh = require("Module:zh")
local links = require("Module:links")
local lang = require("Module:languages")
-- Short local aliases for the mw.ustring functions used throughout this module.
local find = mw.ustring.find
local gsub = mw.ustring.gsub
local match = mw.ustring.match
local len = mw.ustring.len
-- Script code used in the lang="zh-…" and class="…" attributes, keyed by the
-- kind of form being displayed.
local sc = { trad = "Hant", simp = "Hans", both = "Hani" }
-- Builds the wikitext of the box that shows the written forms of a Chinese term.
--
-- The arguments are read from the parent frame (i.e. from the template call):
--   1, 2, ...  gloss for each component; "-" is displayed as ''phonetic''
--   s, t       lists of simplified / traditional forms; when `t` is not given,
--              the current page's subpageText is used as the first traditional form
--   type       string of digits, one per component, giving each component's length
--   delink     "y", or a comma-separated list of component numbers, whose text is
--              split into individually linked characters
--   alt        comma-separated alternative forms, each "form-note:transliteration"
--   lit, note  text for an extra row ("Literally: ..." or a free note)
--   gloss      "-" marks the whole term as phonetic
--
-- Returns the table wikitext followed by any category links.
--
-- Raises an error when an `s`/`t` value has leading or trailing spaces, when
-- `type` does not add up to the word length, when `delink` contains something
-- that is not a number, or when a numbered Etymology/Pronunciation heading on
-- the page is directly followed by a part-of-speech heading at the wrong level.
function export.make(frame)
	local params = {
		[1] = { list = true, allow_holes = true, allow_empty = true },
		["s"] = { list = true },
		["t"] = { list = true },
		["alt"] = {}, ["type"] = {}, ["delink"] = {}, ["lit"] = {}, ["note"] = {}, ["gloss"] = {}
	}
	local parent_args = frame:getParent().args
	local args = require("Module:parameters").process(parent_args, params)
	local comp_type = args["type"]
	local s, t = {}, {}
	local annotation = {}
	local current_title = mw.title.getCurrentTitle()
	local pagename = current_title.subpageText
	local zh_lang = lang.getByCode("zh")

	-- Without an explicit `t`, the page itself is the first traditional form.
	if not parent_args["t"] then
		table.insert(t, 1, pagename)
	end

	local function insert_st(set, text)
		if find(text, "^ ") or find(text, " $") then
			error("Please remove the leading and / or trailing space(s) in the 's' and 't' parameters.")
		end
		table.insert(set, text)
	end

	for i = 1, #args.s do insert_st(s, args.s[i]) end
	for i = 1, #args.t do insert_st(t, args.t[i]) end

	-- `t` was passed but yielded no usable value (presumably a blank `|t=`, which
	-- Module:parameters appears to drop - TODO confirm). Everything below indexes
	-- t[1], so fall back to the page name instead of failing on a nil.
	if #t == 0 then
		table.insert(t, pagename)
	end

	--temp tracking
	if #s == 0 and (gsub(t[1], '.', require("Module:zh/data/ts").ts)) ~= t[1] then
		require('Module:debug').track('zh-forms/entry possibly missing a simplified form')
	end
	if #t == 1 and find(t[1], "[濕溼裡裏群羣床牀衛衞污汚為爲偽僞炮砲秘祕麵麪喧諠嘩譁鄰隣臺輓遊閒線綫]") then
		require('Module:debug').track('zh-forms/entry possibly missing a variant form')
	end

	s.name = "simp"
	t.name = "trad"

	-- Several traditional forms but no simplified one: reuse the first traditional form.
	if #t ~= 1 and #s == 0 then
		table.insert(s, t[1])
	end

	-- Returns a superscript asterisk when the page `term` exists and mentions
	-- zh-pron / zh-see more than once in total, otherwise "".
	-- Component links (`iscomp`) only get one when the headword is a single character.
	local function asterisk(term, iscomp)
		if iscomp and len(t[1]) > 1 then return "" end
		if term == pagename then return "" end
		local title = mw.title.new(term)
		if not (title and title.exists) then return "" end
		local content = title:getContent()
		if not content then return "" end
		-- Replace each template name by a marker, drop everything else, count markers.
		content = gsub(content, "zh%-pron", "Ꙁ")
		content = gsub(content, "zh%-see", "Ꙁ")
		content = gsub(content, "[^Ꙁ]", "")
		return mw.ustring.len(content) > 1 and '<sup><span class="explain" title="This form has one or more other meanings.">*</span></sup>' or ''
	end

	-- Row separator plus the opening of a header cell (label column).
	local function var_fmt(length, color)
		return '\n|-\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background: #' .. (color or 'E0FFFF') ..
			';font-weight: normal;font-size: smaller;" colspan="2" |'
	end

	-- Opening of a data cell holding one component; `last` adds the right border.
	local function char_gap(length, script, last)
		return '\n| style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em; background-color:white;' ..
			(last and 'border-right: 1px solid #aaa;border' .. (length ~= 1 and '-bottom' or '') .. ': 1px solid #aaa; '
			or 'border-bottom: 1px solid #aaa; ') ..
			'font-size:x-large" lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '" | '
	end

	-- Table opening: indented for long or many-variant words, right-aligned otherwise.
	local function header(length, var_count)
		return ((length > 3 or var_count * length > 5) and ':{|' or '{| align=right') ..
			' style="clear: right;margin: 1em;border-collapse: collapse;text-align: center"' ..
			(length ~= 1 and '\n|-\n! colspan=2|' or '')
	end

	-- Opening of a gloss cell whose pixel width grows with `word`.
	local function gloss_fmt(word, colspan, length)
		return '\n! style="padding: 0.' .. (length > 8 and '3' or '5') ..
			'em;border: 1px solid #aaa;background:#F5F5DC;font-weight: normal;font-size: 85%; width:' ..
			(length <= 8 and (30 * word + 30) or (25 * word + 25)) .. 'px" colspan=' .. (colspan or 1) ..'|'
	end

	-- Parenthesised, slash-separated links to the whole-word forms ("" for single characters).
	local function form_fmt(text, length, script)
		local fmtd_text = {}
		for i, value in ipairs(text) do
			fmtd_text[i] = links.language_link({ lang = zh_lang, term = value }) .. asterisk(value, false)
		end
		return length ~= 1 and ((length > 8 and '' or '<span style="font-size:140%">') ..
			'(<span lang="zh-' .. sc[script] .. '" class="' .. sc[script] .. '">' .. table.concat(fmtd_text, "/") .. '</span>)' ..
			(length > 8 and '' or '</span>')) or ''
	end

	-- One cell per component; the final cell is opened with the `last` styling.
	local function char_fmt(text, length, script)
		return (#text ~= 1 and (char_gap(length, script) .. table.concat(text, char_gap(length, script), 1, #text-1)) or '') .. char_gap(length, script, true) .. text[#text]
	end

	local test_word = t[1]
	local length = len(test_word)
	local word_division = {}
	local i = 1
	local decomposable

	-- word_division holds { first, last } character positions for every component.
	if comp_type then
		-- Splitting on "" yields one digit at a time; `index` is a string and is
		-- converted by Lua's arithmetic coercion below.
		for index in mw.text.gsplit(comp_type, "", true) do
			-- Punctuation in the headword gets a division of its own.
			if find(mw.ustring.sub(test_word, i, i), '[,%-]') then
				table.insert(word_division, { i, i } )
				i = i + 1
			elseif mw.ustring.sub(test_word, i, i) == '…' then
				table.insert(word_division, { i, i + 1 } )
				i = i + 2
			end
			table.insert(word_division, { i, i + index - 1 } )
			i = i + index
		end
		if i - 1 ~= len(gsub(test_word, '…+$', '')) and not find(table.concat(t) .. table.concat(s), "[⿰⿱⿲⿳⿴⿵⿶⿷⿸⿹⿺⿻]") then
			error("'type' parameter does not match word length.")
		end
	else
		for pos = 1, length do
			table.insert(word_division, { pos, pos } )
		end
		-- Only read by the disabled "decomposable titles" category further down.
		decomposable = len(gsub(test_word, '…+$', '')) > 2 and true or false
	end

	local delink = {}
	if args["delink"] then
		if args["delink"] == "y" then
			for del_index in ipairs(word_division) do
				delink[del_index] = "yes"
			end
		else
			for position in mw.text.gsplit(args["delink"], ",") do
				local del_index = tonumber(position)
				-- A nil table key would raise an unhelpful "table index is nil".
				if not del_index then
					error("The 'delink' parameter must be \"y\" or a comma-separated list of component numbers.")
				end
				delink[del_index] = "yes"
			end
		end
	end

	local char_set = { ['simp'] = {}, ['trad'] = {} }
	local identity = #s == 0 and {t} or {s,t}
	local uncreated = {}
	for _, id in ipairs(identity) do
		for div_index, position in ipairs(word_division) do
			-- Collect the distinct spellings of this component across all forms.
			local char_string = ""
			for j = 1, #id do
				local word_form = mw.ustring.sub(id[j], position[1], position[2])
				-- Plain search: word_form is page text, not a pattern.
				if not find(char_string, word_form, 1, true) then
					char_string = (char_string ~= "" and (char_string .. '/') or "") .. word_form
				end
			end
			if not find(char_string, '[,%-]') then
				-- Delinked components are split per character, others per variant.
				local separator = delink[div_index] and "" or "/"
				local linked = {}
				for thing in mw.text.gsplit(char_string, separator) do
					table.insert(linked, links.language_link({ lang = zh_lang, term = thing }) .. asterisk(thing, true))
				end
				char_string = table.concat(linked, separator)
			end
			table.insert(char_set[id.name], char_string)
		end
		-- The only consumer of `uncreated` is the disabled annotation below; the
		-- lookups are left in place so that re-enabling it needs no other change.
		for _, item in ipairs(id) do
			if not (mw.title.new(item) or {}).exists and item ~= pagename then
				table.insert(uncreated, '"[[' .. item .. ']]"')
			end
		end
	end

	-- Count how many of these character ranges occur in the headword.
	local scripts = { ['一-鿼㐀-䶿𠀀-𪛝𪜀-𫜴𫝀-𫠝𫠠-𬺡𬺰-𮯠𰀀-𱍊'] = 'Hani' , ['a-zA-ZāēīōūĀĒĪŌŪa-zA-Z'] = 'Latn', ['0-90-9'] = 'Numb', ['Ͱ-Ͽ'] = 'Grek' }
	local script = {}
	for range, script_name in pairs(scripts) do
		if find(test_word, '[' .. range .. ']') then
			table.insert(script, script_name)
		end
	end

	-- Reduplication category: the headword contains an immediately repeated sequence.
	if find(t[1], "([^─…]+)%1") and args['gloss'] ~= '-' and len(t[1]) < 7 then
		if gsub(comp_type or "", "1", "") == "" then
			table.insert(annotation, '[[Category:Chinese reduplications]]')
		elseif find(t[1], "([^…][^…]+)%1") or find(table.concat(char_set['trad'], " "), "([^─…%[%]a-z]+)%1") then
			-- Skip the category when a multi-character component has no usable
			-- Chinese entry or is marked phonetic there.
			local evil
			for _, component in ipairs(char_set['trad']) do
				if len(component) > 1 then
					-- mw.title.new returns nil for an invalid title.
					local comp_title = mw.title.new(links.remove_links(component))
					local comp_content = comp_title and comp_title:getContent() or false
					if not comp_content or find(comp_content, "|gloss=-") or not find(comp_content, "==Chinese==") then
						evil = true
					end
				end
			end
			if not evil then table.insert(annotation, '[[Category:Chinese reduplications]]') end
		end
	end

	if #script > 1 then
		table.insert(annotation, '[[Category:Chinese terms written in multiple scripts]]')
	end
	-- Disabled annotations, kept for reference:
	--table.insert(annotation, (decomposable and args['gloss'] ~= '-' and not args['note'] and not args['lit']) and '[[Category:Chinese entries with potentially decomposable titles]]' or nil)
	--table.insert(annotation, (#uncreated > 0 and mw.title.getCurrentTitle().nsText == "") and '[[Category:Chinese terms with uncreated forms]]' ..
	--'<small class="attentionseeking">(' .. (#uncreated == 1 and 'This form' or 'These forms') ..
	--' in the hanzi box ' .. (#uncreated == 1 and 'is' or 'are') .. ' uncreated: ' ..
	--table.concat(uncreated, ", ") .. '.)</small>' or nil)

	-- Label cell plus one wide data cell, used for the "alternative forms" and
	-- "anagram" rows. The data cell reuses the padding and border part of
	-- char_gap's style. It is cut at the font-size declaration rather than at a
	-- fixed character offset, so the style attribute stays well-formed whatever
	-- the length of the lang/class attributes that follow it.
	local function extra_row(label, body)
		local gap = char_gap(length, 'trad', true)
		local style_end = string.find(gap, 'font-size:x-large', 1, true)
		return var_fmt(length, 'F0FFE0') .. label .. string.sub(gap, 1, style_end - 1)
			.. 'font-size:90%; padding-top: 10px; padding-bottom: 10px; border-right: 1px solid #aaa; border-bottom: 1px solid #aaa"'
			.. ' colspan="' .. #word_division .. '"|' .. body
	end

	local altforms = ""
	if args["alt"] then
		local altform_list = {}
		for altform in mw.text.gsplit(args["alt"], ",") do
			-- "form-note:transliteration"
			local altdecomp = mw.text.split(altform, ":")
			local altdecomp2 = mw.text.split(altdecomp[1], "-")
			local altdecomp3 = altdecomp2[2] and ' <span style="font-size:70%"><i>' .. gsub(altdecomp2[2], "‡", frame:expandTemplate{
				title = "Template:zh-historical-dict",
				args = { type = "form", nocat = "1" }
			}) .. '</i></span>' or ''
			table.insert(altform_list, '<span style="white-space:nowrap;">' ..
				m_zh.link(nil, nil, { altdecomp2[1], tr = (altdecomp[2] or "-") }, pagename) ..
				altdecomp3 .. '</span>')
		end
		-- More than five forms: show the first five, keep the full list collapsed.
		if #altform_list > 5 then
			altforms = '<div class="vsSwitcher" data-toggle-category="Chinese alternative forms"><span class="vsToggleElement"> </span>' ..
				'<div class="vsShow">' .. table.concat(altform_list, "<br>", 1, 5) ..
				'</div><div class="vsHide">' .. table.concat(altform_list, "<br>") .. '</div></div>'
		else
			altforms = table.concat(altform_list, "<br>")
		end
		altforms = extra_row('alternative forms', altforms)
	end

	-- Two different characters: link the reversed word if it has a Chinese entry.
	local anagram = ""
	if len(t[1]) == 2 and not find(t[1], "(.)%1") then
		local anagram_link = mw.ustring.sub(t[1], 2, 2) .. mw.ustring.sub(t[1], 1, 1)
		local anagram_title = mw.title.new(anagram_link)
		local anagram_content = anagram_title and anagram_title:getContent() or false
		if anagram_content and find(anagram_content, "==Chinese==") then
			anagram = extra_row('anagram', m_zh.link(nil, nil, { anagram_link, tr = "-" }, pagename))
		end
	end

	-- Extra row: `lit` takes precedence over `note`.
	local literal = ""
	if args["lit"] or args["note"] then
		local literal_text
		if args["lit"] then
			-- Add a closing full stop unless the text already ends in one.
			literal_text = '<i>Literally:</i> “' .. args["lit"] ..
				(find(args["lit"], "%.$") and "”" or "”.")
		else
			literal_text = args["note"]
		end
		literal = '\n|-' .. gloss_fmt(length, #word_division + 2, length) .. literal_text
	end

	-- Gloss row. Single-character headwords get none.
	local gloss = {}
	if args['gloss'] == '-' then
		gloss = { gloss_fmt(length * 1.6, #word_division, length) .. '<i>phonetic</i>' }
	elseif length ~= 1 then
		for div_index, position in ipairs(word_division) do
			local character = mw.ustring.sub(t[1], position[1], position[2])
			local gloss_text = args[1][div_index]

			-- Load glosses module if no gloss was supplied and the word is a single character.
			if not gloss_text and position[1] == position[2] then
				local glosses = mw.loadData("Module:zh/data/glosses")
				gloss_text = glosses.glosses[character] or ""
				-- Expand {{taxlink|...}} calls embedded in the stored gloss.
				gloss_text = gsub(gloss_text, "{{taxlink|([^{}]+)}}",
					function (taxlink_text)
						local taxlink_args, argi = {}, 1
						for arg in mw.text.gsplit(taxlink_text, "|") do
							local arg_split = mw.text.split(arg, "=")
							if arg_split[2] then
								taxlink_args[arg_split[1]] = arg_split[2]
							else
								taxlink_args[argi] = (arg ~= "" and arg or nil)
								argi = argi + 1
							end
						end
						local current_frame = mw.getCurrentFrame()
						return current_frame:expandTemplate{
							title = 'taxlink',
							args = taxlink_args
						}
					end)
				if gloss_text == "" and find(character, "^[一-龯㐀-䶵]+$") and not find(glosses.nonlemma, character) then
					require('Module:debug').track('zh-forms/no gloss found for Chinese character')
				end
			end

			if gloss_text == "-" then gloss_text = "''phonetic''" end
			if not gloss_text then
				gloss_text = ""
			end

			-- Use a non-breaking hyphen so that a suffix gloss such as "-ist" is
			-- not broken between the hyphen and the rest.
			if find(gloss_text, "-", nil, true) then
				local nonbreaking_hyphen = mw.ustring.char(0x2011)
				gloss_text = mw.ustring.gsub(gloss_text, "^%-", nonbreaking_hyphen)
				gloss_text = mw.ustring.gsub(gloss_text, "(%s)%-", "%1" .. nonbreaking_hyphen)
			end

			-- Multi-character component without a gloss: take it from its own entry.
			if gloss_text == "" and position[2] > position[1] then
				-- mw.title.new returns nil for an invalid title.
				local comp_title = mw.title.new(character)
				local content = comp_title and comp_title:getContent() or false
				if content then
					gloss_text = m_zh.extract_gloss(content, false)
					if gloss_text == "" and find(character, "^[一-龯㐀-䶵]+$") then
						require('Module:debug').track('zh-forms/no gloss found but entry exists')
					end
					-- Plain search: `character` is page text, not a pattern.
					if not string.find(content, character, 1, true) then
						require('Module:debug').track('zh-forms/compounds not mentioned in derived terms on the component pages')
					end
				else
					if gloss_text == "" and find(character, "^[一-龯㐀-䶵]+$") then
						require('Module:debug').track('zh-forms/no gloss found with a nonexistent entry')
					end
				end
			end

			-- Drop senses without any ASCII letter or digit, and stray semicolons.
			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]+;", ";")
			gloss_text = gsub(gloss_text, ";[^a-zA-Z0-9]*$", "")
			gloss_text = gsub(gloss_text, ";+", ";")

			-- More than three semicolons: display only the first three senses.
			-- NOTE(review): unlike the alternative-forms switcher there is no
			-- "vsHide" part here, so the remaining senses are not output at all.
			-- Confirm whether that is intended.
			if len(gsub(gloss_text, '[^;]', '')) > 3 then
				local first_senses = match(gloss_text, '^[^;]+;[^;]+;[^;]+')
				-- No match when the text starts with ";"; leave it unchanged then.
				if first_senses then
					gloss_text = '<div class="vsSwitcher" data-toggle-category="glosses"><span class="vsToggleElement"> </span><div class="vsShow">' ..
						first_senses .. '</div></div>'
				end
			end

			-- Width factor of the cell: 0 for punctuation, otherwise the component
			-- length, multiplied by the number of "/" in its cell text plus one
			-- unless the component is delinked.
			local word_length = match(character, '[,…%-]') and 0 or
				(delink[div_index] and len(character) or len(character) * (len(gsub(char_set['trad'][div_index], '[^/]', '')) + 1))
			table.insert(gloss, gloss_fmt(word_length, 1, length) .. gloss_text)
		end
	end

	-- Sanity checks on the heading structure of the page named like the current title.
	local PAGENAME = current_title.text
	local page_title = mw.title.new(PAGENAME)
	local content = page_title and page_title:getContent()
	if content then
		local applicable_pos = { ["Noun"] = 1, ["Verb"] = 1, ["Adjective"] = 1, ["Adverb"] = 1,
			["Definitions"] = 1, ["Pronunciation"] = 1 }
		local previous_level = 2
		local subheading_wanted
		--[=[
		-- Used under headers for Chinese varieties, for instance in [[āu-piah]]
		local Chinese_section = string.match(content, "\n==Chinese==\n(.-)\n==[^=]")
		if not Chinese_section then
			error("No Chinese section found.")
		end
		--]=]
		if length == 1 and not string.match(content, "===Definitions===") then
			require('Module:debug').track('zh-forms/no definitions section found')
		end
		if length == 1 and not string.match(content, "===Compounds===") and string.match(content, "zh-der") then
			require('Module:debug').track('zh-forms/derived terms probably needing renaming')
		end
		for equals, heading_text in string.gmatch(content, "\n(%=%=+)([^%=]+)%=%=+") do
			local current_level = #equals
			-- The previous heading ended in a number, so this one should be exactly
			-- one level deeper; only the headings listed above are enforced.
			if subheading_wanted and current_level ~= previous_level + 1 then
				if applicable_pos[heading_text] then
					error("The heading \"===Etymology ''n''===\" or \"===Pronunciation ''n''===\" must be followed by a subheading one level lower.")
				end
			end
			previous_level = current_level
			subheading_wanted = string.find(heading_text, " [0-9]+") and true or false
		end
	end

	local simp_note = ""
	--This is not a complete list!
	local unified_char = "[匕比化北溍鄑懊燠墺隩澳噢房窗捔桷角埆确斛槲蔛薢解檞懈廨蟹獬澥嶰邂起骨滑猾磆螖嗗愲蓇尳榾搰酓馠谽唅含肣筨梒鋡莟琀浛盦韽班玪妗欦黔雂念梣枔岑笒涔侺今黅衿衾坅搇琴芩庈耹靲吟霠矜低氐袛羝眡岻奃趆柢邸底坻抵牴觝弤骶砥胝疷秪泜蚳彽阺祗厎茋令怜零需冷跉阾柃袊旍舲囹蛉苓伶泠瓴拎刢玲聆竛笭翎昤彾呤狑秢岭紷砱羚姈蕶澪辶近迎道述蚤慅搔溞瘙瑵糔鼜甚商罕深虎唬沿船咎昝晷倃致恐鞏築嬴捌瘟塭殟嗢熬敖嗷螯聱遨獒滶蔜謷嶅嗸摮嫯鏊傲磝艹艾芃芊芍芏芔芝芣芥芫花芸芽苄苊苗苞苟苤苦苧英苴𢯽苹苽茂范茉茜茶茸茹荅草荍荏荐荒荔荷荽荾莈莊莞莰菙菂菅菇菊菍菓菜華菰菲萄萌萑萣落葚葛葰葱葵蒈蒲蓄蓬蓳蓺蓼蔡蔽蕉蕹薄薇薏薦薪薹藏藠藤警擎憼擏儆㯳璥曔檠蟼㢣望碳炭湠㮴艘嫂瞍叟傁廋醙遚搜溲獀鄋螋瘦裯啁禂翢雕凋琱彫奝蜩惆婤稠椆周郮晭倜害犗幰攇瀗割磍瞎豁丰妦仹邦梆垹蚌玤祓蛂拔菝袚茇盋帗跋魃䣮馛炦妭犮胈苃坺冹瞂黻翇娽椂氯琭盝睩碌粶菉觮趢逯邍醁騄鵦龣㖨㪖㫽䎑䎼䐂䚄䟿䩮䰁䱚碌䘵䩮拰任壬栠袵妊衽挰逞裎悜睈呈程酲珵徎郢浧桯鞓驖戜珊涊忍荵刃仞肕牣杒僙墴潢璜癀磺穔簧蟥檨秫次舌踭埩鬇搢梁尨妒芮妠蚋笍犟勥莒非啡罪匪扉肋肌肚肛肝股肢肥肩肯育肺胃背胎胖胚胞胯胰胱胳胸能脆脊脖腔腕腥腮腰腴腹腺腿膀膈膊膏膛膜膝臂臆臑筋然炍炐炒炙炢炬烇烵焓焫煽熊蒸燃燧㸆入全佺拴栓跧痊荃𣗎醛銓筌輇硂駩絟恮峑牷穴空究穹窄垂涶唾睡箠硾陲腄最八分公翁嗡滃菘倯松彸反仮坂岅㤆扳汳返阪昄板炍版𦙀瓪畈皈眅叛𢆕粄𥾵舨䛀𦤇𧿨䡊𠭤魬㽹蝂敢㒈噉𡑒㜟𢕭㦑撖澉㺖䧩𣊟橄瞰𥕵𥼲𦗪𦪧𧗐豃𨅺𮡜𩍉𡪯𭗐䆻𥴊𫶔憨饏𠣽𭅞𠪚𤺍鬫微毒垔堙湮凐歅禋陻黫鄄甄薽籈新感慢侻娧帨裞痥㙂傜徭猺墟憈歔料米籽粉粑粒粗粘粞粟粥粱粲粳粹粺精粿糅糊糍糒糕糖糗糙糜糞糟糠糯糱敬儆擎警雚灌獾罐嚿鬼媿魂瑰魄魏嵬巍蒐魅魁魑魍羌麻嘛嫲麾磨魔彗慧斥坼拆]"
	-- No separate simplified form was given, yet the word contains one of the
	-- listed characters: show a simplified row anyway, flagged with "#".
	if #identity == 1 and find(test_word, unified_char) then
		simp_note = '<sup><span class="explain" title="Using the same code point' .. (length>1 and 's' or '') .. ' as the traditional form due to Han unification. Without proper font support, it may be displayed as the same as the traditional form.">#</span></sup>'
		char_set['simp'] = char_set['trad']
	end

	-- Either one combined row, or a traditional row followed by a simplified row.
	local form_rows
	if #identity == 1 and simp_note == '' then
		form_rows = var_fmt(length) .. '[[w:Egyszerűsített kínai írás|egyszerűsített]] és [[w:Hagyományos kínai írás|hagyományos]]<br>' ..
			form_fmt(t, length, 'both') .. char_fmt(char_set['trad'], length, 'both')
	else
		form_rows = var_fmt(length) .. '[[w:Hagyományos kínai írás|hagyományos]] ' ..
			form_fmt(t, length, 'trad') .. char_fmt(char_set['trad'], length, 'trad') ..
			(var_fmt(length) .. '[[w:Egyszerűsített kínai írás|egyszerűsített]] ' .. simp_note ..
			form_fmt(#s == 0 and t or s, length, 'simp') .. char_fmt(char_set['simp'], length, 'simp'))
	end

	return table.concat{
		header(length, math.max(#t, #s)), table.concat(gloss, ""),
		form_rows, altforms, anagram, literal, '\n|}', table.concat(annotation)
	}
end
-- Hand the table of public functions (export.make) back to whoever loads this module.
return export