-- Modul:de-IPA
-- A modult a Modul:de-IPA/doc lapon tudod dokumentálni
--[=====[
Currently missing:
* Function for final obstruent devoicing of d, g, b, s, r (ɐ̯)
* Function for pre-consonantal obstruent devoicing of d, g, b, s
* Function to reduce geminates
* List of environments which trigger the palatalisation of /x/ (liquids + non-low front vowels)
* Function to determine if H is word initial (> /h/) or non-initial (> 0) (⟨-ehe⟩- should be /eː/ in verbs only)
* Function to put stress in general, function to check for prefixes and realign stress accordingly.
* Function to convert ⟨e⟩ in unstressed syllables to ə > Function to reduce -ər to -r + "devoicing"
* Function to convert ⟨c⟩ before front vowels to /t͡s/?
* An input parameter stating whether the word is Germanic or Romance might make a lot of exceptions
predictable/automatable, e.g. /ɪ, ɔ, ʊ/ > /i, o, u/ for short vowels in closed syllables, or
penultimate vs. final stress
* The inseparable prefixes do not take stress > Stress on the 2nd syllable
** A complete list could be compiled and the process automated, instead of making the user enter the stress by hand
* Rules to determine when to make vowels short vs. long. There will need to be
ways to override this; I think they should be adding an h to force length,
and either using a breve (e.g. ă ĕ ĭ) or maybe doubling the following consonant
to force shortness. What should those rules be? Some guesses:
- vowels should be short before a geminate consonant
- vowels should be long in a stressed open syllable
- vowels should probably be long in a stressed final syllable before a single
consonant (but with possible exceptions, e.g. '-eg')
- vowels should probably be short before two consonants (except possibly 'st'?)
- syllables with secondary stress should be treated as if stressed
- syllables directly following a known prefix (aus-, zu-, über-, ge-, etc.)
should be treated as if stressed, whether they are actually stressed or not
- when there's an explicit slash to separate compounds, all parts should be
treated as if they were separate words for vowel-length purposes (e.g.
'-tag' in 'Reichs/tag' should be long)
- what about other unstressed syllables?
--]=====]
local export = {}
local u = mw.ustring.char
local rfind = mw.ustring.find
local rsubn = mw.ustring.gsub
local rmatch = mw.ustring.match
local rsplit = mw.text.split
local ulower = mw.ustring.lower
local uupper = mw.ustring.upper
local usub = mw.ustring.sub
local ulen = mw.ustring.len
-- Hand a finished transcription to Module:IPA for display, using the
-- language object for code "de".
-- @param text  transcription string; passed on as the `pron` field
-- @return whatever format_IPA_full() of Module:IPA returns
local function format_ipa(text)
    local german = require("Module:languages").getByCode("de")
    local ipa_module = require("Module:IPA")
    local items = { { pron = text } }
    return ipa_module.format_IPA_full(german, items)
end
-- Same as rsubn(), but only the substituted string is returned; the
-- replacement count is dropped.
local function rsub(term, foo, bar)
    -- the extra parentheses truncate the call to its first return value
    return (rsubn(term, foo, bar))
end
-- Keep applying rsub(term, foo, bar) to its own result until a pass leaves
-- the string unchanged, then return that fixed point.
local function rsub_repeatedly(term, foo, bar)
    local previous
    repeat
        previous = term
        term = rsub(previous, foo, bar)
    until term == previous
    return term
end
-- "If not empty": turn the empty string into nil and hand every other value
-- back untouched.
local function ine(x)
    if x ~= "" then
        return x
    end
    return nil
end
-- Combining marks, built from their code points.
local GR, AC = u(0x0300), u(0x0301)     -- U+0300, U+0301
local BREVE, DIA = u(0x0306), u(0x0308) -- U+0306, U+0308

-- Character inventories (plain strings, meant to be spliced into sets).
local stress_accent = AC .. GR
local accent = stress_accent .. BREVE
local vowel = "aeiouyäöüæœ" .. accent -- vowel letters plus the accent marks
local front_vowel = "eiyæœ"

-- Ready-made pattern fragments; the suffix _c marks a single character set,
-- _r a repeated one.
local stress_accent_c = "[" .. stress_accent .. "]"
local accents_r = "[" .. accent .. "]*"
local vowel_c = "[" .. vowel .. "]"
local front_vowel_c = "[" .. front_vowel .. "]"
-- anything that is neither a vowel/accent nor one of . ⁀ space - ( )
local cons_c = "[^" .. vowel .. ".⁀ %-()]"
-- Like cons_c, but ⁀ is not excluded (so the word-boundary marker matches),
-- while r and l are excluded. Original author's note: /l/ & /r/ were added as
-- a stopgap against Brücke -> /ˈprʏkə/, but this may need a new name.
local cons_or_boundary_c = "[^" .. vowel .. "rl. %-()]"

-- Replacement for each letter handled by the devoicing step.
local devoiced_cons = { b = "p", d = "t", g = "k", r = "ɐ̯" }
-- Grapheme-to-phone table, indexed by a single (lower-case) character.
-- An entry has one of three shapes:
--   * a string            -> that one phone is emitted;
--   * an array of strings -> the phones are emitted in order;
--   * a keyed table       -> maps spellings beginning with that character to
--                            a string or array as above; export.IPA picks the
--                            longest key that matches at the current position.
-- NOTE: export.IPA rewrites "sch", "ch" and "ck" before it consults this
-- table, so keys containing those spellings cannot match there; "ck" is also
-- filed under "k" although it begins with "c".
local sequences = {
    a = {
        a = "a",
        ah = "aː",
        ai = "aɪ̯",
        au = "aʊ̯",
        auch = { "aʊ̯", "x" },
    },
    ["ä"] = {
        ["ä"] = "ɛ",
        ["äh"] = "ɛː",
        ["äu"] = "ɔʏ̯",
    },
    b = {
        b = "b",
        bb = "b",
    },
    c = {
        c = "ts", -- ???
        ch = "ç",
    },
    d = {
        d = "d",
        dd = "d",
        dsch = "dʒ",
    },
    e = {
        e = "ɛ",
        ee = "eː",
        ei = "aɪ̯",
        eich = { "aɪ̯", "ç" },
        eu = "ɔʏ̯",
    },
    f = "f",
    g = "ɡ",
    h = "h",
    i = {
        i = "ɪ",
        ie = "iː",
    },
    j = "j",
    k = {
        k = "k",
        kk = "k",
        ck = "k",
    },
    l = "l",
    m = "m",
    n = {
        n = "n",
        ng = "ŋ",
        nn = "n",
    },
    o = {
        oo = "oː",
        os = { "ɔ", "s" },
        o = "ɔ",
    },
    ["ö"] = {
        -- XXX: sometimes /øː/
        ["ö"] = "œ",
        ["ös"] = { "œ", "s" },
    },
    p = {
        ph = "f",
        pp = "p",
        p = "p",
    },
    q = {
        qu = { "k", "f" },
        q = "k", -- XXX
    },
    r = {
        -- XXX: /ʀ/? /r/?; sometimes /ɐ/ ("Uhr"); also /ər/ ("oder")
        r = "r",
        rr = "r",
    },
    s = {
        s = "s",
        sch = "ʃ",
        sp = { "ʃ", "p" },
        ss = "s",
        st = { "ʃ", "t" },
    },
    t = {
        t = "t",
        tsch = "t͡ʃ",
        tt = "t",
        tion = { "t͡s", "i̯", "o", "n" },
    },
    u = {
        u = "ʊ",
        uch = { "ʊ", "x" },
    },
    ["ü"] = {
        ["ü"] = "yː",
        ["üh"] = "yː",
    },
    v = "f",
    w = "ʋ",
    x = { "k", "s" }, -- XXX
    y = "i",
    z = "z", -- already converted from s
    ["ß"] = "s",
    ["́"] = "ˈ", -- FIXME
    ["-"] = {},
}
-- Replace b/d/g/r by their devoiced_cons counterpart wherever the character
-- that follows matches cons_or_boundary_c.
-- Every character is judged against its right-hand neighbour in the
-- unmodified input. The earlier gsub over "([bdgr])(<cons>)" consumed that
-- neighbour as part of the match, so a b/d/g standing second in a cluster was
-- skipped: the g of "berg" and the d of "pferd" or "magd" stayed voiced.
local function devoice_before_cons(text)
    local next_is_cons = "^" .. cons_or_boundary_c
    local out = {}
    for idx = 1, ulen(text) do
        local ch = usub(text, idx, idx)
        local devoiced = devoiced_cons[ch]
        -- past the end usub() yields "", which the pattern cannot match
        if devoiced and rfind(usub(text, idx + 1, idx + 1), next_is_cons) then
            out[#out + 1] = devoiced
        else
            out[#out + 1] = ch
        end
    end
    return table.concat(out)
end

-- Build the formatted IPA transcription for a German term.
-- @param text  the spelling to transcribe, or a table with an `args` field
--              (presumably a Scribunto frame -- confirm against callers), in
--              which case args[1], args.orig and args.pos are read with empty
--              strings treated as absent. Defaults to the current page title.
-- @param orig  accepted, but not used by this function
-- @param pos   accepted, but not used by this function
-- @return the result of format_ipa() for "/" .. transcription .. "/"
function export.IPA(text, orig, pos)
    if type(text) == 'table' then
        text, orig, pos = ine(text.args[1]), ine(text.args.orig), ine(text.args.pos)
    end
    text = text or mw.title.getCurrentTitle().text
    text = ulower(text)

    -- decompose, then recompose umlauted vowels, and convert ae oe ue to
    -- umlauted vowels
    text = mw.ustring.toNFD(text)
    -- while we're doing this, don't get confused by wrongly-ordered umlauts/e's
    -- and other accents
    text = rsub(text, "(" .. accents_r .. ")([e" .. DIA .. "])", "%2%1")
    text = rsub(text, "([aou])[e" .. DIA .. "]", {a="ä", o="ö", u="ü"})
    -- put breves before acute/grave accents
    text = rsub(text, "(" .. stress_accent_c .. ")" .. BREVE, BREVE .. "%1")

    -- To simplify checking for word boundaries and liaison markers, we
    -- add ⁀ at the beginning and end of all words, and remove it at the end.
    -- Note that the liaison marker is ‿.
    text = rsub(text, "%s*,%s*", '⁀⁀ | ⁀⁀')
    text = rsub(text, "%s+", '⁀ ⁀')
    text = rsub(text, "%-+", '⁀-⁀')
    text = '⁀⁀' .. text .. '⁀⁀'

    -- Spelling normalisation. Order matters: "ch" after a/o/u must be marked
    -- before the generic "ch" rule, and "sch" before "ch".
    text = rsub(text, "([aou]" .. accents_r .. ")" .. "ch", "%1χ")
    text = rsub(text, "sch", "ʃ")
    text = rsub(text, "ch", "ç")
    text = rsub(text, "ck", "kk")
    text = rsub(text, "z", "c")
    -- s directly before a vowel becomes z
    text = rsub(text, "s(" .. vowel_c .. ")", "z%1")
    text = devoice_before_cons(text)

    -- Convert letters to phonemes
    local phones, i, n = {}, 1, ulen(text)
    while i <= n do
        local bid = ulower(usub(text, i, i))
        local value = sequences[bid]
        if type(value) == 'table' and not value[1] then
            -- keyed sub-table: take the longest key that matches at position
            -- i; the strict ">" makes the outcome independent of pairs() order
            local bidl = ulen(bid)
            for seq in pairs(value) do
                local seql = ulen(seq)
                if seql > bidl and ulower(usub(text, i, i + seql - 1)) == seq then
                    bid, bidl = seq, seql
                end
            end
            value = value[bid]
        end
        if type(value) == 'string' then
            table.insert(phones, value)
        elseif not value then
            -- no mapping: copy the character through unchanged
            table.insert(phones, bid)
        else
            for _, phone in ipairs(value) do
                table.insert(phones, phone)
            end
        end
        i = i + ulen(bid)
    end
    text = table.concat(phones)

    -- remove hyphens and word-boundary markers
    text = rsub(text, '[⁀%-]', '')
    return format_ipa('/' .. text .. '/')
end
return export