A modult a Modul:hit-translit/doc lapon tudod dokumentálni

local export = {}

local bit32 = require('bit32')
local m_table = require('Module:table')
local m_tag = require('Module:hit-translit/tag')
local sign_list = mw.loadData('Module:hit-translit/data')

local ulen = mw.ustring.len
local usub = mw.ustring.sub

-- Maps every character recognised as a phonetic segment to its normalized
-- form. Characters that are not keys of this table are skipped by find_seg.
local segments = {}
do
	-- Registers each spelling passed after "normalized" as mapping to it.
	local function register(normalized, ...)
		for _, spelling in ipairs({ ... }) do
			segments[spelling] = normalized
		end
	end
	
	-- vowels: plain, acute and grave spellings share the plain form
	register('a', 'a', 'á', 'à')
	register('e', 'e', 'é', 'è')
	register('i', 'i', 'í', 'ì')
	register('u', 'u', 'ú', 'ù')
	
	-- consonants with voicing alternates (each maps to itself)
	for consonant in ('bpdtgkq'):gmatch('.') do
		register(consonant, consonant)
	end
	
	-- single consonants; two of them lose their diacritic when normalized
	register('h', 'ḫ')
	register('s', 'š')
	for consonant in ('rlmnzyw'):gmatch('.') do
		register(consonant, consonant)
	end
	
	-- numbers (each digit maps to itself)
	for digit in ('0123456789'):gmatch('.') do
		register(digit, digit)
	end
end

-- Gives every normalized segment its own bit (a distinct power of two), so
-- that sets of onsets or codas can be stored as bit masks. The numeric value
-- also serves as the rank used when sorting syllables.
local sort_order = {}
do
	-- Listed from lowest bit (2 ^ 0) to highest bit (2 ^ 29).
	local ranked_segments = {
		-- vowels ("i" is deliberately ranked before "e")
		'a', 'i', 'e', 'u',
		-- consonants with voicing alternates
		'p', 'b', 't', 'd', 'k', 'g', 'q',
		-- single consonants
		'h', 'r', 'l', 'm', 'n', 's', 'z', 'y', 'w',
		-- numbers
		'0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
	}
	for position, segment in ipairs(ranked_segments) do
		sort_order[segment] = 2 ^ (position - 1)
	end
end

local function inplace_multikey_sort(t)
	-- Orders the entries of "t" in place: primarily by the rank of their onset
	-- (field "o") and, when the onsets are equal, by the rank of their coda
	-- (field "c"). Ranks come from sort_order. Returns the same table.
	local function precedes(left, right)
		local field = (left.o == right.o) and 'c' or 'o'
		return sort_order[left[field]] < sort_order[right[field]]
	end
	
	table.sort(t, precedes)
	return t
end

local function find_seg(syl, rev)
	-- Walks through "syl" one character at a time, from the front or, when
	-- "rev" is truthy, from the back, and returns the normalized form of the
	-- first character that is a key of "segments". For example:
	-- 		find_seg("šaq") gives "s"
	-- 		find_seg("luḫ", true) gives "h"
	-- Raises an error when no character of "syl" is a known segment.
	local length = ulen(syl)
	for offset = 1, length do
		local position = rev and -offset or offset
		local normalized = segments[usub(syl, position, position)]
		if normalized then
			return normalized
		end
	end
	
	local wanted = rev and 'coda' or 'onset'
	error('Could not find a ' .. wanted .. ' for the syllable "' .. syl .. '".')
end

function export.find_onset(syl)
	-- Returns the normalized form of the first recognised segment of "syl",
	-- scanning from the start of the syllable.
	local onset = find_seg(syl, false)
	return onset
end

function export.find_coda(syl)
	-- Returns the normalized form of the last recognised segment of "syl",
	-- scanning backwards from the end of the syllable.
	local scan_backwards = true
	local coda = find_seg(syl, scan_backwards)
	return coda
end

function export.hash_sign(sign)
	-- Rewrites "sign" in place. Every syllable string in its array part is
	-- replaced by a record holding the syllable plus its normalized onset ("o")
	-- and coda ("c"), and the bits of all onsets and codas are collected into
	-- sign.o_hash and sign.c_hash. Other fields are left alone. For example
	-- 		{ "it", "id", "et", "ed", hit = true }
	-- 		becomes:
	-- 		{
	-- 			{ "it", o = "i", c = "t" },
	-- 			{ "id", o = "i", c = "d" },
	-- 			{ "et", o = "e", c = "t" },
	-- 			{ "ed", o = "e", c = "d" },
	--			o_hash = 6, c_hash = 192, hit = true
	-- 		}
	sign.o_hash = 0
	sign.c_hash = 0
	for index, syllable in ipairs(sign) do
		local onset = export.find_onset(syllable)
		local coda = export.find_coda(syllable)
		sign[index] = { syllable, o = onset, c = coda }
		sign.o_hash = bit32.bor(sign.o_hash, sort_order[onset])
		sign.c_hash = bit32.bor(sign.c_hash, sort_order[coda])
	end
end

function export.copy_sign(sign)
	-- Returns a private, modifiable copy of the sign_list entry stored under
	-- the key "sign". When the entry is flagged "hit" (it has Hittite
	-- syllables) the copy is also hashed and sorted by onset and coda.
	local copy = m_table.deepcopy(sign_list[sign], true)
	if not copy.hit then
		return copy
	end
	
	export.hash_sign(copy)
	return inplace_multikey_sort(copy)
end

local function remove_syls(first, second, mask)
	-- Narrows two adjacent lists of Hittite syllables to the entries that are
	-- compatible with "mask": entries of "first" whose coda bit is in the mask
	-- and entries of "second" whose onset bit is in the mask.
	-- Returns two new lists flagged hit = true; the inputs are not modified.
	-- On the side where the two signs meet, the hash is set to "mask" itself;
	-- on the other side it is rebuilt from the entries that were kept.
	local kept_first = { hit = true, o_hash = 0, c_hash = mask }
	for _, entry in ipairs(first) do
		if bit32.band(sort_order[entry.c], mask) > 0 then
			table.insert(kept_first, entry)
			-- not needed for fitting, but useful for tracking
			kept_first.o_hash = bit32.bor(kept_first.o_hash, sort_order[entry.o])
		end
	end
	
	local kept_second = { hit = true, o_hash = mask, c_hash = 0 }
	for _, entry in ipairs(second) do
		if bit32.band(sort_order[entry.o], mask) > 0 then
			table.insert(kept_second, entry)
			kept_second.c_hash = bit32.bor(kept_second.c_hash, sort_order[entry.c])
		end
	end
	
	return kept_first, kept_second
end

-- Bit masks of segments that count as approximate matches for one another.
local related_character_masks
do
	-- Combines the sort_order bits of all segments passed in into one mask.
	local function group(...)
		local mask = 0
		for _, segment in ipairs({ ... }) do
			mask = bit32.bor(mask, sort_order[segment])
		end
		return mask
	end
	
	related_character_masks = {
		-- voicing alternates
		group('p', 'b'),
		group('t', 'd'),
		group('k', 'g', 'q'),
		
		-- "u" patterns next to "w"
		group('u', 'w'),
		
		-- numbers pattern together
		group('0', '1', '2', '3', '4', '5', '6', '7', '8', '9'),
	}
end

local function approx_match(first_hash, second_hash)
	-- Returns the union of every mask in related_character_masks that shares
	-- at least one bit with "first_hash" and at least one bit with
	-- "second_hash" (for example "p" on one side and "b" on the other).
	-- Returns 0 when no group touches both hashes.
	local combined = 0
	for index = 1, #related_character_masks do
		local group = related_character_masks[index]
		if bit32.band(group, first_hash) ~= 0 then
			if bit32.band(group, second_hash) ~= 0 then
				combined = bit32.bor(combined, group)
			end
		end
	end
	return combined
end

function export.fit_signs(first, second)
	-- Takes two adjacent signs and drops the Hittite syllables that do not fit
	-- together. Exact overlaps between the codas of "first" and the onsets of
	-- "second" are tried first, then approximate ones (see approx_match).
	-- Returns the two, possibly narrowed, signs; when nothing applies the
	-- arguments are returned unchanged.
	if not (first and second) then
		-- Word-initial (no "first") or word-final (no "second") sign:
		-- there is no special handling for these yet.
		return first, second
	end
	
	if not (first.hit and second.hit) then
		-- Only signs that both have Hittite syllables are fitted.
		return first, second
	end
	
	local exact = bit32.band(first.c_hash, second.o_hash)
	if exact > 0 then -- both sides share at least one character
		return remove_syls(first, second, exact)
	end
	
	local approximate = approx_match(first.c_hash, second.o_hash)
	if approximate > 0 then -- both sides have approximately matching characters
		return remove_syls(first, second, approximate)
	end
	
	return first, second
end

local function assemble_word(signs)
	-- Picks the first reading of every sign, wraps it with the matching m_tag
	-- helper unless the sign is Hittite, and joins the pieces with "-".
	-- The flags are checked in a fixed order: hit, sum, akk, hurr, hatt,
	-- glossenkeil. Signs carrying none of these flags contribute nothing.
	local pieces = {}
	for _, entry in ipairs(signs) do
		local reading = entry[1]
		if entry.hit then
			-- Hittite readings are records (see export.hash_sign);
			-- the syllable text sits at index 1.
			table.insert(pieces, reading[1])
		elseif entry.sum then -- Sumerogram
			table.insert(pieces, m_tag.tag_sumerogram(reading))
		elseif entry.akk then -- Akkadogram
			table.insert(pieces, m_tag.tag_akkadogram(reading))
		elseif entry.hurr then -- Hurrian
			table.insert(pieces, m_tag.tag_hurrian_tr(reading))
		elseif entry.hatt then -- Hattic
			table.insert(pieces, m_tag.tag_hattic_tr(reading))
		elseif entry.glossenkeil then -- Glossenkeil: no reading, just the marker
			table.insert(pieces, m_tag.glossenkeil())
		end
	end
	
	return table.concat(pieces, '-')
end

function export.transpose(text)
	-- Takes a continuous Cuneiform string and converts it to transliteration.
	--
	-- At each position the longest prefix (3, then 2, then 1 characters) that
	-- is a key of sign_list is consumed, copied with export.copy_sign and
	-- fitted against the previously collected sign with export.fit_signs.
	-- The collected signs are finally joined by assemble_word.
	--
	-- Raises an error when no prefix at the current position is a key of
	-- sign_list. Without that check "text" would never get shorter and the
	-- loop would never terminate.
	local signs = {}
	while ulen(text) > 0 do
		-- longest match first
		local matched_len
		for len = 3, 1, -1 do
			if sign_list[usub(text, 1, len)] then
				matched_len = len
				break
			end
		end
		if not matched_len then
			error('Could not find a sign for "' .. usub(text, 1, 1) .. '".')
		end
		
		table.insert(signs, export.copy_sign(usub(text, 1, matched_len))) -- add in new sign
		text = usub(text, matched_len + 1) -- truncate string
		
		-- fit the new sign against its predecessor (nil for the first sign)
		local last = #signs
		signs[last - 1], signs[last] = export.fit_signs(signs[last - 1], signs[last])
	end
	
	signs[#signs] = export.fit_signs(signs[#signs], nil) -- fit end of word
	
	return assemble_word(signs)
end

function export.tr(text, lang, sc)
	-- Transliterates "text" when the script code "sc" is "Xsux"; returns nil
	-- for any other script. Every run of characters in the range 𒀀-𒑱 is
	-- replaced by the result of export.transpose, and the whole string is then
	-- passed through m_tag.tag_hittite_tr. "lang" is accepted but not used.
	if sc == "Xsux" then
		local converted = mw.ustring.gsub(text, '[𒀀-𒑱]+', export.transpose)
		return m_tag.tag_hittite_tr(converted)
	end
	
	return nil
end

return export