Module:yue-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Aims to be a Jyutping-to-anything converter. Currently supported conversions: Jyutping-to-IPA, Jyutping-to-Yale, Jyutping-to-Cantonese-Pinyin and Jyutping-to-Guangdong-Romanization.


local export = {}
local m_string_utils = require("Module:string utilities")

local gsplit = m_string_utils.gsplit
local gsub = m_string_utils.gsub
local len = m_string_utils.len
local lower = m_string_utils.lower
local split = m_string_utils.split

-- Stop codas. A syllable whose coda is one of these has its tone number
-- remapped through `entering_tones` by the IPA converter.
local entering = {
	["p"] = 1,
	["t"] = 1,
	["k"] = 1,
}

-- Tone number written on a stop-final syllable -> the separate tone number
-- used for it. Tones without an entry here are left unchanged by callers.
local entering_tones = {
	["1"] = "7",
	["3"] = "8",
	["6"] = "9",
}

-- Finals that need a respelling before the nucleus/coda lookup: an "r" is
-- inserted after the vowel (or, for <eoi>, the coda letter is swapped) so that
-- the r-marked nucleus / "y" coda entries of the IPA tables are selected.
-- Finals not listed here are looked up unchanged.
local ipa_allophones = {
	["ei"] = "eri",
	["eoi"] = "eoy",
	["ing"] = "irng",
	["ik"] = "irk",
	["ou"] = "oru",
	["ung"] = "urng",
	["uk"] = "urk",
}

-- Jyutping initial -> IPA, used by `jyutping_to_ipa`.
-- The empty-string key lets a syllable with no initial consonant be looked up
-- like any other; an initial missing from this table makes the converter
-- raise "Unrecognised initial".
local ipa_initial = {
	["b"] = "p", ["p"] = "pʰ", ["m"] = "m", ["f"] = "f", 
	["d"] = "t", ["t"] = "tʰ", ["n"] = "n", ["l"] = "l", 
	["g"] = "k", ["k"] = "kʰ", ["ng"] = "ŋ", ["gw"] = "kʷ", ["kw"] = "kʷʰ",
-- disabled entries: `validate` rejects zh/ch/sh as non-Jyutping
-- ["zh"] = "t͡ʃ", ["ch"] = "t͡ʃʰ", ["sh"] = "ʃ",
	["z"] = "t͡s", ["c"] = "t͡sʰ", ["s"] = "s", 
	["h"] = "h", ["w"] = "w", ["j"] = "j", 
	[""] = ""
}

-- vowels with "r" only appear as allophones and should not appear in the input
-- Jyutping nucleus -> IPA, used by `jyutping_to_ipa`.
-- The keys ending in "r" ("er", "ir", "or", "ur") are only reached through the
-- respellings in `ipa_allophones`; a nucleus missing from this table makes the
-- converter raise "Unrecognised nucleus".
local ipa_nucleus = {
	["aa"] = "aː", ["a"] = "ɐ", 
	["e"] = "ɛː", ["er"] = "e", 
	["i"] = "iː", ["ir"] = "ɪ", 
	["o"] = "ɔː", ["or"] = "o", 
	["oe"] = "œː", ["eo"] = "ɵ", 
	["u"] = "uː", ["ur"] = "ʊ", 
	["yu"] = "yː"
}

-- Jyutping coda -> IPA, used by `jyutping_to_ipa`.
-- "y" is not a Jyutping coda letter; it is produced by the <eoi> -> <eoy>
-- respelling in `ipa_allophones`. The empty-string key covers open syllables.
local ipa_coda = {
	["i"] = "i̯", ["u"] = "u̯", ["y"] = "y̯", 
	["m"] = "m", ["n"] = "n", ["ng"] = "ŋ", 
	["p"] = "p̚", ["t"] = "t̚", ["k"] = "k̚", 
	[""] = ""
}

-- Tone number -> superscript tone letters for the IPA output.
-- "1" and "4" are wrapped in a help-cursor <span> whose tooltip gives an
-- alternative contour ("or 53" / "or 11").
-- "7"-"9" are the values `entering_tones` substitutes for stop-final syllables.
-- The empty-string key is used when a syllable has no changed tone.
local ipa_tone = {
	["1"] = "<span style=\"cursor:help\" title=\"or 53\">⁵⁵</span>", 
	["2"] = "³⁵", 
	["3"] = "³³", 
	["4"] = "<span style=\"cursor:help\" title=\"or 11\">²¹</span>", 
	["5"] = "¹³", 
	["6"] = "²²", 
	["7"] = "⁵", 
	["8"] = "³", 
	["9"] = "²", 
	[""] = ""
}

-- Separator placed between the original and the changed tone in the IPA
-- output: the hyphen of the input becomes a superscript minus, and the
-- empty-string key covers syllables without a changed tone.
local ipa_tone_sandhi = {
	["-"] = "⁻", [""] = ""
}

-- Syllabic nasals -> IPA. Anything else matched as a syllabic by
-- `jyutping_to_ipa` (e.g. a bare "n") is absent here and raises
-- "Unrecognised syllabic".
local ipa_syllabic = {
	["m"] = "m̩", ["ng"] = "ŋ̍"
}


-- Wrap `main` in a help-cursor <span> whose tooltip reads "or <option>":
-- `main` is what gets displayed, `option` is shown on hover.
local function alt(main,option)
	return ('<span style="cursor:help" title="or %s">%s</span>'):format(option, main)
end

-- Accented forms used by `yale_tone`, keyed by the plain vowel letter or by
-- the syllabic nasal ("m" / "ng").
-- `acute` is applied for tones 2 and 5.
local acute = {
	a="á", e="é", i="í", o="ó", u="ú", m="ḿ", ng="ńg"
}

-- `grave` is applied for tone 4, and offered as the hover alternative for tone 1.
local grave = {
	a="à", e="è", i="ì", o="ò", u="ù", m="m̀", ng="ǹg"
}

-- `macron` is the displayed form for tone 1.
local macron = {
	a="ā", e="ē", i="ī", o="ō", u="ū", m="m̄", ng="n̄g"
}

-- "?" indicates finals that are not supported by Yale
-- Jyutping final -> Yale final, consulted by `jyutping_to_yale` for whole
-- finals only; a final without an entry is used as spelled.
-- A "?" value makes the converter emit "?" for the syllable, which in turn
-- makes `jyutping_to_yale` return false for the whole text.
local yale_final = {
	["a"] = "?", ["aa"] = "a",
	["eu"] = "?", ["em"] = "?", ["en"] = "?", ["ep"] = "?", ["et"] = "?",
	["om"] = "?", ["op"] = "?",
	["um"] = "?", ["up"] = "?",
	["oe"] = "eu", ["oem"] = "?", ["oen"] = "?", ["oeng"] = "eung", ["oep"] = "?", ["oet"] = "?", ["oek"] = "euk", 
	["eoi"] = "eui", ["eon"] = "eun", ["eot"] = "eut",
}

-- Apply a Yale tone to one vowel cluster (or syllabic nasal).
--- `a`: the first vowel letter of the cluster, which carries the accent
---      (for a syllabic, the whole syllabic, e.g. "m" or "ng")
--- `b`: the remaining vowel letters of the cluster (empty for a syllabic)
---      (e.g. "keui" -> a="e",b="ui"; "keung" -> a="e",b="u")
--- `t`: the tone number, as a string
-- Rules:
--- tones 4, 5 and 6 get an "h" appended after `b`;
--- tone 1 shows the macron form with the grave form as hover alternative,
---   tone 4 takes the grave form, tones 2 and 5 take the acute form;
--- any other tone leaves `a` unaccented.
-- Returns the accented cluster followed by the optional "h".
local function yale_tone(a,b,t)
	local low_marker = (t == "4" or t == "5" or t == "6") and "h" or ""
	local head = a
	if t == "1" then
		head = alt(macron[a], grave[a])
	elseif t == "4" then
		head = grave[a]
	elseif t == "2" or t == "5" then
		head = acute[a]
	end
	return head..b..low_marker
end

-- Convert Jyutping to Sinological IPA.
--- `text`: the Jyutping string, or a table (such as a frame) whose `args[1]`
---         holds that string.
-- Separators are normalised first (", " and "..." become a space, a trailing
-- space is dropped, " / " becomes "/, /"); then every syllable of the shape
-- letters + tone digit + optional "-" + optional changed-tone digit is
-- replaced by its IPA rendering.
-- Returns the converted string.
-- Raises an error when a syllable cannot be split, or when its initial,
-- nucleus, coda, syllabic or tone is not found in the lookup tables.
function export.jyutping_to_ipa(text)
	if type(text) == "table" then text = text.args[1] end
	
	text = text:gsub(", "," "):gsub("%.%.%.", " "):gsub(" $",""):gsub(" / ","/, /")
		-- :gsub("([zcs])yu", "%1hyu")
		-- :gsub("([zcs])oe", "%1hoe")
		-- :gsub("([zcs])eo", "%1heo")
		:gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
			-- try initial+final
			local initial, final = main:match("^([bpmfdtnlgknzcshwj]?[gw]?)([aeiouy]%l*)$")
			if not initial then
				-- otherwise try initial+syllabic
				local syllabic
				initial, syllabic = main:match("^(h?)([mn]g?)$")
				if not initial then
					error("Invalid Jyutping syllable: " .. main)
				end
				main = ipa_initial[initial]
					.. (ipa_syllabic[syllabic] or error("Unrecognised syllabic: " .. syllabic)) -- really?
			else
				-- e.g. convert <ei> (which would be */ɛːi̯/) to <eri> (/ei̯/)
				final = ipa_allophones[final] or final
				local nucleus, coda = final:match("^(y?[aeiou][aeor]?)([iuymnptk]?g?)$")
				if not nucleus then
					error("Invalid Jyutping final: " .. final)
				end
				-- stop-final syllables use their own tone numbers (7/8/9)
				if entering[coda] then
					tone = entering_tones[tone] or tone
					tone2 = entering_tones[tone2] or tone2
				end
				main = (ipa_initial[initial] or error(("Unrecognised initial: \"%s\""):format(initial)))
					.. (ipa_nucleus[nucleus] or error(("Unrecognised nucleus: \"%s\""):format(nucleus)))
					.. (ipa_coda[coda] or error(("Unrecognised coda: \"%s\""):format(coda)))
			end
			-- Guard the tone lookups like every other lookup above: a digit
			-- with no entry (e.g. "0") would otherwise surface as an opaque
			-- "attempt to concatenate a nil value".
			return main
				.. (ipa_tone[tone] or error(("Unrecognised tone: \"%s\""):format(tone)))
				.. ipa_tone_sandhi[symbol]
				.. (ipa_tone[tone2] or error(("Unrecognised tone: \"%s\""):format(tone2)))
		end)
	return text
end

-- Convert Jyutping to Yale romanisation.
--- `text`: the Jyutping string, or a table (such as a frame) whose `args[1]`
---         holds that string.
-- Initials are respelled first (j/jy -> y, z -> j, c -> ch); then every
-- syllable has its final mapped through `yale_final` and its tone applied by
-- `yale_tone`. When a syllable carries a changed tone, only the changed tone
-- is rendered.
-- Returns the Yale string, or false if any syllable has a final that
-- `yale_final` marks as unsupported ("?").
-- Raises an error for a syllable whose final or syllabic cannot be parsed.
function export.jyutping_to_yale(text)
	if type(text) == "table" then text = text.args[1] end
	
	text = text:gsub("jy?","y")
		:gsub("[cz]",{z="j",c="ch"})
		--:gsub("[1-6]%-","")
		:gsub("(%l+)(%d)(%-?)(%d?)", function(main,tone,symbol,tone2)
			if tone2 ~= "" then
				tone = tone2
			end
			-- find the first vowel letter
			local initial,final = main:match("^([^aeiou]*)([aeiou].*)$")
			local a,b,c
			if initial then
				final = yale_final[final] or final
				if final == "?" then
					return "?"
				end
				a,b,c = final:match("^([aeiou])([aiu]*)([mnptk]?g?)$")
				if not a then
					-- previously reached yale_tone with nil arguments and
					-- died on a nil concatenation
					error("Cannot convert final to Yale: " .. final)
				end
			else -- otherwise it is a syllabic
				-- anchored, so stray letters around the nasal are rejected
				-- instead of being silently dropped from the output
				initial,a = main:match("^(h?)([mn]g?)$")
				if a ~= "m" and a ~= "ng" then
					error("Cannot convert syllable to Yale: " .. main)
				end
				b,c = "",""
			end
			return initial..yale_tone(a,b,tone)..c
		end)
	
	-- plain search: "?" is a pattern quantifier and must not be read as one
	if text:find("?", 1, true) then
		return false
	end
	return text
end

-- Convert Jyutping to Cantonese Pinyin.
--- `text`: the Jyutping string, or a table (such as a frame) whose `args[1]`
---         holds that string.
-- Returns the converted string, or false when the text contains one of the
-- finals oem/oen/oep/oet, which are treated as unsupported.
function export.jyutping_to_cantonese_pinyin(text)
	if type(text) == "table" then
		text = text.args[1]
	end

	-- the trailing [^g] keeps "oeng" from being flagged
	local unsupported = text:find("oe[mnpt][^g]")
	if unsupported then
		return false
	end

	-- stop-final syllables use tone numbers 7/8/9 instead of 1/3/6
	local function renumber(coda, tone, hyphen, changed)
		local first = entering_tones[tone] or tone
		local second = entering_tones[changed] or changed
		return coda .. first .. hyphen .. second
	end

	local converted = text:gsub("yu","y")
	converted = converted:gsub("eo[int]",{eoi="oey",eon="oen",eot="oet"})
	converted = converted:gsub("[zc]",{z="dz",c="ts"})
	converted = converted:gsub("([ptk])([1-6])(%-?)([1-6]?)", renumber)
	return converted
end

-- Convert Jyutping to Guangdong Romanization.
--- `text`: the Jyutping string, or a table (such as a frame) whose `args[1]`
---         holds that string.
-- Returns the converted string, or false when the text contains a final that
-- is treated as unsupported.
function export.jyutping_to_guangdong(text)
	if type(text) == 'table' then
		text = text.args[1]
	end

	-- unsupported finals, checked in this order
	local unsupported = {
		"%f[a]a%d",      -- a lone "a" directly before the tone digit
		"oe[mnpt][^g]",
		"[ou][mp]",
		"e[un][^g]",
	}
	for _, pattern in ipairs(unsupported) do
		if text:find(pattern) then
			return false
		end
	end

	-- Ordered rewrite rules; each one runs on the output of the previous one,
	-- so the order must not be changed.
	local rewrites = {
		{"yu", "ü"},
		{"j", "y"},
		{"[zcs]%f[iü]", {z="j",c="q",s="x"}},  -- ü=\xC3\xBC
		{"([jqxy])ü", "%1u"},
		{"eoi", "êu"},
		{"[aeo][aeo]?", {aa="a",a="e",e="é",oe="ê",eo="ê"}},
		{"([ae])u", "%1o"},
		{"([gk])w", "%1u"},
		{"[ptk]%f[%d]", {p="b",t="d",k="g"}},
	}
	for _, rule in ipairs(rewrites) do
		text = text:gsub(rule[1], rule[2])
	end

	return text
end

-- Prepare Jyutping for the homophone lookup: each "original-changed" tone
-- pair is reduced to the changed tone alone, and the result is split on " / "
-- into the individual readings.
function export.jyutping_format(text)
	local changed_only = text:gsub("[1-6]%-([1-6])","%1")
	return split(changed_only," / ")
end

-- Headword line for a Jyutping entry.
--- `frame`: the invoking frame; the parent frame's arguments are processed,
---          and the only declared parameter is `head`.
-- The head defaults to the current page title; tone digits in it (optionally
-- followed by "-" or "*" and a second digit) are wrapped in <sup>.
-- Returns whatever `Module:headword`'s `full_headword` returns.
function export.jyutping_headword(frame)
	local args = require("Module:parameters").process(
		frame:getParent().args,
		{ ["head"] = {} },
		nil,
		"yue-pron",
		"jyutping_headword"
	)

	local head = args.head
	if not head then
		head = mw.title.getCurrentTitle().text
	end

	local yue = require("Module:languages").getByCode("yue")
	local Latn = require("Module:scripts").getByCode("Latn")

	-- keep only the string result of gsub, not its match count
	local displayed = head:gsub("%d[%-%*]?%d?", "<sup>%0</sup>")

	return require("Module:headword").full_headword{
		lang = yue,
		sc = Latn,
		heads = {displayed},
		pos_category = "jyutping",
	}
end

-- Wrap `text` in a <span> that requests the Consolas font, falling back to
-- any monospace font.
local function Consolas(text)
	return ('<span style="font-family: Consolas, monospace;">%s</span>'):format(text)
end

-- Wrap `text` in a <span> carrying the "IPA" class.
local function format_IPA(text)
	local opening, closing = '<span class="IPA">', "</span>"
	return opening .. text .. closing
end

-- Wrap every tone marking (a digit, optionally followed by "-" and/or a
-- second digit) in <sup>...</sup>. Returns the string only.
local function make_superscript(text)
	-- parentheses drop gsub's second result (the match count)
	return (text:gsub("%d%-?%d?","<sup>%0</sup>"))
end

-- Check a raw Jyutping parameter and raise an error on the first problem found.
-- The only punctuation allowed is:
--- ", ": represents a comma (or a break of any sort)
--- "...": represents a slot where a text can go to (e.g. [[一……就……]])
--- ",": separates alternate readings
-- Returns nothing when the input is acceptable.
local function validate(c_rom)
	-- {pattern, message} pairs, tried in order against the raw input
	local rejections = {
		{"[7-9]", "Invalid tone in Jyutping."},
		{"[A-Z]", "Please do not capitalize the Jyutping."},
		{"%-[a-z]", "Please do not hyphenate the Jyutping."},
		{"[0-9][a-z]", "Error in Jyutping: please use spaces to separate syllables."},
		{"[zcs]h", "'zh'/'ch'/'sh' are non-valid Jyutping, use 'z'/'c'/'s' instead."},
		{"y[^u]", "Wrong usage of 'y' in Jyutping."},
		{"oei", "Invalid rime oei in Jyutping. Did you mean eoi?"},
		{"eong", "Invalid rime eong in Jyutping. Did you mean oeng?"},
		{"eok", "Invalid rime eok in Jyutping. Did you mean oek?"},
		{"r", "Invalid letter \"r\" in Jyutping."},
		{"%d%d", "Invalid Jyutping: please use a hyphen to indicate a changed tone."},
	}
	for _, rejection in ipairs(rejections) do
		if c_rom:find(rejection[1]) then
			error(rejection[2])
		end
	end

	-- Reduce the permitted punctuation to single spaces: a leading "..." and
	-- a "..." at the end of the text or before a comma are removed, any
	-- other "..." and every comma (with an optional following space) become
	-- one space.
	local spaced = c_rom:gsub("^%.%.%.","")
	spaced = spaced:gsub("%.%.%.%f[%z,]","")
	spaced = spaced:gsub("%.%.%."," ")
	spaced = spaced:gsub(", ?"," ")

	local has_gap = spaced:find("^ ") or spaced:find("  ") or spaced:find(" $")
	if has_gap then
		error("Empty syllable detected.")
	end
	if spaced:find("[^a-z1-6%- ]") then
		error("Invalid character found.")
	end

	-- ensure that each syllable matches `^%l+%d%-?%d?$`
	for syllable in spaced:gmatch("%S+") do
		local well_formed = syllable:match("^%l+%d%-?%d?$")
		if not well_formed then
			error("Invalid Jyutping syllable: " .. syllable)
		end
	end
end

-- Generate the always-visible text for Standard Cantonese.
--- `c_rom`: the raw Jyutping parameter; it is validated first, so malformed
---          input raises an error
--- `is_single_hanzi`: when truthy, every reading is tracked and turned into a
---          link to the page named after it; otherwise the tone digits are
---          merely superscripted
-- A comma directly followed by another reading is displayed as " / ".
function export.show_standard(c_rom, is_single_hanzi)
	validate(c_rom)
	local shown = c_rom:gsub(",%f[^ ,]"," / ")

	if not is_single_hanzi then
		shown = make_superscript(shown)
		return shown
	end

	for reading in shown:gmatch("[^ ,./]+") do
		require('Module:debug').track('yue-pron/'..reading)
	end
	-- link target is the plain reading; the label superscripts its tone
	local linked = shown:gsub("(%l+)(%d%-?%d?)","[[%1%2|%1<sup>%2</sup>]]")
	return linked
end

-- Generate the collapsed text for Standard Cantonese: the Jyutping itself,
-- every romanisation that could be derived from it, the IPA and, when the
-- homophone data has an entry for one of the readings, a collapsible
-- homophone table.
--- `c_rom`: the Jyutping parameter
--- `is_single_hanzi`: when truthy, the "add homophones" link is not shown
-- Returns the wikitext as one string.
function export.hide_standard(c_rom, is_single_hanzi)
	local jyutping = c_rom:gsub(",%f[^ ,]"," / ")

	-- generate IPA first because the error-catching is located there
	local ipa = export.jyutping_to_ipa(jyutping)
	local yale = export.jyutping_to_yale(jyutping)
	local canton_pinyin = export.jyutping_to_cantonese_pinyin(jyutping)
	local guangdong = export.jyutping_to_guangdong(jyutping)

	-- the first reading with a homophone entry wins
	local hom_data = mw.loadData("Module:yue-pron/hom")
	local hom_key = false
	for _, reading in ipairs(export.jyutping_format(jyutping)) do
		if hom_data[reading] then
			hom_key = reading
			break
		end
	end

	-- output pieces, joined once at the end
	local pieces = {}
	local function emit(piece)
		pieces[#pieces + 1] = piece
	end

	emit("\n** <small>(<i>[[w:Standard Cantonese|Standard Cantonese]], [[w:Guangzhou Cantonese|Guangzhou]]–[[w:Hong Kong Cantonese|Hong Kong]]</i>)</small>")
	if not hom_key and not is_single_hanzi then
		emit('<sup><small><abbr title="Add Cantonese homophones"><span class="plainlinks">[')
		emit(tostring(mw.uri.fullUrl("Module:yue-pron/hom",{["action"]="edit"})))
		emit(" +]</span></abbr></small></sup>")
	end

	emit("\n*** <small><i>[[w:Jyutping|Jyutping]]</i></small>: ")
	emit(Consolas(make_superscript(jyutping)))
	-- a converter returns false when it cannot express the input; skip its line
	if yale then
		emit("\n*** <small><i>[[w:Yale romanization of Cantonese|Yale]]</i></small>: ")
		emit(Consolas(yale))
	end
	if canton_pinyin then
		emit("\n*** <small><i>[[w:Cantonese Pinyin|Cantonese Pinyin]]</i></small>: ")
		emit(Consolas(make_superscript(canton_pinyin)))
	end
	if guangdong then
		emit("\n*** <small><i>[[w:Guangdong Romanization|Guangdong Romanization]]</i></small>: ")
		emit(Consolas(make_superscript(guangdong)))
	end

	emit("\n*** <small>Sinological [[Wiktionary:International Phonetic Alphabet|IPA]] <sup>([[w:Cantonese phonology|key]])</sup></small>: ")
	emit(format_IPA("/" .. ipa .. "/"))

	if hom_key then
		emit('\n*** <small>Homophones</small>: <table class="wikitable mw-collapsible mw-collapsed" style="width:15em;margin:0;')
		emit('position:left; text-align:center"><tr><th></th></tr><tr><td><div style="float: right; clear: right;"><sup>')
		emit('<span class="plainlinks">[')
		emit(tostring(mw.uri.fullUrl("Module:yue-pron/hom",{["action"]="edit"})))
		emit(' edit]</span></sup></div><div style="visibility:hidden; float:left"><sup><span style="color:#FFF">edit</span></sup></div>')

		local links = {}
		local yue = require("Module:languages").getByCode("yue")
		for _, hom in ipairs(hom_data[hom_key]) do
			table.insert(links, require("Module:links").full_link( { term = hom, lang = yue, tr = "-" } ))
		end
		emit(table.concat(links, "<br>"))
		emit("</td></tr></table>")
		emit("[[Category:Cantonese terms with homophones]]")
	end

	return table.concat(pieces)
end

return export