Module:character info: difference between revisions
Jump to navigation
Jump to search
Content deleted Content added
Theknightwho (talk | contribs) One more Module:string utilities function. |
major cleanup of argument handling; image=-, aliases=-, next_codepoint_title=- and previous_codepoint_title=- must be used to suppress these rather than supplying an empty string; fix issue with displaying the composition of characters like ≯ made out of undisplayable components |
||
Line 10: | Line 10: | ||
local ulen = m_str_utils.len |
local ulen = m_str_utils.len |
||
local m_unicode = require( |
local m_unicode = require("Module:Unicode data") |
||
local char_to_script = require( |
local char_to_script = require("Module:scripts").charToScript |
||
local export = {} |
local export = {} |
||
Line 51: | Line 51: | ||
local function get_codepoint(codepoint, param_name) |
local function get_codepoint(codepoint, param_name) |
||
codepoint = tonumber(codepoint) or decode_entities(codepoint) |
|||
if codepoint then |
|||
if type(codepoint) == "string" and ulen(codepoint) == 1 then |
|||
codepoint = |
codepoint = cp(codepoint) |
||
elseif type(codepoint) ~= "number" then |
|||
⚫ | |||
⚫ | |||
elseif type(codepoint) ~= "number" then |
|||
⚫ | |||
.. " parameter") |
|||
end |
|||
end |
end |
||
return codepoint |
return codepoint |
||
Line 65: | Line 61: | ||
function export._show(args, parent_title) |
function export._show(args, parent_title) |
||
local codepoint = args.codepoint |
local codepoint = args.codepoint |
||
⚫ | |||
local title = mw.title.getCurrentTitle() |
local title = mw.title.getCurrentTitle() |
||
local |
local pagename = args.pagename or mw.loadData("Module:headword/data").pagename |
||
local namespace = mw.title.getCurrentTitle().nsText |
local namespace = mw.title.getCurrentTitle().nsText |
||
if codepoint |
if codepoint then |
||
codepoint = get_codepoint(codepoint, "codepoint") |
codepoint = get_codepoint(codepoint, "codepoint") |
||
else |
else |
||
if title.fullText == parent_title then |
if not args.pagename and title.fullText == parent_title then |
||
codepoint = 0xfffd |
codepoint = 0xfffd |
||
elseif ulen( |
elseif ulen(pagename) == 1 then |
||
codepoint = cp( |
codepoint = cp(pagename) |
||
else |
else |
||
if title.nsText == "Template" then return "" end |
|||
error("Page title is not a single Unicode character") |
error("Page title is not a single Unicode character") |
||
end |
end |
||
end |
end |
||
⚫ | |||
args.image = args.image and mw.text.trim(args.image) |
|||
if args.image == "" then |
if args.image == "-" then |
||
image = nil |
image = nil |
||
else |
else |
||
Line 120: | Line 114: | ||
end |
end |
||
local script_code = args.sc or char_to_script(codepoint) |
local script_code = args.sc and args.sc:getCode() or char_to_script(codepoint) |
||
local script_data = mw.loadData("Module:scripts/data")[script_code] |
local script_data = mw.loadData("Module:scripts/data")[script_code] |
||
or error("No data for script code " .. script_code .. ".") |
or error("No data for script code " .. script_code .. ".") |
||
Line 140: | Line 134: | ||
local aliases |
local aliases |
||
if args.aliases == "" then |
if args.aliases == "-" then |
||
aliases = nil |
aliases = nil |
||
else |
else |
||
aliases = mw.loadData( |
aliases = mw.loadData("Module:Unicode data/aliases")[codepoint] |
||
end |
end |
||
local function parse_aliases(aliases) |
local function parse_aliases(aliases) |
||
local result = {} |
local result = {} |
||
Line 161: | Line 154: | ||
if classif.correction then |
if classif.correction then |
||
for i, name in ipairs(classif.correction) do |
for i, name in ipairs(classif.correction) do |
||
local category = |
local category = "[[Category:Character boxes with corrected names]]" |
||
if namespace == "" then |
if namespace == "" then |
||
table.insert(result, |
table.insert(result, |
||
( |
("[[Category:Character boxes with corrected names]]Corrected: %s"):format( |
||
name |
name |
||
) |
) |
||
Line 170: | Line 163: | ||
else |
else |
||
table.insert(result, |
table.insert(result, |
||
( |
("Corrected: %s"):format( |
||
name |
name |
||
) |
) |
||
Line 180: | Line 173: | ||
if classif.alternate then |
if classif.alternate then |
||
for i, name in ipairs(classif.alternate) do |
for i, name in ipairs(classif.alternate) do |
||
local category = |
local category = "[[Category:Character boxes with alternative names]]" |
||
if namespace == "" then |
if namespace == "" then |
||
table.insert(result, |
table.insert(result, |
||
( |
("[[Category:Character boxes with alternative names]]Alternative: %s"):format( |
||
name |
name |
||
) |
) |
||
Line 189: | Line 182: | ||
else |
else |
||
table.insert(result, |
table.insert(result, |
||
( |
("Alternative: %s"):format( |
||
name |
name |
||
) |
) |
||
Line 198: | Line 191: | ||
if classif.abbreviation then |
if classif.abbreviation then |
||
local category = |
local category = "[[Category:Character boxes with abbreviations]]" |
||
if namespace == "" then |
if namespace == "" then |
||
table.insert(result, |
table.insert(result, |
||
( |
("[[Category:Character boxes with abbreviations]]Abbreviation: %s"):format( |
||
table.concat(classif.abbreviation, ", ") |
table.concat(classif.abbreviation, ", ") |
||
) |
) |
||
Line 207: | Line 200: | ||
else |
else |
||
table.insert(result, |
table.insert(result, |
||
( |
("Abbreviation: %s"):format( |
||
table.concat(classif.abbreviation, ", ") |
table.concat(classif.abbreviation, ", ") |
||
) |
) |
||
Line 216: | Line 209: | ||
local parsed_result = table.concat(result, ", ") |
local parsed_result = table.concat(result, ", ") |
||
return |
return "<div>(" .. parsed_result .. ")</div>" |
||
end |
end |
||
Line 251: | Line 244: | ||
local function parse_composition() |
local function parse_composition() |
||
local result = nil |
local result = nil |
||
if block_name == "Hangul Syllables" then |
if block_name == "Hangul Syllables" then |
||
⚫ | |||
result = ((ti ~= 0) and |
result = ((ti ~= 0) and |
||
'<big class="Kore" lang="">[[&#%u;]] + [[&#%u;]] + [[&#%u;]]</big>' or |
'<big class="Kore" lang="">[[&#%u;]] + [[&#%u;]] + [[&#%u;]]</big>' or |
||
Line 263: | Line 254: | ||
final_to_letter[ti] |
final_to_letter[ti] |
||
) |
) |
||
else |
else |
||
local nfd = toNFD(u(codepoint)) |
local nfd = toNFD(u(codepoint)) |
||
Line 279: | Line 269: | ||
local character_text = |
local character_text = |
||
link_target and ('[[ |
link_target and ('[[%s|<span class="%s">%s&#%u;</span> [U+%04X]]]') |
||
:format( |
:format(link_target, script, dotted_circle, nfdcp, nfdcp) |
||
or ('<span class="%s">%s&#%u;</span> [U+%04X]') |
or ('<span class="%s">%s&#%u;</span> [U+%04X]') |
||
:format(script, dotted_circle, nfdcp, nfdcp) |
:format(script, dotted_circle, nfdcp, nfdcp) |
||
table.insert(compo, '<span class="character-sample-secondary">' .. character_text .. |
table.insert(compo, '<span class="character-sample-secondary">' .. character_text .. "</span> ") |
||
end |
end |
||
result = table.concat(compo, " + ") |
result = table.concat(compo, " + ") |
||
end |
end |
||
end |
end |
||
Line 297: | Line 286: | ||
return nil |
return nil |
||
end |
end |
||
-- [[ Egyptian Hieroglyphs |
-- [[ Egyptian Hieroglyphs |
||
local function parse_gardiner() |
local function parse_gardiner() |
||
⚫ | |||
if args.gardiner then |
if args.gardiner then |
||
⚫ | |||
⚫ | |||
result = |
|||
( |
|||
⚫ | |||
args.gardiner, args.gardiner |
args.gardiner, args.gardiner |
||
) |
) |
||
return "Gardiner number", result, "[[Category:Character boxes with additional information for Egyptian Hieroglyphs]]" |
return "Gardiner number", result, "[[Category:Character boxes with additional information for Egyptian Hieroglyphs]]" |
||
end |
end |
||
return nil |
return nil |
||
end |
end |
||
local function parse_mdc() |
local function parse_mdc() |
||
⚫ | |||
if args.mdc then |
if args.mdc then |
||
⚫ | |||
result = args.mdc |
|||
⚫ | |||
end |
end |
||
return nil |
return nil |
||
end |
end |
||
local function parse_egpz() |
local function parse_egpz() |
||
local result = nil |
|||
if args.egpz then |
if args.egpz then |
||
⚫ | |||
result = args.egpz |
|||
⚫ | |||
end |
end |
||
return nil |
return nil |
||
end |
end |
||
Line 353: | Line 317: | ||
local function middle_part() |
local function middle_part() |
||
local rows = {} |
local rows = {} |
||
local function insert_row(row_title, row_contents, row_category) |
local function insert_row(row_title, row_contents, row_category) |
||
if row_contents then |
if row_contents then |
||
mw.log("Row contents: " .. row_contents) |
|||
table.insert(rows, |
table.insert(rows, |
||
('<tr><td style="text-align: left">%s:</td><td>%s%s</td></tr>'):format(row_title, row_contents, row_category)) |
('<tr><td style="text-align: left">%s:</td><td>%s%s</td></tr>'):format(row_title, row_contents, row_category)) |
||
end |
end |
||
end |
end |
||
Line 373: | Line 333: | ||
if rows[1] then |
if rows[1] then |
||
return ('<table style="margin: 0 auto;">%s</table>') |
return ('<table style="margin: 0 auto;">%s</table>') |
||
:format(table.concat(rows, "")) |
:format(table.concat(rows, "")) |
||
end |
end |
||
return "" |
return "" |
||
end |
end |
||
Line 387: | Line 344: | ||
local link_target |
local link_target |
||
if combining then |
if combining == nil then |
||
combining = to_boolean(combining) |
|||
else |
|||
combining = m_unicode.is_combining(codepoint) |
combining = m_unicode.is_combining(codepoint) |
||
end |
end |
||
if printable then |
if printable == nil then |
||
printable = to_boolean(printable) |
|||
else |
|||
printable = m_unicode.is_printable(codepoint) |
printable = m_unicode.is_printable(codepoint) |
||
end |
end |
||
Line 402: | Line 355: | ||
if title == "self" or page_exists(char) then |
if title == "self" or page_exists(char) then |
||
link_target = char |
link_target = char |
||
elseif title ~= "" then |
elseif title ~= "-" then |
||
link_target = m_unicode.get_entry_title(codepoint) |
link_target = m_unicode.get_entry_title(codepoint) |
||
end |
end |
||
Line 408: | Line 361: | ||
if printable then |
if printable then |
||
display = ('<span class="character-sample-secondary %s">%s&#x%04X;</span>'):format( |
display = ('<span class="character-sample-secondary %s">%s&#x%04X;</span>'):format( |
||
script or char_to_script(codepoint), |
script and script:getCode() or char_to_script(codepoint), |
||
combining and "◌" or "", codepoint |
combining and "◌" or "", codepoint |
||
) |
) |
||
Line 415: | Line 368: | ||
local arrow_and_maybe_char |
local arrow_and_maybe_char |
||
if np then |
if np then |
||
arrow_and_maybe_char = (display or "") .. |
arrow_and_maybe_char = (display or "") .. " →" |
||
else |
else |
||
arrow_and_maybe_char = |
arrow_and_maybe_char = "← " .. (display or "") |
||
end |
end |
||
Line 425: | Line 378: | ||
if link_target then |
if link_target then |
||
return ( |
return ("[[" .. link_target .. "|" .. text .. "]]") |
||
else |
else |
||
return text |
return text |
||
Line 446: | Line 399: | ||
local previous_codepoint = |
local previous_codepoint = |
||
get_codepoint(args.previous_codepoint, "previous_codepoint") |
args.previous_codepoint and get_codepoint(args.previous_codepoint, "previous_codepoint") |
||
or get_next(codepoint, -1) |
|||
local next_codepoint = get_codepoint(args.next_codepoint, "next_codepoint") |
local next_codepoint = |
||
args.next_codepoint and get_codepoint(args.next_codepoint, "next_codepoint") |
|||
or get_next(codepoint, 1) |
|||
local combining |
local combining = args.combining |
||
if |
if combining == nil then |
||
combining = to_boolean(args.combining) |
|||
else |
|||
combining = m_unicode.is_combining(codepoint) |
combining = m_unicode.is_combining(codepoint) |
||
end |
end |
||
Line 460: | Line 412: | ||
table.insert(table_markup, |
table.insert(table_markup, |
||
'|-\n| style="width: 70px;" colspan="2" | ' .. |
'|-\n| style="width: 70px;" colspan="2" | ' .. |
||
"<table>" .. |
|||
"<tr>" .. |
|||
"<td>" .. |
|||
('<span class="character-sample-primary %s">%s&#%u;</span>') |
('<span class="character-sample-primary %s">%s&#%u;</span>') |
||
:format(script_code, combining and "◌" or "", codepoint) .. |
:format(script_code, combining and "◌" or "", codepoint) .. |
||
"</td>" .. |
|||
"<td>" .. |
|||
( |
(" [https://util.unicode.org/UnicodeJsps/character.jsp?a=%.4X U+%.4X]"):format(codepoint, codepoint) .. |
||
", [[w:List of XML and HTML character entity references|&#" .. codepoint .. ";]]\n" .. |
|||
'<div class="character-sample-name">' .. |
'<div class="character-sample-name">' .. |
||
encode_entities(args.name or m_unicode.lookup_name(codepoint)) .. |
encode_entities(args.name or m_unicode.lookup_name(codepoint)) .. |
||
"</div>" .. |
|||
parse_aliases(aliases) .. |
parse_aliases(aliases) .. |
||
"</td>" .. |
|||
"</tr>" .. |
|||
"</table>" |
|||
) |
) |
||
Line 496: | Line 448: | ||
if previous_unassigned_first <= previous_unassigned_last or next_unassigned_first <= next_unassigned_last then |
if previous_unassigned_first <= previous_unassigned_last or next_unassigned_first <= next_unassigned_last then |
||
if previous_unassigned_first < previous_unassigned_last then |
if previous_unassigned_first < previous_unassigned_last then |
||
left_unassigned_text = ( |
left_unassigned_text = ("[unassigned: U+%.4X–U+%.4X]"):format(previous_unassigned_first, previous_unassigned_last) |
||
elseif previous_unassigned_first == previous_unassigned_last then |
elseif previous_unassigned_first == previous_unassigned_last then |
||
left_unassigned_text = ( |
left_unassigned_text = ("[unassigned: U+%.4X]"):format(previous_unassigned_first) |
||
end |
end |
||
if next_unassigned_first < next_unassigned_last then |
if next_unassigned_first < next_unassigned_last then |
||
right_unassigned_text = ( |
right_unassigned_text = ("[unassigned: U+%.4X–U+%.4X]"):format(next_unassigned_first, next_unassigned_last) |
||
elseif next_unassigned_first == next_unassigned_last then |
elseif next_unassigned_first == next_unassigned_last then |
||
right_unassigned_text = ( |
right_unassigned_text = ("[unassigned: U+%.4X]"):format(next_unassigned_first) |
||
end |
end |
||
end |
end |
||
local unassignedsRow = |
local unassignedsRow = |
||
mw.html.create( |
mw.html.create("table"):css("width", "100%"):css("font-size", "80%"):css("white-space", "nowrap") |
||
:tag( |
:tag("tr") |
||
:tag( |
:tag("td"):css("width", "50%"):css("text-align", "left"):wikitext(left_unassigned_text or ""):done() |
||
:tag( |
:tag("td"):css("width", "50%"):css("text-align", "right"):wikitext(right_unassigned_text or ""):done() |
||
:allDone() |
:allDone() |
||
table.insert(table_markup, tostring(unassignedsRow) .. |
table.insert(table_markup, tostring(unassignedsRow) .."\n") |
||
local previous_codepoint_text = "" |
local previous_codepoint_text = "" |
||
local next_codepoint_text = ( |
local next_codepoint_text = ("%s\n") |
||
:format(present_codepoint(next_codepoint, true, |
:format(present_codepoint(next_codepoint, true, |
||
args.next_codepoint_sc, args.next_codepoint_combining, |
args.next_codepoint_sc, args.next_codepoint_combining, |
||
Line 524: | Line 476: | ||
if previous_codepoint > 0 then |
if previous_codepoint > 0 then |
||
previous_codepoint_text = ( |
previous_codepoint_text = ("%s\n") |
||
:format(present_codepoint(previous_codepoint, false, |
:format(present_codepoint(previous_codepoint, false, |
||
args.previous_codepoint_sc, args.previous_codepoint_combining, |
args.previous_codepoint_sc, args.previous_codepoint_combining, |
||
Line 531: | Line 483: | ||
end |
end |
||
local block_name_text = ( |
local block_name_text = ("[[Appendix:Unicode/%s|%s]]") |
||
:format(block_name, block_name) |
:format(block_name, block_name) |
||
if namespace == "" then |
if namespace == "" then |
||
block_name_text = block_name_text .. ( |
block_name_text = block_name_text .. ("[[Category:%s block|*%010d]]\n") |
||
:format(block_name, codepoint) |
:format(block_name, codepoint) |
||
else |
else |
||
block_name_text = block_name_text .. |
block_name_text = block_name_text .. "\n" |
||
end |
end |
||
local lastRow = |
local lastRow = |
||
mw.html.create( |
mw.html.create("table"):css("width", "100%"):css("text-align", "center") |
||
:tag( |
:tag("tr") |
||
:tag( |
:tag("td"):css("width", "20%"):wikitext(previous_codepoint_text):done() |
||
-- :tag( |
-- :tag("td"):css("width", "15%") |
||
-- :tag( |
-- :tag("span"):wikitext(left_unassigned_text and "'''...'''" or ""):attr("title", left_unassigned_text or ""):done():done() |
||
:tag( |
:tag("td"):css("width", "60%"):css("font-size", "110%"):css("font-weight", "bold"):wikitext(block_name_text) |
||
-- :tag( |
-- :tag("td"):css("width", "15%") |
||
-- :tag( |
-- :tag("span"):wikitext(right_unassigned_text and "'''...'''" or ""):attr("title", right_unassigned_text or ""):done():done() |
||
:tag( |
:tag("td"):css("width", "20%"):wikitext(next_codepoint_text):done() |
||
:allDone() |
:allDone() |
||
table.insert(table_markup, tostring(lastRow) .. |
table.insert(table_markup, tostring(lastRow) .."\n") |
||
table.insert(table_markup, |
table.insert(table_markup, "|}") |
||
if cat_name and namespace == "" then |
if cat_name and namespace == "" then |
||
Line 566: | Line 518: | ||
function export.show(frame) |
function export.show(frame) |
||
⚫ | |||
[1] = {alias_of = "codepoint"}, |
|||
⚫ | |||
["previous_codepoint"] = {}, |
|||
["next_codepoint"] = {}, |
|||
["name"] = {}, |
|||
["previous_codepoint_name"] = {}, |
|||
["next_codepoint_name"] = {}, |
|||
["combining"] = {type = "boolean"}, |
|||
["previous_codepoint_combining"] = {type = "boolean"}, |
|||
["next_codepoint_combining"] = {type = "boolean"}, |
|||
["printable"] = {type = "boolean"}, |
|||
["previous_codepoint_printable"] = {type = "boolean"}, |
|||
["next_codepoint_printable"] = {type = "boolean"}, |
|||
["previous_title"] = {}, |
|||
["next_title"] = {}, |
|||
["sc"] = {type = "script"}, |
|||
["next_codepoint_sc"] = {type = "script"}, |
|||
["previous_codepoint_sc"] = {type = "script"}, |
|||
["caption"] = {}, |
|||
["image"] = {}, |
|||
["block"] = {}, |
|||
["gardiner"] = {}, |
|||
["mdc"] = {}, |
|||
["egpz"] = {}, |
|||
["nocat"] = {type = "boolean"}, |
|||
["pagename"] = {}, -- for testing etc. |
|||
⚫ | |||
local parent_frame = frame:getParent() |
local parent_frame = frame:getParent() |
||
local args = require("Module:parameters").process(parent_frame.args, params) |
|||
return export._show(args, parent_frame:getTitle()) |
|||
end |
end |
||
Revision as of 04:10, 17 May 2024
- The following documentation is located at Module:character info/documentation. [edit]
- Useful links: subpage list • links • transclusions • testcases • sandbox (diff)
This module generates content for {{character info}}
and determines the condition under which {{editnotice-exotic symbols}}
is displayed when in edit mode in the main namespace (through MediaWiki:Editnotice-0).
To be fixed:
- Code points with labels beginning in "<" are given as unassigned (see the box for U+007E in ~ and the box for U+F900 in 豈).
local m_str_utils = require("Module:string utilities")
local cp = m_str_utils.codepoint
local decode_entities = m_str_utils.decode_entities
local encode_entities = m_str_utils.encode_entities
local floor = math.floor
local gcodepoint = m_str_utils.gcodepoint
local toNFD = mw.ustring.toNFD
local u = m_str_utils.char
local ulen = m_str_utils.len
local m_unicode = require("Module:Unicode data")
local char_to_script = require("Module:scripts").charToScript
local export = {}
-- Set of script codes for which export.exotic_symbol_warning shows the
-- "editnotice-exotic symbols" edit notice.
local dingbat_scripts = {
	Zsym = true,
	Zmth = true,
	Zyyy = true,
}
--- Report whether a wiki page with the given title exists.
-- Both the title construction and the `exists` lookup are wrapped in pcall,
-- so any error raised by either is reported as "does not exist".
-- @param title string page title to look up
-- @return false if the title could not be created or queried, otherwise the
--   value of the title object's `exists` field
local function page_exists(title)
	local created, page = pcall(mw.title.new, title)
	if not (created and page) then
		return false
	end

	local queried, exists = pcall(function()
		return page.exists
	end)
	if not queried then
		return false
	end
	return exists
end
--- Decide whether to show the "exotic symbols" edit notice for the current page.
-- The notice is produced only when the current page does not exist yet, its
-- full title is exactly one character long, and that character's script code
-- is listed in `dingbat_scripts`.
-- @param frame frame object used to expand the notice template
-- @return string the expanded {{editnotice-exotic symbols}} template, or ""
function export.exotic_symbol_warning(frame)
	local current = mw.title.getCurrentTitle()

	-- Existing pages and multi-character titles never get the notice.
	if current.exists or ulen(current.fullText) ~= 1 then
		return ""
	end

	local script = char_to_script(cp(current.fullText))
	if not dingbat_scripts[script] then
		return ""
	end
	return frame:expandTemplate { title = "editnotice-exotic symbols" }
end
--- Normalise a code point parameter to a number.
-- The value may be anything `tonumber` accepts, or a single character
-- (after `decode_entities` has been applied to the raw string).
-- @param codepoint raw parameter value (string or number)
-- @param param_name string parameter name, used only in error messages
-- @return number an integer code point in the range 0..0x10FFFF
-- Raises an error if the value is neither numeric nor a single character, or
-- if the numeric value is not a valid Unicode code point.
local function get_codepoint(codepoint, param_name)
	local value = tonumber(codepoint) or decode_entities(codepoint)

	if type(value) == "string" and ulen(value) == 1 then
		return cp(value)
	elseif type(value) ~= "number" then
		error("Unrecognised string given for the " .. param_name .. " parameter")
	end

	-- tonumber also lets through negatives, fractions and values beyond the
	-- Unicode range; reject them here with a clear message instead of letting
	-- them reach the formatting and lookup code. (NaN fails the `% 1` test.)
	if value < 0 or value > 0x10FFFF or value % 1 ~= 0 then
		error("Invalid code point given for the " .. param_name .. " parameter")
	end
	return value
end
--- Build the character info box for one code point.
-- @param args table of already-processed arguments (export.show lists the
--   keys it passes through Module:parameters)
-- @param parent_title string title of the frame that invoked the module, as
--   returned by `parent_frame:getTitle()` in export.show
-- @return string wikitext: the box table, optional category links and the
--   TemplateStyles tag
-- Raises an error if no code point is given and the page name is not a single
-- character, or if no script data exists for the resolved script code.
function export._show(args, parent_title)
	local codepoint = args.codepoint
	local title = mw.title.getCurrentTitle()
	local pagename = args.pagename or mw.loadData("Module:headword/data").pagename
	local namespace = title.nsText

	-- Resolve the code point: explicit parameter first, then U+FFFD when the
	-- current page is the invoking page itself, then the one-character page name.
	if codepoint then
		codepoint = get_codepoint(codepoint, "codepoint")
	elseif not args.pagename and title.fullText == parent_title then
		codepoint = 0xfffd
	elseif ulen(pagename) == 1 then
		codepoint = cp(pagename)
	else
		error("Page title is not a single Unicode character")
	end

	-- image=- suppresses the image; otherwise fall back to the Unicode data lookup.
	local image
	if args.image ~= "-" then
		image = args.image or m_unicode.lookup_image(codepoint)
	end

	local table_markup = {}
	table.insert(table_markup,
		'{| class="wikitable floatright" style="width:25em;"\n')

	if image then
		if not image:match("\127") then -- <hiero> tags generate these; pass them through
			-- Reduce "[[File:Name.svg|...]]" / "Image:Name.svg" to the bare file name.
			if image:match("^%[?%[?[Ff]ile:") or image:match("^%[?%[?[Ii]mage:") then
				image = image:gsub("^%[%[", ""):gsub("^[Ff]ile:", ""):gsub("^[Ii]mage:", ""):gsub("|.*", ""):gsub("]]", "")
			end
			local category = "[[Category:Character boxes with images|*" .. string.format("%010d", codepoint) .. "]]"
			image = "[[File:" .. image .. "|120x140px]]"
			if namespace == "" then
				image = image .. category
			end
		end
		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s<br/>%s\n'):format(
				image, args.caption or ""
			)
		)
	elseif args.caption then
		table.insert(table_markup,
			('|-\n| colspan="2" style="text-align: center;" | %s\n'):format(
				args.caption
			)
		)
	end

	local script_code = args.sc and args.sc:getCode() or char_to_script(codepoint)
	local script_data = mw.loadData("Module:scripts/data")[script_code]
		or error("No data for script code " .. script_code .. ".")
	local script_name = script_data[1]

	local NAMESPACE = title.namespace
	local cat_name
	if not args.nocat and ((NAMESPACE == 0) or (NAMESPACE == 100)) then -- main and Appendix
		if script_data.character_category ~= nil then
			-- false means no category, overriding the default below
			cat_name = script_data.character_category or nil
		elseif script_name then
			cat_name = script_name .. " script characters"
		end
	end

	local block_name = encode_entities(args.block or m_unicode.lookup_block(codepoint))

	-- aliases=- suppresses the alias list.
	local aliases
	if args.aliases ~= "-" then
		aliases = mw.loadData("Module:Unicode data/aliases")[codepoint]
	end

	-- Format the alias list as "<div>(Corrected: ..., Alternative: ...)</div>".
	-- Each alias is a {type, name} pair; only the types "correction",
	-- "alternate" and "abbreviation" are displayed. Returns "" when there is
	-- nothing to display.
	local function parse_aliases(aliases)
		if not aliases then
			return ""
		end

		local classif = {}
		for _, alias in ipairs(aliases) do
			local kind = alias[1]
			if not classif[kind] then
				classif[kind] = {}
			end
			table.insert(classif[kind], encode_entities(alias[2]))
		end

		-- Tracking categories are only added in the main namespace.
		local function categorized(category, text)
			if namespace == "" then
				return category .. text
			end
			return text
		end

		local result = {}
		if classif.correction then
			for _, name in ipairs(classif.correction) do
				table.insert(result,
					categorized("[[Category:Character boxes with corrected names]]", "Corrected: " .. name))
			end
		end
		if classif.alternate then
			for _, name in ipairs(classif.alternate) do
				table.insert(result,
					categorized("[[Category:Character boxes with alternative names]]", "Alternative: " .. name))
			end
		end
		if classif.abbreviation then
			table.insert(result,
				categorized("[[Category:Character boxes with abbreviations]]",
					"Abbreviation: " .. table.concat(classif.abbreviation, ", ")))
		end

		-- Aliases of other types only: avoid emitting an empty "()".
		if not result[1] then
			return ""
		end
		return "<div>(" .. table.concat(result, ", ") .. ")</div>"
	end

	-- For Hangul syllables, li/vi/ti are the indices into the initial, vowel
	-- and final tables below (ti == 0 is treated as "no final").
	local li, vi, ti = nil, nil, nil
	if block_name == "Hangul Syllables" then
		local index = codepoint - 0xAC00
		li, vi, ti = floor(index / 588), floor((index % 588) / 28), index % 28
	end

	local initial_to_letter = { [0] =
		0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
		0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E,
	}

	local vowel_to_letter = { [0] =
		0x314F, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156,
		0x3157, 0x3158, 0x3159, 0x315A, 0x315B, 0x315C, 0x315D, 0x315E,
		0x315F, 0x3160, 0x3161, 0x3162, 0x3163,
	}

	local final_to_letter = {
		0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139,
		0x313A, 0x313B, 0x313C, 0x313D, 0x313E, 0x313F, 0x3140, 0x3141,
		0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314A, 0x314B,
		0x314C, 0x314D, 0x314E,
	}

	-- "Composition" row: Hangul letters for Hangul syllables, otherwise the
	-- NFD decomposition when it is longer than one character.
	-- Returns row title, row contents, row category; or nil if there is no row.
	local function parse_composition()
		local result = nil

		if block_name == "Hangul Syllables" then
			-- With ti == 0 the two-placeholder format is chosen and the third
			-- argument is simply ignored by format.
			result = ((ti ~= 0) and
				'<big class="Kore" lang="">[[&#%u;]] + [[&#%u;]] + [[&#%u;]]</big>' or
				'<big class="Kore" lang="">[[&#%u;]] + [[&#%u;]]</big>'):format(
				initial_to_letter[li],
				vowel_to_letter[vi],
				final_to_letter[ti]
			)
		else
			local nfd = toNFD(u(codepoint))
			if ulen(nfd) ~= 1 then
				local compo = {}
				for nfdcp in gcodepoint(nfd) do
					local dotted_circle = (m_unicode.is_combining(nfdcp) and "◌" or "")
					-- Only link components whose entry page exists.
					local link_target = m_unicode.get_entry_title(nfdcp)
					if not link_target or not page_exists(link_target) then
						link_target = nil
					end

					local script = char_to_script(nfdcp)
					local character_text =
						link_target and ('[[%s|<span class="%s">%s&#%u;</span> [U+%04X]]]')
							:format(link_target, script, dotted_circle, nfdcp, nfdcp)
						or ('<span class="%s">%s&#%u;</span> [U+%04X]')
							:format(script, dotted_circle, nfdcp, nfdcp)

					table.insert(compo, '<span class="character-sample-secondary">' .. character_text .. "</span> ")
				end
				result = table.concat(compo, " + ")
			end
		end

		if result then
			return "Composition", result, "[[Category:Character boxes with compositions]]"
		end
		return nil
	end

	-- [[ Egyptian Hieroglyphs
	-- Each of the three functions below returns row title, row contents and
	-- row category when its parameter is set, or nil otherwise.
	local function parse_gardiner()
		if args.gardiner then
			local result = (
				"[http://vincent.euverte.free.fr/Rosette/Rosette_410.php?Hiero=%s&Lang=E %s]\n"):format(
				args.gardiner, args.gardiner
			)
			return "Gardiner number", result, "[[Category:Character boxes with additional information for Egyptian Hieroglyphs]]"
		end
		return nil
	end

	local function parse_mdc()
		if args.mdc then
			return "Manuel de Codage", args.mdc, "[[Category:Character boxes with additional information for Egyptian Hieroglyphs]]"
		end
		return nil
	end

	local function parse_egpz()
		if args.egpz then
			return "EGPZ 1.0", args.egpz, "[[Category:Character boxes with additional information for Egyptian Hieroglyphs]]"
		end
		return nil
	end
	-- ]]

	-- Collect the optional rows (composition and hieroglyph data) into one
	-- inner table; returns "" when no row applies.
	local function middle_part()
		local rows = {}

		local function insert_row(row_title, row_contents, row_category)
			if row_contents then
				mw.log("Row contents: " .. row_contents)
				table.insert(rows,
					('<tr><td style="text-align: left">%s:</td><td>%s%s</td></tr>'):format(row_title, row_contents, row_category))
			end
		end

		insert_row(parse_composition())
		insert_row(parse_gardiner())
		insert_row(parse_egpz())
		insert_row(parse_mdc())

		if rows[1] then
			return ('<table style="margin: 0 auto;">%s</table>')
				:format(table.concat(rows, ""))
		end
		return ""
	end

	-- Render the previous/next navigation cell for a neighbouring code point.
	-- np is true for the "next" cell (arrow on the right) and false for the
	-- "previous" cell (arrow on the left). script, combining, name, printable
	-- and title are optional overrides; nil means "look it up".
	local function present_codepoint(codepoint, np, script, combining, name, printable, title)
		local display
		local link_target

		if combining == nil then
			combining = m_unicode.is_combining(codepoint)
		end
		if printable == nil then
			printable = m_unicode.is_printable(codepoint)
		end

		-- title == "self" forces a link to the character's own page;
		-- title == "-" suppresses the fallback link to the entry title.
		local char = u(codepoint)
		if title == "self" or page_exists(char) then
			link_target = char
		elseif title ~= "-" then
			link_target = m_unicode.get_entry_title(codepoint)
		end

		if printable then
			display = ('<span class="character-sample-secondary %s">%s&#x%04X;</span>'):format(
				script and script:getCode() or char_to_script(codepoint),
				combining and "◌" or "", codepoint
			)
		end

		local arrow_and_maybe_char
		if np then
			arrow_and_maybe_char = (display or "") .. " →"
		else
			arrow_and_maybe_char = "← " .. (display or "")
		end

		local text = ('<span title="%s">%s<br><small>[U+%04X]</small></span>')
			:format(encode_entities(name or m_unicode.lookup_name(codepoint)),
				arrow_and_maybe_char, codepoint)

		if link_target then
			return ("[[" .. link_target .. "|" .. text .. "]]")
		else
			return text
		end
	end

	local function get_next(codepoint, step)
		-- Skip past noncharacters and reserved characters (Cn), private-use
		-- characters (Co), surrogates (Cs), and control characters (Cc), all
		-- of which have a label beginning in "<" rather than a proper name.
		if step < 0 and 0 < codepoint or step > 0 and codepoint < 0x10FFFF then
			repeat
				codepoint = codepoint + step
			until m_unicode.lookup_name(codepoint):sub(1, 1) ~= "<"
				or not (0 < codepoint and codepoint < 0x10FFFF)
		end
		return codepoint
	end

	local previous_codepoint =
		args.previous_codepoint and get_codepoint(args.previous_codepoint, "previous_codepoint")
		or get_next(codepoint, -1)
	local next_codepoint =
		args.next_codepoint and get_codepoint(args.next_codepoint, "next_codepoint")
		or get_next(codepoint, 1)

	local combining = args.combining
	if combining == nil then
		combining = m_unicode.is_combining(codepoint)
	end

	-- Main row: large sample, code point link, entity reference, name, aliases.
	table.insert(table_markup,
		'|-\n| style="width: 70px;" colspan="2" | ' ..
		"<table>" ..
		"<tr>" ..
		"<td>" ..
		('<span class="character-sample-primary %s">%s&#%u;</span>')
			:format(script_code, combining and "◌" or "", codepoint) ..
		"</td>" ..
		"<td>" ..
		(" [https://util.unicode.org/UnicodeJsps/character.jsp?a=%.4X U+%.4X]"):format(codepoint, codepoint) ..
		", [[w:List of XML and HTML character entity references|&#" .. codepoint .. ";]]\n" ..
		'<div class="character-sample-name">' ..
		encode_entities(args.name or m_unicode.lookup_name(codepoint)) ..
		"</div>" ..
		parse_aliases(aliases) ..
		"</td>" ..
		"</tr>" ..
		"</table>"
	)

	table.insert(table_markup,
		middle_part()
	)

	-- Code points lying strictly between this one and its displayed
	-- neighbours are reported as "unassigned" ranges.
	local previous_unassigned_first = previous_codepoint + 1
	local previous_unassigned_last = codepoint - 1
	local next_unassigned_first = codepoint + 1
	local next_unassigned_last = next_codepoint - 1

	local left_unassigned_text
	local right_unassigned_text

	if previous_codepoint == 0 then
		previous_unassigned_first = 0
	end

	if previous_unassigned_first <= previous_unassigned_last or next_unassigned_first <= next_unassigned_last then
		if previous_unassigned_first < previous_unassigned_last then
			left_unassigned_text = ("[unassigned: U+%.4X–U+%.4X]"):format(previous_unassigned_first, previous_unassigned_last)
		elseif previous_unassigned_first == previous_unassigned_last then
			left_unassigned_text = ("[unassigned: U+%.4X]"):format(previous_unassigned_first)
		end

		if next_unassigned_first < next_unassigned_last then
			right_unassigned_text = ("[unassigned: U+%.4X–U+%.4X]"):format(next_unassigned_first, next_unassigned_last)
		elseif next_unassigned_first == next_unassigned_last then
			right_unassigned_text = ("[unassigned: U+%.4X]"):format(next_unassigned_first)
		end
	end

	local unassignedsRow =
		mw.html.create("table"):css("width", "100%"):css("font-size", "80%"):css("white-space", "nowrap")
			:tag("tr")
				:tag("td"):css("width", "50%"):css("text-align", "left"):wikitext(left_unassigned_text or ""):done()
				:tag("td"):css("width", "50%"):css("text-align", "right"):wikitext(right_unassigned_text or ""):done()
			:allDone()
	table.insert(table_markup, tostring(unassignedsRow) .."\n")

	local previous_codepoint_text = ""
	local next_codepoint_text = ("%s\n")
		:format(present_codepoint(next_codepoint, true,
			args.next_codepoint_sc, args.next_codepoint_combining,
			args.next_codepoint_name, args.next_codepoint_printable,
			args.next_codepoint_title))

	if previous_codepoint > 0 then
		previous_codepoint_text = ("%s\n")
			:format(present_codepoint(previous_codepoint, false,
				args.previous_codepoint_sc, args.previous_codepoint_combining,
				args.previous_codepoint_name, args.previous_codepoint_printable,
				args.previous_codepoint_title))
	end

	local block_name_text = ("[[Appendix:Unicode/%s|%s]]")
		:format(block_name, block_name)
	if namespace == "" then
		block_name_text = block_name_text .. ("[[Category:%s block|*%010d]]\n")
			:format(block_name, codepoint)
	else
		block_name_text = block_name_text .. "\n"
	end

	-- Navigation row: previous | block name | next. Every cell is closed with
	-- done() so that the three cells are siblings inside the row.
	local lastRow =
		mw.html.create("table"):css("width", "100%"):css("text-align", "center")
			:tag("tr")
				:tag("td"):css("width", "20%"):wikitext(previous_codepoint_text):done()
--				:tag("td"):css("width", "15%")
--					:tag("span"):wikitext(left_unassigned_text and "'''...'''" or ""):attr("title", left_unassigned_text or ""):done():done()
				:tag("td"):css("width", "60%"):css("font-size", "110%"):css("font-weight", "bold"):wikitext(block_name_text):done()
--				:tag("td"):css("width", "15%")
--					:tag("span"):wikitext(right_unassigned_text and "'''...'''" or ""):attr("title", right_unassigned_text or ""):done():done()
				:tag("td"):css("width", "20%"):wikitext(next_codepoint_text):done()
			:allDone()
	table.insert(table_markup, tostring(lastRow) .."\n")

	table.insert(table_markup, "|}")

	if cat_name and namespace == "" then
		table.insert(table_markup, "[[Category:" .. cat_name .. "| " .. u(codepoint) .. "]]")
	end

	table.insert(table_markup, require("Module:TemplateStyles")("Template:character info/style.css"))

	return table.concat(table_markup)
end
--- Template entry point: validate the parent frame's arguments with
-- Module:parameters and render the box with export._show.
-- @param frame frame object of the #invoke call; arguments are read from its parent
-- @return string the wikitext produced by export._show
function export.show(frame)
	local params = {
		[1] = {alias_of = "codepoint"},
		["codepoint"] = {},
		["previous_codepoint"] = {},
		["next_codepoint"] = {},
		["name"] = {},
		["previous_codepoint_name"] = {},
		["next_codepoint_name"] = {},
		["combining"] = {type = "boolean"},
		["previous_codepoint_combining"] = {type = "boolean"},
		["next_codepoint_combining"] = {type = "boolean"},
		["printable"] = {type = "boolean"},
		["previous_codepoint_printable"] = {type = "boolean"},
		["next_codepoint_printable"] = {type = "boolean"},
		-- _show reads args.previous_codepoint_title / args.next_codepoint_title
		-- ("self" to force a self-link, "-" to suppress the link), so they
		-- have to be declared here to reach it.
		["previous_codepoint_title"] = {},
		["next_codepoint_title"] = {},
		-- Not read by _show; kept so that existing uses are still accepted.
		["previous_title"] = {},
		["next_title"] = {},
		["sc"] = {type = "script"},
		["next_codepoint_sc"] = {type = "script"},
		["previous_codepoint_sc"] = {type = "script"},
		["caption"] = {},
		["image"] = {}, -- "-" suppresses the image
		["aliases"] = {}, -- "-" suppresses the alias list (read by _show)
		["block"] = {},
		["gardiner"] = {},
		["mdc"] = {},
		["egpz"] = {},
		["nocat"] = {type = "boolean"},
		["pagename"] = {}, -- for testing etc.
	}

	local parent_frame = frame:getParent()
	local args = require("Module:parameters").process(parent_frame.args, params)
	return export._show(args, parent_frame:getTitle())
end
return export