Module:Ortografeyes
I gn a nén co di dzo-pådje /doc pol notule d’ esplikêyes. Clitchîz cial po l’ ahiver.
-- This module makes a list of different
-- orthographies for a word based on a list of well known references.
-- Only use with Modele:Orto as :
--[[
{{Orto
|bondjoû = C8, C9, E34, O0, O3, O4
|bondjou = E1, O2
|bôdjoû = E34
}}
]]--
local p = {}
local base = require("Module:Base")
local sourdant = require("Module:Sourdant");
local category = require("Module:Categoreyes");
local lingaedje = require("Module:Lingaedje");
local riscr = require("Module:Riscrijhaedje");
-- Language code of the listed spellings; p.init overrides it with the `lang` template argument.
local lang = "wa"
-- Value of the `croejh` template argument; passed through to riscr.get_article_link.
-- NOTE(review): its meaning is defined by Module:Riscrijhaedje — confirm there.
local croejh = ""
-- Language object built once for "wa"; used for ucfirst() on category prefixes and language names.
local langObj = mw.language.new(lang)
-- Category family of the page ("mots", "spots" or "dijhêyes"); set in p.init from the `cat` argument.
local cat = "mots"
-- Text of the current page title; set in p.init.
local pagename = ""
-- Cache of loaded references, keyed by reference ID (filled by p.load_reference).
local references = {}
-- Ordered list of display groups (filled by p.load_groups).
local groups = {}
-- Same group tables as `groups`, indexed by group id.
local groupsID = {}
-- For each spelling, the list of reference IDs that attest it (filled by p.sort_ref).
local words = {}
-- output_lst[groupID][spelling] = rendered, comma-separated references (filled by p.sort_ref).
local output_lst = {}
-- Number of references sorted into a group other than "not", "unknown" and "all".
local total_known = 0
--- Left-pad a string up to a given byte length.
-- @param str  string to pad
-- @param len  wanted total length, in bytes
-- @param char padding string, a single space when omitted
-- @return the padded string; `str` unchanged when it is already `len` bytes or longer
string.lpad = function(str, len, char)
    local fill = char
    if fill == nil then
        fill = ' '
    end
    local missing = len - #str
    return string.rep(fill, missing) .. str
end
--- Percent-encode a string for use inside a URL.
-- Every byte that is not an ASCII letter or digit is replaced by "%XX",
-- where XX is the byte value as exactly two uppercase hexadecimal digits.
-- @param str string to encode
-- @return the encoded string
string.url_encode = function(str)
    local encoded = string.gsub(str, "[^%w]", function(c)
        -- %02X: bytes below 0x10 must keep their leading zero ("%0A", not "%A")
        return string.format("%%%02X", string.byte(c))
    end)
    return encoded
end
--- Escape the magic characters of Lua patterns.
-- Each of ( ) . % + - * ? [ ^ $ ] gets a "%" put in front of it, so that the
-- result can be embedded in a pattern and match the original text literally.
-- @param str string to escape
-- @return the escaped string
string.escape_pattern = function(str)
    local magic_chars = "([%(%)%.%%%+%-%*%?%[%^%$%]])"
    local escaped = string.gsub(str, magic_chars, "%%%1")
    return escaped
end
--- Store the "rifondowe" categories for the spelling that is the page title.
-- Nothing happens unless the language is "wa", `word` is the page name,
-- the group being rendered is not 'rif', and the same word is also listed
-- in the 'rif' group.
-- @param word  spelling being rendered
-- @param grpID id of the group the spelling is rendered in
function p.build_cat_grp(word, grpID)
    if word ~= pagename or lang ~= "wa" or grpID == 'rif' then
        return
    end
    local rif = output_lst['rif']
    if rif == nil or rif[word] == nil then
        return
    end

    -- kept local: the original leaked this name into the global environment
    local catPrefix = langObj:ucfirst(cat)
    for _, ref in pairs(words[word]) do
        -- one category per attesting reference that is not itself in the 'rif' group
        if references[ref] ~= nil and references[ref].group ~= 'rif' then
            category.store(catPrefix.." ki l' rifondowe est dins "..ref, true, 0, true)
        end
        -- for words before 1900
        if grpID == "d1900" then
            category.store("Mots ki l' rifondowe egzistéve dedja dins ene ortografeye di dvant Feller", true)
        -- for words in scripta
        elseif grpID == "vscr" then
            category.store("Mots ki l' rifondowe egzistéve dedja dins li scrîta", true)
        end
    end
end
-- Run riscr.split_article_link on a rendered link that still contains
-- alternatives separated by " u " or " / "; any other link is returned as is.
local function split_alternatives(link)
    if string.match(link, " [u/] ") then
        link = riscr.split_article_link(link)
    end
    return link
end

-- Turn one spelling into wikitext links.
-- A spelling that already contains "[[" is returned untouched. Otherwise it is
-- cut on " (…)", then ", ", then " u " / " / ", each part being linked with
-- riscr.get_article_link; finally a link to an anchor of the current page is
-- replaced by bold text.
local function linkify_word(word, ling)
    if string.match(word, "%[%[") then
        return word
    end

    local lw
    -- handle things like: "r(i)gon (rigon / r'gon)"
    -- the two [^)] are there to _NOT_ match things like "s(i)tucler / (e)stucler"
    -- ustring instead of string because of things like " (è)spitant"
    local pos = mw.ustring.find(word, " %([^%)][^%)]")
    if pos then
        -- mw.ustring.find returns a character offset, so the cuts must be made
        -- with mw.ustring.sub (string.sub counts bytes and would split accented letters)
        local head = mw.ustring.sub(word, 1, pos - 1)
        head = riscr.get_article_link(head, croejh, ling)
        local tail = mw.ustring.sub(word, pos + 2)
        tail = string.gsub(tail, "%)$", "")
        -- if "/text/" then it is a phonetic transcription, keep unchanged
        -- else we linkify it
        if not string.match(tail, "/[^ ][^/]+[^ ]/") then
            tail = riscr.get_article_link(tail, croejh, ling)
            tail = split_alternatives(tail)
        end
        lw = head .. " ( " .. tail .. " )"
    else
        pos = string.find(word, ", ")
        if pos then
            local head = riscr.get_article_link(string.sub(word, 1, pos - 1), croejh, ling)
            -- NOTE(review): this part starts with the space that follows the comma;
            -- presumably get_article_link copes with it — confirm.
            local tail = riscr.get_article_link(string.sub(word, pos + 1), croejh, ling)
            -- just in case
            head = split_alternatives(head)
            tail = split_alternatives(tail)
            lw = head .. ", " .. tail
        else
            pos = string.find(word, " [u/] ")
            if pos then
                local head = riscr.get_article_link(string.sub(word, 1, pos - 1), croejh, ling)
                local tail = riscr.get_article_link(string.sub(word, pos + 2), croejh, ling)
                local sep = string.sub(word, pos, pos + 2)
                -- just in case
                head = split_alternatives(head)
                tail = split_alternatives(tail)
                lw = head .. sep .. tail
            else
                lw = riscr.get_article_link(word, croejh, ling)
            end
        end
    end

    -- bold to itself is lost if there is an anchor
    -- pagename is escaped because it may contain pattern magic characters
    lw = string.gsub(lw,
        "%[%[" .. string.escape_pattern(pagename) .. "#([^%]]*)|([^%]]*)%]%]",
        "'''%2'''")
    return lw
end

--- Render every non-empty group of output_lst as an HTML box.
-- Groups are emitted in the order of the `groups` list. The "not" group is a
-- single sentence; every other group is a titled list with one item per
-- spelling, laid out in 2 columns from 4 items and 3 columns from 9 items.
-- Categories are stored along the way through p.build_cat_grp.
-- @return the HTML/wikitext of all boxes, "" when there is nothing to show
function p.build_groups()
    local parts = {}
    for _, grp in ipairs(groups) do
        local grpID = grp.id
        local entries = output_lst[grpID]
        if entries ~= nil then
            parts[#parts + 1] = '<div class="TBox og og-'..grpID..'">'
            if grpID == "not" then
                parts[#parts + 1] = '<i>Li mot n’ est nén dins :</i> '..entries["-"]
            else
                -- language code handed to the link builder, refined for some Walloon groups
                local ling = lang
                if ling == "wa" then
                    if grpID == "vscr" or grpID == "d1900" then
                        ling = ling .. "-vsis"
                    elseif grpID == "s20es" then
                        ling = ling .. "-fel"
                    end
                end

                local items = {}
                for word, refs in pairs(entries) do
                    local lw = linkify_word(word, ling)
                    -- change anchor for old spellings of rifondou
                    if ling == "wa" and string.match(refs, "rifondaedje ricandjî") then
                        lw = string.gsub(lw, "#Walon%|", "#Walon (viye rifondowe)|")
                    end
                    items[#items + 1] = '<li>'..lw..' : '..refs..'</li>'
                    -- make cat if needed
                    p.build_cat_grp(word, grpID)
                end

                -- display list by columns for numerous items
                -- (no quote inside the class name: the closing quote is added below)
                local style_columns = ''
                if #items >= 9 then
                    style_columns = ' c-3'
                elseif #items >= 4 then
                    style_columns = ' c-2'
                end
                parts[#parts + 1] = '<div class="TBox-title ogt">'..grp.title..' :</div>'
                parts[#parts + 1] = '<div class="TBox-content'..style_columns..'"><ul>'
                    ..table.concat(items)..'</ul></div>'
            end
            parts[#parts + 1] = '</div>'
        end
    end
    return table.concat(parts)
end
--- Adapt a word to the form an external resource expects.
-- Supported values of `transfrom`:
--   "ucfirst"   : first character upper-cased
--   "uc"        : whole word upper-cased, "/" replaced by ","
--   "wiki"      : space and no-break space replaced by "_", ’ replaced by '
--   "apostrofe" : no-break space replaced by a plain space, ’ replaced by '
-- Any other value leaves the word unchanged.
-- @param word      word to transform
-- @param transfrom name of the transformation
-- @return the transformed word
function p.word_transform (word, transfrom)
    -- UTF-8 bytes of U+00A0, spelled out so the character cannot be silently
    -- turned into (or mistaken for) a plain space when the source is edited
    local NBSP = "\194\160"

    if transfrom == "ucfirst" then
        local w1 = mw.ustring.upper(mw.ustring.sub(word, 1, 1))
        local w2 = mw.ustring.sub(word, 2)
        word = w1 .. w2
    elseif transfrom == "uc" then
        word = mw.ustring.upper(word)
        word = string.gsub(word, '/', ',')
    elseif transfrom == "wiki" then
        word = string.gsub(word, ' ', '_')
        word = string.gsub(word, NBSP, '_')
        word = string.gsub(word, '’', '\'')
    elseif transfrom == "apostrofe" then
        word = string.gsub(word, NBSP, ' ')
        word = string.gsub(word, '’', '\'')
    end
    return word
end
-- Replace the __WORD__ placeholder of a template by `word`, taken literally.
-- A function replacement is used so that a "%" in `word` is never read as a
-- capture reference by string.gsub.
local function fill_word(template, word)
    local filled = string.gsub(template, '__WORD__', function() return word end)
    return filled
end

--- Render one reference as wikitext.
-- The first occurrence of the reference ID (with its ":qualifier" when there
-- is one) becomes a link to the "Sourdant:" page. Unless the word is '-', a
-- superscript "lére" link is appended: to oldwikisource when the reference has
-- `wikisource` data with a title, otherwise to the `external` query URL.
-- @param ref   raw reference text as typed in the template
-- @param refID reference ID found in `ref`, or nil
-- @param word  spelling the reference is attached to
-- @param refQ  qualifier found after "ID:", or nil
-- @return the rendered reference; `ref` unchanged when refID is nil or not loaded
function p.build_ref (ref, refID, word, refQ)
    if refID == nil or references[refID] == nil then
        return ref
    end

    -- an alias entry borrows the link data of the reference it points to
    local r
    if references[refID].alias ~= nil then
        r = references[refID].alias
    else
        r = references[refID]
    end

    -- ref link
    -- only replace the FIRST occurrence
    local link = '[[Sourdant:'..refID..'|'..refID..']]'
    if ref == refID then
        ref = link
    elseif refQ ~= nil then
        ref = string.gsub(ref, string.escape_pattern(refID..":"..refQ), link, 1)
    else
        ref = string.gsub(ref, string.escape_pattern(refID), link, 1)
    end

    if word == '-' then
        return ref
    end

    -- add link to wikisource
    if r.wikisource ~= nil then
        local w = r.wikisource
        if w.title ~= nil then
            -- for E89:other_word
            if refQ ~= nil then
                word = mw.text.trim(refQ)
            end
            if word ~= "" then
                if w.transform ~= nil then
                    word = p.word_transform(word, w.transform)
                end
                local t = fill_word(w.title, word)
                local l = ""
                if w.lang ~= "wa" then l = ":"..w.lang end
                ref = ref.. ' <sup>([[:oldwikisource'..l..':'..t..'|lére]])</sup>'
            end
        end
    -- add external link
    elseif r.external ~= nil then
        local ext = r.external
        local query = nil
        -- for R10:1841, Ref1:OtherWord, ...
        if ext.queryID ~= nil and refQ ~= nil then
            query = ext.queryID
            word = refQ
        elseif ext.query ~= nil then
            query = ext.query
            if refQ ~= nil and refQ == '' then
                -- "ID:" with an empty qualifier: no link wanted
                word = ''
            elseif refQ ~= nil then
                -- compatibility with old R11 template
                if refID == "R11" and string.match(refQ, '^[1-9]') then
                    word = word .. '_' .. refQ
                else
                    word = refQ
                end
            end
        end
        if query ~= nil and word ~= '' then
            if ext.transform ~= nil then
                word = p.word_transform(word, ext.transform)
            else
                if refQ == nil and string.match( word, "%(")
                    and (lang == "wa" or lang == "wa-fel" or lang == "wa-vsis")
                then
                    word = riscr.betchete_cogne_motli(word)
                end
            end
            query = fill_word(query, string.url_encode(word))
            ref = ref.. ' <sup class="plainlinks">(['..query..' lére])</sup>'
        end
    end

    return ref
end
--- Classify one reference of one spelling and record it in output_lst.
-- Side effects: may load the reference, stores categories, appends the
-- reference ID to words[word], increments total_known and appends the
-- rendered reference to output_lst[group][word].
-- @param word spelling the reference belongs to ("-" or "nén dins" means "not in")
-- @param ref  raw reference text (ID, optional ":qualifier", optional "[!group]")
-- @param lang language code, handed to the script detection
function p.sort_ref (word, ref, lang)
    local g = "unknown"

    -- escape spaces in syntax ID:<some words with spaces>
    -- alternative of ID:some_words_with_spaces
    ref = string.gsub(ref, "%:<[^>]+>", function(w)
        local flat = string.gsub(w, ' ', '_')
        -- drop the leading ":<" and the trailing ">"
        return ":"..string.sub(flat, 3, -2)
    end)

    -- get ID ref (E89, E212b, PiB2, R10:1842, ...)
    local refPattern = "([A-Z]+[A-Za-z0-9-]+)(%:?[^%s]*)"
    local refID, refQ = string.match(ref, refPattern)
    if refQ ~= nil and refQ ~= "" then
        -- drop the first character (the ":" separator)
        refQ = string.sub(refQ, 2)
    else
        refQ = nil -- string.match gives empty string
    end

    if refID ~= nil then
        if references[refID] ~= nil or p.load_reference(refID) then
            g = references[refID].group
            if pagename == word then
                -- not cat for author
                if not references[refID].isauthor then
                    local catPrefix = langObj:ucfirst(cat)
                    category.store(catPrefix.." ki sont dins "..refID, true, 0, true)
                end
            end
        end
    else
        category.store("Årtikes avou des sourdants nén rkinoxhous dins l' modele ortografeyes")
    end

    -- forced group ([!d1900], ...)
    local fg = string.match(ref, "%[![A-Za-z0-9]+%]")
    if fg ~= nil then
        local fgid = string.sub(fg, 3, -2)
        if groupsID[fgid] ~= nil then
            g = fgid
            ref = string.gsub(ref, "%[!"..fgid.."%]", '')
            category.store("Årtikes avou des adjinçnaedjes di troke dins l' modele ortografeyes")
        end
    end

    -- collect refs by word
    if words[word] == nil then
        words[word] = {refID}
    else
        table.insert(words[word], refID)
    end

    -- for 'not' group
    if word == "-" or word == "nén dins" then
        g = "not"
        word = "-" -- FIXME we should delete this
    end

    -- check that the group does exist for the loaded language
    if g == nil or groupsID[g] == nil then
        g = 'unknown'
    end
    if g == 'unknown' and groupsID['byscript'] then
        g = 'byscript'
    end
    if g ~= "not" and g ~= "unknown" and g ~= "all" then -- and g ~= "byscript" then
        total_known = total_known + 1
    end

    -- group by writing script
    if (g == "byscript"
        or g == "tif" or g == "ara" or g == "lat") -- legacy
    then
        -- legacy
        if (g == "tif" or g == "ara" or g == "lat") then
            category.store("Årtikes avou des adjinçnaedjes di troke dins l' modele ortografeyes")
        end
        local s = require("Module:Scripe")
        local script = s.findBestScript(word, lang)
        if script ~= "None" then
            g = script
        else
            g = 'unknown'
        end
    end

    -- add ref in group
    local li = p.build_ref(ref, refID, word, refQ)
    if g ~= nil then
        if output_lst[g] == nil then
            output_lst[g] = {}
        end
        if output_lst[g][word] ~= nil then
            output_lst[g][word] = output_lst[g][word] .. ', '..li
        else
            output_lst[g][word] = li
        end
    end
end
--- Load a reference and cache what this module needs from it.
-- Only local data is kept: the display group and one link source
-- (wikisource, else external, else wikipedia). Every alias of the reference
-- is cached too, pointing back to the main entry for its link data.
-- @param refID reference ID to load
-- @return true when the reference was found and cached, false otherwise
function p.load_reference(refID)
    local ref = sourdant.get_reference(refID, false)
    if not ref then
        return false
    end

    -- when the language has no group list, everything goes to the catch-all 'all' group
    local use_all = groupsID["all"] ~= nil

    local entry = {}
    if use_all then
        entry.group = 'all'
    elseif ref.group ~= nil then
        entry.group = ref.group
    else
        entry.group = 'unknown'
    end
    references[refID] = entry

    -- aliases
    if ref.aliases ~= nil then
        for _, a in pairs(ref.aliases) do
            local alias_entry = { alias = entry }
            if use_all then
                -- same rule as the main entry, so an alias is not sent to
                -- 'unknown' while its reference is shown in 'all'
                alias_entry.group = 'all'
            elseif a.group ~= nil then
                alias_entry.group = a.group
            else
                alias_entry.group = entry.group
            end
            references[a.id] = alias_entry
        end
    end

    -- wikisource
    if ref.wikisource then
        entry.wikisource = ref.wikisource
    -- or external
    elseif ref.external then
        entry.external = ref.external
    -- or wikipedia
    elseif ref.wikipedia then
        entry.wikipedia = ref.wikipedia
    end
    return true
end
--- Fill `groups` and `groupsID` for the current language.
-- The list comes from the data page "Ortografeyes/groups-<lang>"; when there
-- is none, a single 'all' group is used instead. The 'unknown' and 'not'
-- groups are always appended at the end.
-- @return true in every case
function p.load_groups()
    -- append a module-defined group and index it by id
    local function append_group(id, title)
        table.insert(groups, {id = id, title = title})
        groupsID[id] = groups[#groups]
    end

    local loaded = base.load_data("Ortografeyes/groups-"..lang)
    if loaded then
        -- base.load_data() returns a read-only table, so the entries are
        -- re-listed in our own 'groups' table and indexed by id
        for idx, grp in ipairs(loaded) do
            table.insert(groups, idx, grp)
            groupsID[grp.id] = groups[idx]
        end
    else
        -- no group list for this language: everything goes into 'all'
        append_group("all", "Totes les ortografeyes")
    end

    -- 'unknown', and 'not' (for 'word is not in this ref')
    append_group("unknown", "Ôtès ortografeyes <small>(avou des sourdants nén rkinoxhous)</small>")
    append_group("not", "Nén dins")
    return true -- always ?
end
--- Build the whole output of the template.
-- Every argument except `lang`, `cat` and `croejh` is read as
-- "spelling = comma-separated references" ("\," stands for a literal comma).
-- The references are sorted into groups, the groups rendered, and the stored
-- categories appended. For Walloon, when no reference fell into a known
-- group, an invitation to add spellings is shown as well.
-- @param orto template arguments
-- @return HTML/wikitext
function p.render(orto)
    -- option names that are not spellings -- FIXME see p.init()
    local reserved = { lang = true, cat = true, croejh = true }

    for word, refs in pairs(orto) do
        if not reserved[word] then
            local spelling = mw.text.trim(word)
            -- hide escaped commas while splitting on ","
            local protected = string.gsub(refs, "\\,", "\\x2C")
            for _, chunk in ipairs(mw.text.split(protected, ",", true)) do
                -- put the escaped commas back
                local one_ref = string.gsub(chunk, "\\x2C", ",")
                one_ref = mw.text.trim(one_ref)
                p.sort_ref(spelling, one_ref, lang)
            end
        end
    end

    local g = p.build_groups()
    local html = '<div class="g-or">'..g..'</div>'

    if total_known == 0 and lang == "wa" then
        local adverb = "co"
        if g ~= "" then
            adverb = "cåzu"
        end
        local linktxt = lingaedje.grammar({args={"Djivêye des motîs",lang,"",vowel='1'}})
        local langtxt = langObj:ucfirst(lingaedje.get_name(lang))
        category.store(lingaedje.grammar({args={"Mots",lang,"avou des ortografeyes a radjouter",vowel='1'}}))
        local notice = '<p class="info">I gn a '..adverb..' rén dins ci hagnon ci, mins si vos irîz rnaxhî dins des ôtes motîs, et trover l’ mot « <b>'..pagename..'</b> », riherez ciddé li scrijha do mot ki vos î avoz trové, et l’ rahouca do motî el [[Wiccionaire:'..langtxt..'#Sourdants|'..linktxt..']].</p>'
        if g ~= "" then
            -- some boxes exist: the notice comes after them
            html = html..notice
        else
            -- nothing to show: the notice replaces the empty wrapper
            html = notice
        end
    end

    return html .. category.get_all()
end
--- Entry point of the module (called from the template through #invoke).
-- Reads the options `lang`, `croejh` and `cat` from the calling template,
-- records the page name, loads the groups and returns the rendered list.
-- @param frame frame object given by Scribunto; the arguments are read from
--              the parent of the current frame
-- @return HTML/wikitext produced by p.render
function p.init(frame)
    -- arguments of the template that invoked the module
    local args = mw.getCurrentFrame():getParent().args

    -- language of the spellings
    if args.lang ~= nil then
        lang = args.lang
    end

    -- croejh option
    if args.croejh ~= nil then
        croejh = args.croejh
    end

    -- category family (dijhêye, spots and mots ???)
    local wanted = args.cat
    if wanted ~= nil then
        if wanted == "spot" or wanted == "spots" then
            cat = "spots"
        elseif wanted == "dijhêye" or wanted == "dijhêyes" then
            cat = "dijhêyes"
        end
    end

    -- page name
    pagename = mw.title.getCurrentTitle().text

    p.load_groups()
    return p.render(args)
end
return p