Module: Text Warning: You are not logged in. Your IP address will be publicly visible if you make any edits. If you log in or create an account, your edits will be attributed to your username, along with other benefits.Anti-spam check. Do not fill this in! local yesNo = require("Module:Yesno") local Text = { serial = "2022-07-21", suite = "Text" } --[=[ Text utilities ]=] -- local globals local PatternCJK = false local PatternCombined = false local PatternLatin = false local PatternTerminated = false local QuoteLang = false local QuoteType = false local RangesLatin = false local SeekQuote = false local function initLatinData() if not RangesLatin then RangesLatin = { { 7, 687 }, { 7531, 7578 }, { 7680, 7935 }, { 8194, 8250 } } end if not PatternLatin then local range PatternLatin = "^[" for i = 1, #RangesLatin do range = RangesLatin[ i ] PatternLatin = PatternLatin .. mw.ustring.char( range[ 1 ], 45, range[ 2 ] ) end -- for i PatternLatin = PatternLatin .. "]*$" end end local function initQuoteData() -- Create quote definitions if not QuoteLang then QuoteLang = { af = "bd", ar = "la", be = "labd", bg = "bd", ca = "la", cs = "bd", da = "bd", de = "bd", dsb = "bd", et = "bd", el = "lald", en = "ld", es = "la", eu = "la", -- fa = "la", fi = "rd", fr = "laSPC", ga = "ld", he = "ldla", hr = "bd", hsb = "bd", hu = "bd", hy = "labd", id = "rd", is = "bd", it = "ld", ja = "x300C", ka = "bd", ko = "ld", lt = "bd", lv = "bd", nl = "ld", nn = "la", no = "la", pl = "bdla", pt = "lald", ro = "bdla", ru = "labd", sk = "bd", sl = "bd", sq = "la", sr = "bx", sv = "rd", th = "ld", tr = "ld", uk = "la", zh = "ld", ["de-ch"] = "la", ["en-gb"] = "lsld", ["en-us"] = "ld", ["fr-ch"] = "la", ["it-ch"] = "la", ["pt-br"] = "ldla", ["zh-tw"] = "x300C", ["zh-cn"] = "ld" } end if not QuoteType then QuoteType = { bd = { { 8222, 8220 }, { 8218, 8217 } }, bdla = { { 8222, 8220 }, { 171, 187 } }, bx = { { 8222, 8221 }, { 8218, 8217 } }, la = { { 171, 187 }, { 8249, 8250 } }, laSPC = { { 171, 187 }, { 8249, 8250 }, true }, labd = { { 171, 187 }, { 8222, 8220 } }, lald = { { 171, 187 }, { 8220, 8221 } }, ld = { { 8220, 8221 }, { 8216, 8217 } }, ldla = { { 8220, 8221 }, { 171, 187 } }, lsld = { { 8216, 8217 }, { 8220, 8221 } }, rd = { { 8221, 8221 }, { 8217, 8217 } }, x300C = { { 0x300C, 0x300D }, { 0x300E, 0x300F } } } end end -- initQuoteData() local function fiatQuote( apply, alien, advance ) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code -- advance -- number, with level 1 or 2 local r = apply and tostring(apply) or "" alien = alien or "en" advance = tonumber(advance) or 0 local suite initQuoteData() local slang = alien:match( "^(%l+)-" ) suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"] if suite then local quotes = QuoteType[ suite ] if quotes then local space if quotes[ 3 ] then space = " " else space = "" end quotes = quotes[ advance ] if quotes then r = mw.ustring.format( "%s%s%s%s%s", mw.ustring.char( quotes[ 1 ] ), space, apply, space, mw.ustring.char( quotes[ 2 ] ) ) end else mw.log( "fiatQuote() " .. suite ) end end return r end -- fiatQuote() Text.char = function ( apply, again, accept ) -- Create string from codepoints -- Parameter: -- apply -- table (sequence) with numerical codepoints, or nil -- again -- number of repetitions, or nil -- accept -- true, if no error messages to be appended -- Returns: string local r = "" apply = type(apply) == "table" and apply or {} again = math.floor(tonumber(again) or 1) if again < 1 then return "" end local bad = { } local codes = { } for _, v in ipairs( apply ) do local n = tonumber(v) if not n or (n < 32 and n ~= 9 and n ~= 10) then table.insert(bad, tostring(v)) else table.insert(codes, math.floor(n)) end end if #bad > 0 then if not accept then r = tostring( mw.html.create( "span" ) :addClass( "error" ) :wikitext( "bad codepoints: " .. table.concat( bad, " " )) ) end return r end if #codes > 0 then r = mw.ustring.char( unpack( codes ) ) if again > 1 then r = r:rep(again) end end return r end -- Text.char() local function trimAndFormat(args, fmt) local result = {} if type(args) ~= 'table' then args = {args} end for _, v in ipairs(args) do v = mw.text.trim(tostring(v)) if v ~= "" then table.insert(result,fmt and mw.ustring.format(fmt, v) or v) end end return result end Text.concatParams = function ( args, apply, adapt ) -- Concat list items into one string -- Parameter: -- args -- table (sequence) with numKey=string -- apply -- string (optional); separator (default: "|") -- adapt -- string (optional); format including "%s" -- Returns: string local collect = { } return table.concat(trimAndFormat(args,adapt), apply or "|") end -- Text.concatParams() Text.containsCJK = function ( s ) -- Is any CJK code within? -- Parameter: -- s -- string -- Returns: true, if CJK detected s = s and tostring(s) or "" if not patternCJK then patternCJK = mw.ustring.char( 91, 4352, 45, 4607, 11904, 45, 42191, 43072, 45, 43135, 44032, 45, 55215, 63744, 45, 64255, 65072, 45, 65103, 65381, 45, 65500, 131072, 45, 196607, 93 ) end return mw.ustring.find( s, patternCJK ) ~= nil end -- Text.containsCJK() Text.removeDelimited = function (s, prefix, suffix) -- Remove all text in s delimited by prefix and suffix (inclusive) -- Arguments: -- s = string to process -- prefix = initial delimiter -- suffix = ending delimiter -- Returns: stripped string s = s and tostring(s) or "" prefix = prefix and tostring(prefix) or "" suffix = suffix and tostring(suffix) or "" local prefixLen = mw.ustring.len(prefix) local suffixLen = mw.ustring.len(suffix) if prefixLen == 0 or suffixLen == 0 then return s end local i = s:find(prefix, 1, true) local r = s local j while i do j = r:find(suffix, i + prefixLen) if j then r = r:sub(1, i - 1)..r:sub(j+suffixLen) else r = r:sub(1, i - 1) end i = r:find(prefix, 1, true) end return r end Text.getPlain = function ( adjust ) -- Remove wikisyntax from string, except templates -- Parameter: -- adjust -- string -- Returns: string local r = Text.removeDelimited(adjust,"<!--","-->") r = r:gsub( "(</?%l[^>]*>)", "" ) :gsub( "'''", "" ) :gsub( "''", "" ) :gsub( " ", " " ) return r end -- Text.getPlain() Text.isLatinRange = function (s) -- Are characters expected to be latin or symbols within latin texts? -- Arguments: -- s = string to analyze -- Returns: true, if valid for latin only s = s and tostring(s) or "" --- ensure input is always string initLatinData() return mw.ustring.match(s, PatternLatin) ~= nil end -- Text.isLatinRange() Text.isQuote = function ( s ) -- Is this character any quotation mark? -- Parameter: -- s = single character to analyze -- Returns: true, if s is quotation mark s = s and tostring(s) or "" if s == "" then return false end if not SeekQuote then SeekQuote = mw.ustring.char( 34, -- " 39, -- ' 171, -- laquo 187, -- raquo 8216, -- lsquo 8217, -- rsquo 8218, -- sbquo 8220, -- ldquo 8221, -- rdquo 8222, -- bdquo 8249, -- lsaquo 8250, -- rsaquo 0x300C, -- CJK 0x300D, -- CJK 0x300E, -- CJK 0x300F ) -- CJK end return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil end -- Text.isQuote() Text.listToText = function ( args, adapt ) -- Format list items similar to mw.text.listToText() -- Parameter: -- args -- table (sequence) with numKey=string -- adapt -- string (optional); format including "%s" -- Returns: string return mw.text.listToText(trimAndFormat(args, adapt)) end -- Text.listToText() Text.quote = function ( apply, alien, advance ) -- Quote text -- Parameter: -- apply -- string, with text -- alien -- string, with language code, or nil -- advance -- number, with level 1 or 2, or nil -- Returns: quoted string apply = apply and tostring(apply) or "" local mode, slang if type( alien ) == "string" then slang = mw.text.trim( alien ):lower() else slang = mw.title.getCurrentTitle().pageLanguage if not slang then -- TODO FIXME: Introduction expected 2017-04 slang = mw.language.getContentLanguage():getCode() end end if advance == 2 then mode = 2 else mode = 1 end return fiatQuote( mw.text.trim( apply ), slang, mode ) end -- Text.quote() Text.quoteUnquoted = function ( apply, alien, advance ) -- Quote text, if not yet quoted and not empty -- Parameter: -- apply -- string, with text -- alien -- string, with language code, or nil -- advance -- number, with level 1 or 2, or nil -- Returns: string; possibly quoted local r = mw.text.trim( apply and tostring(apply) or "" ) local s = mw.ustring.sub( r, 1, 1 ) if s ~= "" and not Text.isQuote( s, advance ) then s = mw.ustring.sub( r, -1, 1 ) if not Text.isQuote( s ) then r = Text.quote( r, alien, advance ) end end return r end -- Text.quoteUnquoted() Text.removeDiacritics = function ( adjust ) -- Remove all diacritics -- Parameter: -- adjust -- string -- Returns: string; all latin letters should be ASCII -- or basic greek or cyrillic or symbols etc. local cleanup, decomposed if not PatternCombined then PatternCombined = mw.ustring.char( 91, 0x0300, 45, 0x036F, 0x1AB0, 45, 0x1AFF, 0x1DC0, 45, 0x1DFF, 0xFE20, 45, 0xFE2F, 93 ) end decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" ) cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" ) return mw.ustring.toNFC( cleanup ) end -- Text.removeDiacritics() Text.sentenceTerminated = function ( analyse ) -- Is string terminated by dot, question or exclamation mark? -- Quotation, link termination and so on granted -- Parameter: -- analyse -- string -- Returns: true, if sentence terminated local r if not PatternTerminated then PatternTerminated = mw.ustring.char( 91, 12290, 65281, 65294, 65311 ) .. "!%.%?β¦][\"'%]βΉβΊΒ«Β»ββββ]*$" end if mw.ustring.find( analyse, PatternTerminated ) then r = true else r = false end return r end -- Text.sentenceTerminated() Text.ucfirstAll = function ( adjust) -- Capitalize all words -- Arguments: -- adjust = string to adjust -- Returns: string with all first letters in upper case adjust = adjust and tostring(adjust) or "" local r = mw.text.decode(adjust,true) local i = 1 local c, j, m m = (r ~= adjust) r = " "..r while i do i = mw.ustring.find( r, "%W%l", i ) if i then j = i + 1 c = mw.ustring.upper( mw.ustring.sub( r, j, j ) ) r = string.format( "%s%s%s", mw.ustring.sub( r, 1, i ), c, mw.ustring.sub( r, i + 2 ) ) i = j end end -- while i r = r:sub( 2 ) if m then r = mw.text.encode(r) end return r end -- Text.ucfirstAll() Text.uprightNonlatin = function ( adjust ) -- Ensure non-italics for non-latin text parts -- One single greek letter might be granted -- Precondition: -- adjust -- string -- Returns: string with non-latin parts enclosed in <span> local r initLatinData() if mw.ustring.match( adjust, PatternLatin ) then -- latin only, horizontal dashes, quotes r = adjust else local c local j = false local k = 1 local m = false local n = mw.ustring.len( adjust ) local span = "%s%s<span dir='auto' style='font-style:normal'>%s</span>" local flat = function ( a ) -- isLatin local range for i = 1, #RangesLatin do range = RangesLatin[ i ] if a >= range[ 1 ] and a <= range[ 2 ] then return true end end -- for i end -- flat() local focus = function ( a ) -- char is not ambivalent local r = ( a > 64 ) if r then r = ( a < 8192 or a > 8212 ) else r = ( a == 38 or a == 60 ) -- '&' '<' end return r end -- focus() local form = function ( a ) return string.format( span, r, mw.ustring.sub( adjust, k, j - 1 ), mw.ustring.sub( adjust, j, a ) ) end -- form() r = "" for i = 1, n do c = mw.ustring.codepoint( adjust, i, i ) if focus( c ) then if flat( c ) then if j then if m then if i == m then -- single greek letter. j = false end m = false end if j then local nx = i - 1 local s = "" for ix = nx, 1, -1 do c = mw.ustring.sub( adjust, ix, ix ) if c == " " or c == "(" then nx = nx - 1 s = c .. s else break -- for ix end end -- for ix r = form( nx ) .. s j = false k = i end end elseif not j then j = i if c >= 880 and c <= 1023 then -- single greek letter? m = i + 1 else m = false end end elseif m then m = m + 1 end end -- for i if j and ( not m or m < n ) then r = form( n ) else r = r .. mw.ustring.sub( adjust, k ) end end return r end -- Text.uprightNonlatin() Text.test = function ( about ) local r if about == "quote" then initQuoteData() r = { } r.QuoteLang = QuoteLang r.QuoteType = QuoteType end return r end -- Text.test() -- Export local p = { } for _, func in ipairs({'containsCJK','isLatinRange','isQuote','sentenceTerminated'}) do p[func] = function (frame) return Text[func]( frame.args[ 1 ] or "" ) and "1" or "" end end for _, func in ipairs({'getPlain','removeDiacritics','ucfirstAll','uprightNonlatin'}) do p[func] = function (frame) return Text[func]( frame.args[ 1 ] or "" ) end end function p.char( frame ) local params = frame:getParent().args local story = params[ 1 ] local codes, lenient, multiple if not story then params = frame.args story = params[ 1 ] end if story then local items = mw.text.split( mw.text.trim(story), "%s+" ) if #items > 0 then local j lenient = (yesNo(params.errors) == false) codes = { } multiple = tonumber( params[ "*" ] ) for _, v in ipairs( items ) do j = tonumber((v:sub( 1, 1 ) == "x" and "0" or "") .. v) table.insert( codes, j or v ) end end end return Text.char( codes, multiple, lenient ) end function p.concatParams( frame ) local args local template = frame.args.template if type( template ) == "string" then template = mw.text.trim( template ) template = ( template == "1" ) end if template then args = frame:getParent().args else args = frame.args end return Text.concatParams( args, frame.args.separator, frame.args.format ) end function p.listToFormat(frame) local lists = {} local pformat = frame.args["format"] local sep = frame.args["sep"] or ";" -- Parameter parsen: Listen for k, v in pairs(frame.args) do local knum = tonumber(k) if knum then lists[knum] = v end end -- Listen splitten local maxListLen = 0 for i = 1, #lists do lists[i] = mw.text.split(lists[i], sep) if #lists[i] > maxListLen then maxListLen = #lists[i] end end -- Ergebnisstring generieren local result = "" local result_line = "" for i = 1, maxListLen do result_line = pformat for j = 1, #lists do result_line = mw.ustring.gsub(result_line, "%%s", lists[j][i], 1) end result = result .. result_line end return result end function p.listToText( frame ) local args local template = frame.args.template if type( template ) == "string" then template = mw.text.trim( template ) template = ( template == "1" ) end if template then args = frame:getParent().args else args = frame.args end return Text.listToText( args, frame.args.format ) end function p.quote( frame ) local slang = frame.args[2] if type( slang ) == "string" then slang = mw.text.trim( slang ) if slang == "" then slang = false end end return Text.quote( frame.args[ 1 ] or "", slang, tonumber( frame.args[3] ) ) end function p.quoteUnquoted( frame ) local slang = frame.args[2] if type( slang ) == "string" then slang = mw.text.trim( slang ) if slang == "" then slang = false end end return Text.quoteUnquoted( frame.args[ 1 ] or "", slang, tonumber( frame.args[3] ) ) end function p.zip(frame) local lists = {} local seps = {} local defaultsep = frame.args["sep"] or "" local innersep = frame.args["isep"] or "" local outersep = frame.args["osep"] or "" -- Parameter parsen for k, v in pairs(frame.args) do local knum = tonumber(k) if knum then lists[knum] = v else if string.sub(k, 1, 3) == "sep" then local sepnum = tonumber(string.sub(k, 4)) if sepnum then seps[sepnum] = v end end end end -- sofern keine expliziten Separatoren angegeben sind, den Standardseparator verwenden for i = 1, math.max(#seps, #lists) do if not seps[i] then seps[i] = defaultsep end end -- Listen splitten local maxListLen = 0 for i = 1, #lists do lists[i] = mw.text.split(lists[i], seps[i]) if #lists[i] > maxListLen then maxListLen = #lists[i] end end local result = "" for i = 1, maxListLen do if i ~= 1 then result = result .. outersep end for j = 1, #lists do if j ~= 1 then result = result .. innersep end result = result .. (lists[j][i] or "") end end return result end function p.failsafe() return Text.serial end p.Text = function () return Text end -- p.Text return p Summary: Please note that all contributions to Christianpedia may be edited, altered, or removed by other contributors. If you do not want your writing to be edited mercilessly, then do not submit it here. You are also promising us that you wrote this yourself, or copied it from a public domain or similar free resource (see Christianpedia:Copyrights for details). Do not submit copyrighted work without permission! Cancel Editing help (opens in new window) Template used on this page: Module:Text/doc (view source) Discuss this page