Anonymous user
Module:Text: Difference between revisions
update date
m (1 revision imported from wikipedia:Module:Text) |
(update date) |
||
Line 1:
local
local Text = { serial = "2022-07-21",
suite = "Text" }
--[=[
Line 17 ⟶ 18:
local SeekQuote = false
local function initLatinData()
    -- Lazy setup of the two latin lookup values, both of which are
    -- declared outside this function:
    --     RangesLatin  -- sequence of { first, last } codepoint pairs
    --     PatternLatin -- anchored pattern "^[<first>-<last>...]*$"
    -- A value that is already set is left untouched.
    RangesLatin = RangesLatin or { {    7,  687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local pieces = { "^[" }
    for _, span in ipairs( RangesLatin ) do
        -- 45 is the codepoint of "-", so each piece is "<first>-<last>"
        pieces[ #pieces + 1 ] = mw.ustring.char( span[ 1 ], 45, span[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end -- initLatinData()
local function initQuoteData()
-- Create quote definitions
if not QuoteLang then
QuoteLang =
{ af = "bd",
ar = "la",
be = "labd",
Line 76 ⟶ 96:
["zh-tw"] = "x300C",
["zh-cn"] = "ld" }
end
if not QuoteType then
QuoteType =
{ bd = { { 8222, 8220 }, { 8218, 8217 } },
bdla = { { 8222, 8220 }, { 171, 187 } },
bx = { { 8222, 8221 }, { 8218, 8217 } },
Line 89 ⟶ 112:
x300C = { { 0x300C, 0x300D },
{ 0x300E, 0x300F } } }
end --
Line 100 ⟶ 123:
-- alien -- string, with language code
-- advance -- number, with level 1 or 2
local r = apply and tostring(apply) or ""
alien = alien or "en"
advance = tonumber(advance) or 0
local suite
initQuoteData()
local slang = alien:match( "^(%l+)-" )
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
if suite then
local quotes = QuoteType[ suite ]
Line 149 ⟶ 164:
-- accept -- true, if no error messages to be appended
-- Returns: string
local r = ""
again = math.floor(tonumber(again) or 1)
if again < 1
return
end
local codes = { }
for _, v in ipairs( apply ) do
local n = tonumber(v)
if not n or (n < 32 and n ~= 9 and n ~= 10) then
table.insert(bad, tostring(v))
else
table.insert(codes, math.floor(n))
end
end
if #bad > 0 then
if not accept then
r = tostring( mw.html.create( "span" )
:addClass( "error" )
:wikitext( "bad codepoints: " .. table.concat( bad, " " )) )
end
return r
end
if #codes > 0 then
r = mw.ustring.char( unpack( codes ) )
if again > 1 then
r = r:rep(again)
end
end
return r
end -- Text.char()
local function trimAndFormat(args, fmt)
    -- Trim every item and drop those that are empty afterwards
    -- Parameter:
    --     args  -- table (sequence) of items, or a single non-table
    --              value which is treated as a one-item list
    --     fmt   -- string (optional); format applied to each kept
    --              item via mw.ustring.format( fmt, item )
    -- Returns: new sequence table of strings
    local items = args
    if type( items ) ~= 'table' then
        items = { items }
    end
    local kept = { }
    for _, raw in ipairs( items ) do
        local text = mw.text.trim( tostring( raw ) )
        if text ~= "" then
            if fmt then
                text = mw.ustring.format( fmt, text )
            end
            kept[ #kept + 1 ] = text
        end
    end
    return kept
end -- trimAndFormat()
Text.concatParams = function ( args, apply, adapt )
Line 210 ⟶ 219:
-- Returns: string
local collect = { }
return table.concat(trimAndFormat(args,adapt), apply or "|")
end -- Text.concatParams()
Text.containsCJK = function (
-- Is any CJK code within?
-- Parameter:
--
-- Returns: true, if CJK detected
if not patternCJK then
patternCJK = mw.ustring.char( 91,
Line 245 ⟶ 242:
93 )
end
end -- Text.containsCJK()
Line 258 ⟶ 252:
-- suffix = ending delimiter
-- Returns: stripped string
s = s and tostring(s) or ""
prefix = prefix and tostring(prefix) or ""
suffix = suffix and tostring(suffix) or ""
local prefixLen = mw.ustring.len(prefix)
local suffixLen = mw.ustring.len(suffix)
if prefixLen == 0 or suffixLen == 0 then
return s
end
local i = s:find(prefix, 1, true)
local r = s
Line 288:
end -- Text.getPlain()
Text.isLatinRange = function (s)
    -- Does the text consist only of characters from the latin ranges?
    -- Parameter:
    --     s  -- value to check; converted with tostring(),
    --           nil or false is treated as ""
    -- Returns: true, if the whole text matches PatternLatin
    local subject = ""
    if s then
        subject = tostring( s ) or ""
    end
    initLatinData()    -- makes sure PatternLatin has been built
    local hit = mw.ustring.match( subject, PatternLatin )
    return hit ~= nil
end -- Text.isLatinRange()
Text.isQuote = function (
-- Is this character any quotation mark?
-- Parameter:
--
-- Returns: true, if
s = s and tostring(s) or ""
if s == "" then
return false
end
if not SeekQuote then
SeekQuote = mw.ustring.char( 34, -- "
Line 348 ⟶ 327:
0x300F ) -- CJK
end
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
end -- Text.isQuote()
Line 366 ⟶ 338:
-- adapt -- string (optional); format including "%s"
-- Returns: string
return mw.text.listToText(trimAndFormat(args, adapt))
end -- Text.listToText()
Line 390 ⟶ 350:
-- advance -- number, with level 1 or 2, or nil
-- Returns: quoted string
apply = apply and tostring(apply) or ""
local mode, slang
if type( alien ) == "string" then
Line 417 ⟶ 378:
-- advance -- number, with level 1 or 2, or nil
-- Returns: string; possibly quoted
local r = mw.text.trim( apply and tostring(apply) or "" )
local s = mw.ustring.sub( r, 1, 1 )
if s ~= "" and not Text.isQuote( s, advance ) then
Line 445 ⟶ 406:
93 )
end
decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" )
cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" )
return mw.ustring.toNFC( cleanup )
Line 477 ⟶ 438:
Text.ucfirstAll = function ( adjust
-- Capitalize all words
--
-- adjust
-- Returns: string with all first letters in upper case
local r = mw.text.decode(adjust,true)
local i = 1
local c, j, m
while i do
i = mw.ustring.find( r, "%W%l", i )
Line 511 ⟶ 463:
r = r:sub( 2 )
if m then
r = mw.text.encode(r)
end
return r
end -- Text.ucfirstAll()
Line 534 ⟶ 476:
-- Returns: string with non-latin parts enclosed in <span>
local r
if mw.ustring.match( adjust, PatternLatin ) then
-- latin only, horizontal dashes, quotes
Line 622 ⟶ 564:
return r
end -- Text.uprightNonlatin()
Line 628 ⟶ 569:
local r
if about == "quote" then
r = { }
r.QuoteLang = QuoteLang
Line 640 ⟶ 581:
-- Export
local p = { }
do
    -- Yes/no checks: the exported wrapper passes the first unnamed
    -- template argument (or "") to the Text function of the same name
    -- and returns "1" for a truthy result, "" otherwise.
    local checks = { 'containsCJK',
                     'isLatinRange',
                     'isQuote',
                     'sentenceTerminated' }
    for i = 1, #checks do
        local name = checks[ i ]
        p[ name ] = function ( frame )
            if Text[ name ]( frame.args[ 1 ] or "" ) then
                return "1"
            end
            return ""
        end
    end

    -- String conversions: the exported wrapper hands back whatever the
    -- Text function of the same name returns for the first unnamed
    -- template argument (or "").
    local conversions = { 'getPlain',
                          'removeDiacritics',
                          'ucfirstAll',
                          'uprightNonlatin' }
    for i = 1, #conversions do
        local name = conversions[ i ]
        p[ name ] = function ( frame )
            return Text[ name ]( frame.args[ 1 ] or "" )
        end
    end
end
function p.char( frame )
Line 650 ⟶ 603:
end
if story then
local items = mw.text.split( mw.text.trim(story), "%s+" )
if #items > 0 then
local j
lenient = (
codes = { }
multiple = tonumber( params[ "*" ] )
for
j =
table.insert(
end
end
end
Line 689 ⟶ 634:
frame.args.format )
end
Line 786 ⟶ 714:
tonumber( frame.args[3] ) )
end
|