Module:Text: Difference between revisions

m
1 revision imported from wikipedia:Module:Text
(return true/false)
 
m (1 revision imported from wikipedia:Module:Text)
 
(2 intermediate revisions by 2 users not shown)
Line 1:
local Text = { serialyesNo = require("2017-11-01Module:Yesno",)
local Text = { serial = "2022-07-21",
suite = "Text" }
--[=[
Line 17 ⟶ 18:
local SeekQuote = false
 
local function initLatinData()
    -- Lazily fill the shared Latin lookup data; does nothing for a value
    -- that has already been set.
    --     RangesLatin  -- sequence of { first, last } code point pairs
    --     PatternLatin -- anchored pattern "^[...]*$" whose character
    --                     class is built from the pairs in RangesLatin
    -- Returns: nothing
    RangesLatin = RangesLatin or { { 7, 687 },
                                   { 7531, 7578 },
                                   { 7680, 7935 },
                                   { 8194, 8250 } }
    if PatternLatin then
        return
    end
    local pieces = { "^[" }
    for _, bounds in ipairs( RangesLatin ) do
        -- 45 is the code point of "-", so each piece reads "<first>-<last>"
        pieces[ #pieces + 1 ] = mw.ustring.char( bounds[ 1 ],
                                                 45,
                                                 bounds[ 2 ] )
    end
    pieces[ #pieces + 1 ] = "]*$"
    PatternLatin = table.concat( pieces )
end
 
local function initQuoteData()
 
local function factoryQuote()
-- Create quote definitions
if not QuoteLang then
QuoteLang = { af = "bd",
QuoteLang =
{ af = "bd",
ar = "la",
be = "labd",
Line 76 ⟶ 96:
["zh-tw"] = "x300C",
["zh-cn"] = "ld" }
end
QuoteType = { bd = { { 8222, 8220 }, { 8218, 8217 } },
if not QuoteType then
QuoteType =
{ bd = { { 8222, 8220 }, { 8218, 8217 } },
bdla = { { 8222, 8220 }, { 171, 187 } },
bx = { { 8222, 8221 }, { 8218, 8217 } },
Line 89 ⟶ 112:
x300C = { { 0x300C, 0x300D },
{ 0x300E, 0x300F } } }
return rend
end -- factoryQuoteinitQuoteData()
 
 
Line 100 ⟶ 123:
-- alien -- string, with language code
-- advance -- number, with level 1 or 2
local r = apply and tostring(apply) or ""
alien = alien or "en"
advance = tonumber(advance) or 0
local suite
initQuoteData()
if not QuoteLang then
local slang = alien:match( "^(%l+)-" )
factoryQuote()
suite = QuoteLang[alien] or slang and QuoteLang[slang] or QuoteLang["en"]
end
suite = QuoteLang[ alien ]
if not suite then
local slang = alien:match( "^(%l+)-" )
if slang then
suite = QuoteLang[ slang ]
end
if not suite then
suite = QuoteLang[ "en" ]
end
end
if suite then
local quotes = QuoteType[ suite ]
Line 149 ⟶ 164:
-- accept -- true, if no error messages to be appended
-- Returns: string
local r = ""
ifapply = type( apply ) == "table" thenand apply or {}
again = math.floor(tonumber(again) or 1)
local bad = { }
if again < 1 local codes = { }then
return local s""
for k, v in pairs( apply ) do
s = type( v )
if s == "number" then
if v < 32 and v ~= 9 and v ~= 10 then
v = tostring( v )
else
v = math.floor( v )
s = false
end
elseif s ~= "string" then
v = tostring( v )
end
if s then
table.insert( bad, v )
else
table.insert( codes, v )
end
end -- for k, v
if #bad == 0 then
if #codes > 0 then
r = mw.ustring.char( unpack( codes ) )
if again then
if type( again ) == "number" then
local n = math.floor( again )
if n > 1 then
r = r:rep( n )
elseif n < 1 then
r = ""
end
else
s = "bad repetitions: " .. tostring( again )
end
end
end
else
s = "bad codepoints: " .. table.concat( bad, " " )
end
if s and not accept then
r = tostring( mw.html.create( "span" )
:addClass( "error" )
:wikitext( s ) )
end
end
returnlocal rbad or "" = { }
local codes = { }
for _, v in ipairs( apply ) do
local n = tonumber(v)
if not n or (n < 32 and n ~= 9 and n ~= 10) then
table.insert(bad, tostring(v))
else
table.insert(codes, math.floor(n))
end
end
if #bad > 0 then
if not accept then
r = tostring( mw.html.create( "span" )
:addClass( "error" )
:wikitext( "bad codepoints: " .. table.concat( bad, " " )) )
end
return r
end
if #codes > 0 then
r = mw.ustring.char( unpack( codes ) )
if again > 1 then
r = r:rep(again)
end
end
return r
end -- Text.char()
 
local function trimAndFormat(args, fmt)
    -- Turn a list of values into trimmed, optionally formatted strings.
    -- Arguments:
    --     args  -- table, iterated as a sequence with ipairs;
    --              any non-table value is wrapped as a one-element list
    --     fmt   -- string (optional); format passed to mw.ustring.format
    --              together with each trimmed entry
    -- Returns: new sequence table; entries that are empty after
    --          trimming are left out
    local items = args
    if type(items) ~= 'table' then
        items = { items }
    end
    local kept = {}
    for _, entry in ipairs(items) do
        local text = mw.text.trim(tostring(entry))
        if text ~= "" then
            if fmt then
                text = mw.ustring.format(fmt, text)
            end
            kept[#kept + 1] = text
        end
    end
    return kept
end
 
Text.concatParams = function ( args, apply, adapt )
Line 210 ⟶ 219:
-- Returns: string
local collect = { }
return table.concat(trimAndFormat(args,adapt), apply or "|")
args = type(args) == 'table' and args or {} -- ensure args is table
for k, v in pairs( args ) do
if type( k ) == "number" then
v = mw.text.trim( v )
if v ~= "" then
if adapt then
v = mw.ustring.format( adapt, v )
end
table.insert( collect, v )
end
end
end -- for k, v
return table.concat( collect, apply or "|" )
end -- Text.concatParams()
 
 
 
Text.containsCJK = function ( analyses )
-- Is any CJK code within?
-- Parameter:
-- analyses -- string
-- Returns: true, if CJK detected
analyses = analyses and tostring(s) or ""
if not patternCJK then
patternCJK = mw.ustring.char( 91,
Line 245 ⟶ 242:
93 )
end
ifreturn mw.ustring.find( analyses, patternCJK ) then~= nil
return true
end
return false
end -- Text.containsCJK()
 
Line 258 ⟶ 252:
-- suffix = ending delimiter
-- Returns: stripped string
s = s and tostring(s) or ""
prefix = prefix and tostring(prefix) or ""
suffix = suffix and tostring(suffix) or ""
local prefixLen = mw.ustring.len(prefix)
local suffixLen = mw.ustring.len(suffix)
if prefixLen == 0 or suffixLen == 0 then
return s
end
local i = s:find(prefix, 1, true)
local r = s
Line 288:
end -- Text.getPlain()
 
Text.isLatinRange = function (s)
 
 
Text.isLatinRange = function ( adjust )
-- Are characters expected to be latin or symbols within latin texts?
-- PreconditionArguments:
-- s adjust --= string, or nil forto initializationanalyze
-- Returns: true, if valid for latin only
s = s and tostring(s) or "" --- ensure input is always string
local r
initLatinData()
if not RangesLatin then
return mw.ustring.match(s, PatternLatin) ~= nil
RangesLatin = { { 7, 687 },
{ 7531, 7578 },
{ 7680, 7935 },
{ 8194, 8250 } }
end
if not PatternLatin then
local range
PatternLatin = "^["
for i = 1, #RangesLatin do
range = RangesLatin[ i ]
PatternLatin = PatternLatin ..
mw.ustring.char( range[ 1 ], 45, range[ 2 ] )
end -- for i
PatternLatin = PatternLatin .. "]*$"
end
if adjust then
if mw.ustring.match( adjust, PatternLatin ) then
r = true
else
r = false
end
end
return r
end -- Text.isLatinRange()
 
 
 
Text.isQuote = function ( asks )
-- Is this character any quotation mark?
-- Parameter:
-- asks = --single string,character withto single characteranalyze
-- Returns: true, if asks is quotation mark
s = s and tostring(s) or ""
local r
if s == "" then
return false
end
if not SeekQuote then
SeekQuote = mw.ustring.char( 34, -- "
Line 348 ⟶ 327:
0x300F ) -- CJK
end
return mw.ustring.find( SeekQuote, s, 1, true ) ~= nil
if ask == "" then
r = false
elseif mw.ustring.find( SeekQuote, ask, 1, true ) then
r = true
else
r = false
end
return r
end -- Text.isQuote()
 
Line 366 ⟶ 338:
-- adapt -- string (optional); format including "%s"
-- Returns: string
return mw.text.listToText(trimAndFormat(args, adapt))
local collect = { }
for k, v in pairs( args ) do
if type( k ) == "number" then
v = mw.text.trim( v )
if v ~= "" then
if adapt then
v = mw.ustring.format( adapt, v )
end
table.insert( collect, v )
end
end
end -- for k, v
return mw.text.listToText( collect )
end -- Text.listToText()
 
Line 390 ⟶ 350:
-- advance -- number, with level 1 or 2, or nil
-- Returns: quoted string
apply = apply and tostring(apply) or ""
local mode, slang
if type( alien ) == "string" then
Line 417 ⟶ 378:
-- advance -- number, with level 1 or 2, or nil
-- Returns: string; possibly quoted
local r = mw.text.trim( apply and tostring(apply) or "" )
local s = mw.ustring.sub( r, 1, 1 )
if s ~= "" and not Text.isQuote( s, advance ) then
Line 445 ⟶ 406:
93 )
end
decomposed = mw.ustring.toNFD( adjust and tostring(adjust) or "" )
cleanup = mw.ustring.gsub( decomposed, PatternCombined, "" )
return mw.ustring.toNFC( cleanup )
Line 477 ⟶ 438:
 
 
Text.ucfirstAll = function ( adjust )
-- Capitalize all words
-- PreconditionArguments:
-- adjust --= string to adjust
-- Returns: string with all first letters in upper case
local radjust = "adjust " ..and tostring(adjust) or ""
local r = mw.text.decode(adjust,true)
local i = 1
local c, j, m
ifm = adjust:find(r "&"~= adjust) then
r = r:gsub( "&amp;", "&#38;" )..r
:gsub( "&lt;", "&#60;" )
:gsub( "&gt;", "&#62;" )
:gsub( "&nbsp;", "&#160;" )
:gsub( "&thinsp;", "&#8201;" )
:gsub( "&zwnj;", "&#8204;" )
:gsub( "&zwj;", "&#8205;" )
:gsub( "&lrm;", "&#8206;" )
:gsub( "&rlm;", "&#8207;" )
m = true
end
while i do
i = mw.ustring.find( r, "%W%l", i )
Line 511 ⟶ 463:
r = r:sub( 2 )
if m then
r = mw.text.encode(r)
r = r:gsub( "&#38;", "&amp;" )
:gsub( "&#60;", "&lt;" )
:gsub( "&#62;", "&gt;" )
:gsub( "&#160;", "&nbsp;" )
:gsub( "&#8201;", "&thinsp;" )
:gsub( "&#8204;", "&zwnj;" )
:gsub( "&#8205;", "&zwj;" )
:gsub( "&#8206;", "&lrm;" )
:gsub( "&#8207;", "&rlm;" )
:gsub( "&#X(%x+);", "&#x%1;" )
end
return r
end -- Text.ucfirstAll()
 
 
 
Line 534 ⟶ 476:
-- Returns: string with non-latin parts enclosed in <span>
local r
Text.isLatinRangeinitLatinData()
if mw.ustring.match( adjust, PatternLatin ) then
-- latin only, horizontal dashes, quotes
Line 622 ⟶ 564:
return r
end -- Text.uprightNonlatin()
 
 
 
Line 628 ⟶ 569:
local r
if about == "quote" then
factoryQuoteinitQuoteData()
r = { }
r.QuoteLang = QuoteLang
Line 640 ⟶ 581:
-- Export
local p = { }

-- Predicate wrappers: each passes frame.args[ 1 ] (or "" when absent) to
-- the Text function of the same name and returns "1" for a truthy
-- result, "" otherwise.
do
    local predicates = { "containsCJK",
                         "isLatinRange",
                         "isQuote",
                         "sentenceTerminated" }
    for i = 1, #predicates do
        local name = predicates[ i ]
        p[ name ] = function ( frame )
            if Text[ name ]( frame.args[ 1 ] or "" ) then
                return "1"
            end
            return ""
        end
    end
end

-- Pass-through wrappers: each passes frame.args[ 1 ] (or "" when absent)
-- to the Text function of the same name and returns its result unchanged.
do
    local converters = { "getPlain",
                         "removeDiacritics",
                         "ucfirstAll",
                         "uprightNonlatin" }
    for i = 1, #converters do
        local name = converters[ i ]
        p[ name ] = function ( frame )
            local source = frame.args[ 1 ] or ""
            return Text[ name ]( source )
        end
    end
end
 
function p.char( frame )
Line 650 ⟶ 603:
end
if story then
local items = mw.text.split( mw.text.trim(story), "%s+" )
if #items > 0 then
local j
lenient = ( yesNo(params.errors) == "0" false)
codes = { }
multiple = tonumber( params[ "*" ] )
for k_, v in pairsipairs( items ) do
j = if tonumber((v:sub( 1, 1 ) == "x" thenand "0" or "") .. v)
table.insert( codes, j = tonumber( "0" ..or v )
end elseif v == "" then
v = false
else
j = tonumber( v )
end
if v then
table.insert( codes, j or v )
end
end -- for k, v
end
end
Line 689 ⟶ 634:
frame.args.format )
end
 
function p.containsCJK( frame )
    -- Wrapper around Text.containsCJK.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to check
    --              ("" is used when it is absent)
    -- Returns: "1" if Text.containsCJK gives a truthy result, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.containsCJK( subject ) then
        return "1"
    end
    return ""
end
 
function p.getPlain( frame )
    -- Wrapper around Text.getPlain.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to process
    --              ("" is used when it is absent)
    -- Returns: whatever Text.getPlain returns for that text
    local source = frame.args[ 1 ] or ""
    return Text.getPlain( source )
end
 
function p.isLatinRange( frame )
    -- Wrapper around Text.isLatinRange.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to check
    --              ("" is used when it is absent)
    -- Returns: "1" if Text.isLatinRange gives a truthy result, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.isLatinRange( subject ) then
        return "1"
    end
    return ""
end
 
function p.isQuote( frame )
    -- Wrapper around Text.isQuote.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to check
    --              ("" is used when it is absent)
    -- Returns: "1" if Text.isQuote gives a truthy result, else ""
    local subject = frame.args[ 1 ] or ""
    if Text.isQuote( subject ) then
        return "1"
    end
    return ""
end
 
 
 
Line 786 ⟶ 714:
tonumber( frame.args[3] ) )
end
 
 
 
function p.removeDiacritics( frame )
    -- Wrapper around Text.removeDiacritics.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to process
    --              ("" is used when it is absent)
    -- Returns: whatever Text.removeDiacritics returns for that text
    local source = frame.args[ 1 ] or ""
    return Text.removeDiacritics( source )
end
 
function p.sentenceTerminated( frame )
    -- Wrapper around Text.sentenceTerminated.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to check
    --              ("" is used when it is absent)
    -- Returns: "1" if Text.sentenceTerminated gives a truthy result,
    --          else ""
    local subject = frame.args[ 1 ] or ""
    if Text.sentenceTerminated( subject ) then
        return "1"
    end
    return ""
end
 
function p.ucfirstAll( frame )
    -- Wrapper around Text.ucfirstAll.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to process
    --              ("" is used when it is absent)
    -- Returns: whatever Text.ucfirstAll returns for that text
    local source = frame.args[ 1 ] or ""
    return Text.ucfirstAll( source )
end
 
function p.uprightNonlatin( frame )
    -- Wrapper around Text.uprightNonlatin.
    -- Argument:
    --     frame -- object whose args[ 1 ] is the text to process
    --              ("" is used when it is absent)
    -- Returns: whatever Text.uprightNonlatin returns for that text
    local source = frame.args[ 1 ] or ""
    return Text.uprightNonlatin( source )
end
 
 
 
rtl-contributors
1,630

edits